From bcbb02a663670ad1695970fa33ed71dbe44c6276 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20P=C3=A9rez?= Date: Wed, 6 Nov 2024 02:51:04 +0100 Subject: [PATCH] feat: :sparkles: add title to tvshow --- core/internal/handlers/tvshow.go | 5 +++-- core/internal/scraper/tvshow.go | 29 +++++++++++++++++++++-------- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/core/internal/handlers/tvshow.go b/core/internal/handlers/tvshow.go index 96f94dc..ab560ab 100644 --- a/core/internal/handlers/tvshow.go +++ b/core/internal/handlers/tvshow.go @@ -12,11 +12,12 @@ func (hq *Handlers) GetTVShow(c *gin.Context) { ttShowID := c.Query("ttid") slog.Info("GetTVShow", "ttid", ttShowID) - seasons := scraper.ScrapeSeasons(ttShowID) + title, seasons := scraper.ScrapeSeasons(ttShowID) - slog.Info("scraped seasons", "ttid", ttShowID) + slog.Info("scraped seasons", "ttid", ttShowID, "title", title) c.JSON(http.StatusOK, gin.H{ + "title": title, "seasons": seasons, }) } diff --git a/core/internal/scraper/tvshow.go b/core/internal/scraper/tvshow.go index 44d4ca7..7832914 100644 --- a/core/internal/scraper/tvshow.go +++ b/core/internal/scraper/tvshow.go @@ -24,20 +24,29 @@ type Episode struct { type Season []Episode -const seasonsSelector = "ul.ipc-tabs a[data-testid='tab-season-entry']" -const episodesSelector = "section.sc-1e7f96be-0.ZaQIL" -const nextSeasonButtonSelector = "#next-season-btn" -const imdbEpisodesURL = "https://www.imdb.com/title/%s/episodes?season=%d" +const ( + titleSelector = "h2.sc-b8cc654b-9.dmvgRY" + seasonsSelector = "ul.ipc-tabs a[data-testid='tab-season-entry']" + episodesSelector = "section.sc-1e7f96be-0.ZaQIL" + nextSeasonButtonSelector = "#next-season-btn" + imdbEpisodesURL = "https://www.imdb.com/title/%s/episodes?season=%d" + visitURL = "https://www.imdb.com/title/%s/episodes" +) -func ScrapeSeasons(ttImdb string) []Season { +func ScrapeSeasons(ttImdb string) (string, []Season) { c := colly.NewCollector( colly.AllowedDomains("imdb.com", "www.imdb.com"), ) + c.OnRequest(func(r *colly.Request) { + r.Headers.Set("Accept-Language", "en-US") + }) + var allSeasons []Season var seasons []int + var title string - c.OnHTML("ul.ipc-tabs a[data-testid='tab-season-entry']", func(e *colly.HTMLElement) { + c.OnHTML(seasonsSelector, func(e *colly.HTMLElement) { seasonText := strings.TrimSpace(e.Text) seasonNum, err := strconv.Atoi(seasonText) if err == nil { @@ -45,6 +54,10 @@ func ScrapeSeasons(ttImdb string) []Season { } }) + c.OnHTML(titleSelector, func(e *colly.HTMLElement) { + title = e.Text + }) + c.OnScraped(func(r *colly.Response) { seasonMap := make(map[int]bool) uniqueSeasons := []int{} @@ -75,10 +88,10 @@ func ScrapeSeasons(ttImdb string) []Season { episodeCollector.Wait() }) - c.Visit(fmt.Sprintf("https://www.imdb.com/title/%s/episodes", ttImdb)) + c.Visit(fmt.Sprintf(visitURL, ttImdb)) c.Wait() - return allSeasons + return title, allSeasons } func extractEpisodesFromSeason(data string) Season {