package services import ( "errors" "fmt" "log" "net/http" //"strconv" "github.com/PuerkitoBio/goquery" "github.com/go-rod/rod" "github.com/mmcdole/gofeed" "github.com/jtom38/newsbot/collector/domain/model" ) type YoutubeClient struct { SourceID uint Url string ChannelID string AvatarUri string Config YoutubeConfig } type YoutubeConfig struct { Debug bool } var ( // This is a local slice to store what URI's have been seen to remove extra calls to the DB YoutubeUriCache []*string ErrThumbnailMissing = errors.New("unable to find the video thumbnail on a youtube video") ErrTagsMissing = errors.New("unable to find the tags on the video") ErrAvatarMissing = errors.New("unable to find the avatar image on the page") ErrChannelIdMissing = errors.New("unable to find the channelId on the requested page") ) const YOUTUBE_FEED_URL string = "https://www.youtube.com/feeds/videos.xml?channel_id=" func NewYoutubeClient(SourceID uint, Url string) YoutubeClient { yc := YoutubeClient{ SourceID: SourceID, Url: Url, } /* cc := NewConfigClient() debug, err := strconv.ParseBool(cc.GetConfig(YOUTUBE_DEBUG)) if err != nil { panic("'YOUTUBE_DEBUG' was not a bool value")} yc.Config.Debug = debug */ return yc } // CheckSource will go and run all the commands needed to process a source. func (yc *YoutubeClient) CheckSource() error { docParser, err := yc.GetParser(yc.Url) if err != nil { return err } // Check cache/db for existing value // If we have the value, skip channelId, err := yc.GetChannelId(docParser) if err != nil { return err } if channelId == "" { return ErrChannelIdMissing } yc.ChannelID = channelId // Check the cache/db forthe value. // if we have the value, skip avatar, err := yc.GetAvatarUri() if err != nil { return err } if avatar == "" { return ErrAvatarMissing } yc.AvatarUri = avatar feed, err := yc.PullFeed() if err != nil { return err } newPosts, err := yc.CheckForNewPosts(feed) if err != nil { return err } //TODO post to the API for _, item := range newPosts { article := yc.ConvertToArticle(item) YoutubeUriCache = append(YoutubeUriCache, &item.Link) // Add the post to local cache log.Println(article) } return nil } func (yc *YoutubeClient) GetParser(uri string) (*goquery.Document, error) { html, err := http.Get(uri) if err != nil { log.Println(err) } defer html.Body.Close() doc, err := goquery.NewDocumentFromReader(html.Body) if err != nil { return nil, err } return doc, nil } // This pulls the youtube page and finds the ChannelID. // This value is required to generate the RSS feed URI func (yc *YoutubeClient) GetChannelId(doc *goquery.Document) (string, error) { meta := doc.Find("meta") for _, item := range meta.Nodes { if item.Attr[0].Val == "channelId" { yc.ChannelID = item.Attr[1].Val return yc.ChannelID, nil } } return "", ErrChannelIdMissing } // This will parse the page to find the current Avatar of the channel. func (yc *YoutubeClient) GetAvatarUri() (string, error) { var AvatarUri string browser := rod.New().MustConnect() page := browser.MustPage(yc.Url) res := page.MustElement("#channel-header-container > yt-img-shadow:nth-child(1) > img:nth-child(1)").MustAttribute("src") if *res == "" || res == nil { return AvatarUri, ErrAvatarMissing } AvatarUri = *res defer browser.Close() defer page.Close() return AvatarUri, nil } // This will parse and look for the tags that has been defined by the user. func (yc *YoutubeClient) GetTags(parser *goquery.Document) (string, error) { meta := parser.Find("meta") for _, item := range meta.Nodes { if item.Attr[0].Val == "keywords" { res := item.Attr[1].Val return res, nil } } return "", ErrTagsMissing } func (yc *YoutubeClient) GetVideoThumbnail(parser *goquery.Document) (string, error) { meta := parser.Find("meta") for _, item := range meta.Nodes { if item.Attr[0].Val == "og:image" { res := item.Attr[1].Val return res, nil } } return "", ErrThumbnailMissing } // This will pull the RSS feed items and return the results func (yc *YoutubeClient) PullFeed() (*gofeed.Feed, error) { feedUri := fmt.Sprintf("%v%v", YOUTUBE_FEED_URL, yc.ChannelID) fp := gofeed.NewParser() feed, err := fp.ParseURL(feedUri) if err != nil { return nil, err } return feed, nil } // CheckForNewPosts will talk to the Database to see if it has a record for the posts that have been extracted. // If the post does not exist, it will be added. func (yc *YoutubeClient) CheckForNewPosts(feed *gofeed.Feed) ([]*gofeed.Item, error) { var newPosts []*gofeed.Item for _, item := range feed.Items { // Check the cache/db to see if this URI has been seen already uriExists := yc.CheckUriCache(&item.Link) if uriExists { continue } //TODO Check the DB if the cache is not aware //TODO If the db knew about it, append it to the local cache // if its new, append it. newPosts = append(newPosts, item) } return newPosts, nil } func (yc *YoutubeClient) CheckUriCache(uri *string) bool { for _, item := range YoutubeUriCache { if item == uri { return true } } return false } func (yc *YoutubeClient) ConvertToArticle(item *gofeed.Item) model.Articles { parser, err := yc.GetParser(item.Link) if err != nil { log.Printf("Unable to process %v, submit this link as an issue.\n", item.Link) } tags, err := yc.GetTags(parser) if err != nil { msg := fmt.Sprintf("%v. %v", ErrTagsMissing, item.Link) log.Println(msg) } thumb, err := yc.GetVideoThumbnail(parser) if err != nil { msg := fmt.Sprintf("%v. %v", ErrThumbnailMissing, item.Link) log.Println(msg) } var article = model.Articles{ SourceID: yc.SourceID, Tags: tags, Title: item.Title, Url: item.Link, PubDate: *item.PublishedParsed, Thumbnail: thumb, Description: item.Description, AuthorName: item.Author.Name, AuthorImage: yc.AvatarUri, } return article }