using System.ServiceModel.Syndication;
using System.Xml;
using Newsbot.Collector.Database.Repositories;
using Newsbot.Collector.Domain.Consts;
using Newsbot.Collector.Domain.Interfaces;
using Newsbot.Collector.Domain.Models;
using Newsbot.Collector.Domain.Models.Config;
using Newsbot.Collector.Services.HtmlParser;
using Serilog;

namespace Newsbot.Collector.Services.Jobs;

/// <summary>
///     Settings consumed by <see cref="YoutubeWatcherJob" />.
/// </summary>
public class YoutubeWatcherJobOptions
{
    /// <summary>
    ///     Connection strings used to build the repositories and the logger.
    ///     Replaced with an empty instance by <see cref="YoutubeWatcherJob.InitAndExecute" /> when null.
    /// </summary>
    public ConfigSectionConnectionStrings? ConnectionStrings { get; set; }

    /// <summary>
    ///     Number of milliseconds passed to <c>Thread.Sleep</c> after each new video is processed.
    ///     Defaults to 3000.
    /// </summary>
    public int SleepTimer { get; set; } = 3000;
}

/// <summary>
///     Job that walks the YouTube sources, reads each channel's video feed and stores
///     the videos whose URL is not yet present in the articles repository.
/// </summary>
public class YoutubeWatcherJob
{
    private IArticlesRepository _articles;
    private IIconsRepository _icons;
    private ILogger _logger;

    // Not readonly: InitAndExecute replaces it with the options supplied by the caller.
    private YoutubeWatcherJobOptions _options;
    private IDiscordQueueRepository _queue;
    private ISourcesRepository _source;

    /// <summary>
    ///     Creates the job with default options and repositories built from empty connection strings.
    ///     <see cref="InitAndExecute" /> rebuilds all of them from the options it receives.
    /// </summary>
    public YoutubeWatcherJob()
    {
        _options = new YoutubeWatcherJobOptions();
        _articles = new ArticlesTable("");
        _queue = new DiscordQueueTable("");
        _source = new SourcesTable("");
        _icons = new IconsTable("");
        _logger = JobLogger.GetLogger("", "YoutubeWatcherJob");
    }

    /// <summary>
    ///     Rebuilds the repositories and logger from <paramref name="options" /> and then runs the job.
    /// </summary>
    /// <param name="options">
    ///     Job settings. A null <see cref="YoutubeWatcherJobOptions.ConnectionStrings" /> is replaced
    ///     with an empty instance on the passed object.
    /// </param>
    public void InitAndExecute(YoutubeWatcherJobOptions options)
    {
        options.ConnectionStrings ??= new ConfigSectionConnectionStrings();

        // Keep the caller's options so that SleepTimer is honoured by CheckFeed.
        _options = options;

        var database = options.ConnectionStrings.Database ?? "";
        _articles = new ArticlesTable(database);
        _queue = new DiscordQueueTable(database);
        _source = new SourcesTable(database);
        _icons = new IconsTable(database);
        _logger = JobLogger.GetLogger(options.ConnectionStrings.OpenTelemetry ?? "", "YoutubeWatcherJob");

        Execute();
    }

    /// <summary>
    ///     Lists the sources of type <see cref="SourceTypes.YouTube" /> and checks each one in turn.
    /// </summary>
    private void Execute()
    {
        // The second argument is presumably a row limit; confirm against ISourcesRepository.
        var sources = _source.ListByType(SourceTypes.YouTube, 100);
        foreach (var source in sources)
        {
            CheckSource(source);
        }
    }

    /// <summary>
    ///     Resolves the channel ID for <paramref name="source" />, reads the channel feed and stores
    ///     every video returned by <see cref="CheckFeed" />.
    /// </summary>
    /// <param name="source">The source record to check.</param>
    private void CheckSource(SourceModel source)
    {
        var channelId = source.YoutubeId;
        if (string.IsNullOrEmpty(channelId))
        {
            channelId = GetChannelId(source.Url);
            if (string.IsNullOrEmpty(channelId))
            {
                // GetChannelId has already logged the failure. Without an ID there is no feed
                // to request, and storing an empty ID would gain nothing.
                return;
            }

            _source.UpdateYoutubeId(source.ID, channelId);
        }

        // Make sure we have a Icon for the channel
        var icon = _icons.GetBySourceId(source.ID);
        if (icon.Id == Guid.Empty)
        {
            // TODO: no icon handling has been implemented for this case yet.
        }

        var url = $"https://www.youtube.com/feeds/videos.xml?channel_id={channelId}";
        var newVideos = CheckFeed(url, source);
        foreach (var video in newVideos)
        {
            _articles.New(video);
        }
    }

    /// <summary>
    ///     Parses the page at <paramref name="url" /> and reads the YouTube channel ID from its header data.
    /// </summary>
    /// <param name="url">Address of the page to parse.</param>
    /// <returns>The channel ID, or an empty string (after logging an error) when none was found.</returns>
    private string GetChannelId(string url)
    {
        // Collect the Channel ID and store it for later.
        var pageReader = new HtmlPageReader(new HtmlPageReaderOptions
        {
            Url = url
        });
        pageReader.Parse();

        var id = pageReader.Data.Header.YoutubeChannelID ?? "";
        if (id == "")
        {
            _logger.Error(new Exception("Unable to find the Youtube Channel ID for the requested url."), url);
        }

        return id;
    }

    /// <summary>
    ///     Loads the syndication feed at <paramref name="url" /> and builds an article for every entry
    ///     whose link is not already known to the articles repository.
    /// </summary>
    /// <param name="url">Feed address.</param>
    /// <param name="source">Source whose ID is stamped on each created article.</param>
    /// <returns>The newly built articles; empty when the feed holds nothing new.</returns>
    private List<ArticlesModel> CheckFeed(string url, SourceModel source)
    {
        var videos = new List<ArticlesModel>();

        using var reader = XmlReader.Create(url);
        var feed = SyndicationFeed.Load(reader);

        foreach (var post in feed.Items)
        {
            var link = post.Links.FirstOrDefault();
            if (link is null)
            {
                // Without a link there is no URL to de-duplicate on or to parse.
                _logger.Warning("Skipping a feed entry without a link in {FeedUrl}", url);
                continue;
            }

            var articleUrl = link.Uri.AbsoluteUri;
            if (IsThisUrlKnown(articleUrl))
            {
                continue;
            }

            var videoDetails = new HtmlPageReader(new HtmlPageReaderOptions
            {
                Url = articleUrl
            });
            videoDetails.Parse();

            var article = new ArticlesModel
            {
                //Todo add the icon
                AuthorName = post.Authors.FirstOrDefault()?.Name ?? "",
                Title = post.Title.Text,
                Tags = FetchTags(post),
                URL = articleUrl,
                PubDate = post.PublishDate.DateTime,
                Thumbnail = videoDetails.Data.Header.Image,
                Description = videoDetails.Data.Header.Description,
                SourceID = source.ID,
                Video = "true"
            };
            videos.Add(article);

            // Pause after each video page that was fetched before moving to the next entry.
            Thread.Sleep(_options.SleepTimer);
        }

        return videos;
    }

    /// <summary>
    ///     Reports whether the articles repository returns a record whose URL equals <paramref name="url" />.
    /// </summary>
    /// <param name="url">Article address to look up.</param>
    /// <returns><c>true</c> when the looked-up record carries the same URL; otherwise <c>false</c>.</returns>
    private bool IsThisUrlKnown(string url)
    {
        var isKnown = _articles.GetByUrl(url);
        return isKnown.URL == url;
    }

    /// <summary>
    ///     Joins the category names of <paramref name="post" /> into one string.
    /// </summary>
    /// <param name="post">Feed entry whose categories are read.</param>
    /// <returns>
    ///     Each category name followed by a comma (so a non-empty result ends with a comma),
    ///     or an empty string when the entry has no categories.
    /// </returns>
    private static string FetchTags(SyndicationItem post)
    {
        // The trailing comma is kept on purpose so stored tag strings keep their existing shape.
        return string.Concat(post.Categories.Select(tag => $"{tag.Name},"));
    }
}