using System.ServiceModel.Syndication;
using System.Xml;
using Newsbot.Collector.Database.Repositories;
using Newsbot.Collector.Domain.Consts;
using Newsbot.Collector.Domain.Interfaces;
using Newsbot.Collector.Domain.Models;
using Newsbot.Collector.Services.HtmlParser;
using Serilog;

namespace Newsbot.Collector.Services.Jobs;

/// <summary>
/// Settings handed to <see cref="YoutubeWatcherJob.InitAndExecute" />.
/// </summary>
public class YoutubeWatcherJobOptions
{
    /// <summary>Connection string passed to every repository the job creates.</summary>
    public string? DatabaseConnectionString { get; set; }

    /// <summary>Connection string passed to <c>JobLogger.GetLogger</c>.</summary>
    public string? OpenTelemetryConnectionString { get; set; }

    /// <summary>Milliseconds to sleep after each newly discovered video has been processed.</summary>
    public int SleepTimer { get; set; } = 3000;

    /// <summary>When false, <see cref="YoutubeWatcherJob.InitAndExecute" /> logs a warning and returns without running.</summary>
    public bool IsEnabled { get; set; } = true;
}

/// <summary>
/// Walks the stored YouTube sources, reads each channel's feed and records every video whose URL
/// is not yet in the articles repository, adding a Discord queue entry for each one.
/// </summary>
public class YoutubeWatcherJob
{
    private const string JobName = "YoutubeWatcherJob";

    private IArticlesRepository _articles;
    private IIconsRepository _icons;
    private ILogger _logger;
    private YoutubeWatcherJobOptions _options;
    private IDiscordQueueRepository _queue;
    private ISourcesRepository _source;

    /// <summary>
    /// Creates the job with default options and repositories built from an empty connection string.
    /// <see cref="InitAndExecute" /> replaces all of them before the job runs.
    /// </summary>
    public YoutubeWatcherJob()
    {
        _options = new YoutubeWatcherJobOptions();
        _articles = new ArticlesTable("");
        _queue = new DiscordQueueTable("");
        _source = new SourcesTable("");
        _icons = new IconsTable("");
        _logger = JobLogger.GetLogger("", JobName);
    }

    /// <summary>
    /// Rebuilds the repositories and logger from <paramref name="options" /> and runs the job once.
    /// </summary>
    /// <param name="options">Connection strings, throttle delay and enabled flag for this run.</param>
    /// <exception cref="ArgumentNullException"><paramref name="options" /> is null.</exception>
    public void InitAndExecute(YoutubeWatcherJobOptions options)
    {
        ArgumentNullException.ThrowIfNull(options);

        // Keep the caller's options; previously the defaults from the constructor were always
        // used, so a configured SleepTimer never took effect.
        _options = options;

        var connectionString = options.DatabaseConnectionString ?? "";
        _articles = new ArticlesTable(connectionString);
        _queue = new DiscordQueueTable(connectionString);
        _source = new SourcesTable(connectionString);
        _icons = new IconsTable(connectionString);
        _logger = JobLogger.GetLogger(options.OpenTelemetryConnectionString ?? "", JobName);

        if (!options.IsEnabled)
        {
            _logger.Warning($"{JobName} - Job is disabled and will not run.");
            return;
        }

        Execute();
    }

    /// <summary>
    /// Processes up to 100 YouTube sources: resolves the channel ID when it is missing, reads the
    /// channel feed and stores and queues every video that is not already known.
    /// </summary>
    private void Execute()
    {
        var sources = _source.ListByType(SourceTypes.YouTube, 100);

        foreach (var source in sources)
        {
            if (!source.Enabled)
            {
                _logger.Debug($"{JobName} - {source.Name} was disabled and will be skipped.");
                continue;
            }

            var channelId = source.YoutubeId;
            if (string.IsNullOrEmpty(channelId))
            {
                channelId = GetChannelId(source.Url);
                if (channelId == "")
                {
                    // GetChannelId has already logged the failure. Without an ID the feed URL
                    // would be invalid, so move on instead of storing an empty ID and failing
                    // the whole run on the feed request.
                    continue;
                }

                _source.UpdateYoutubeId(source.ID, channelId);
            }

            // Make sure we have an icon for the channel.
            // NOTE(review): an empty Id presumably means no icon is stored yet - confirm against
            // IIconsRepository. Nothing is fetched here; the result is only logged.
            var icon = _icons.GetBySourceId(source.ID);
            if (icon.Id == Guid.Empty)
            {
                _logger.Debug($"{JobName} - Icon lookup for '{source.Name}' returned an empty Id.");
            }

            _logger.Information($"{JobName} - Checking '{source.Name}'");
            var url = $"https://www.youtube.com/feeds/videos.xml?channel_id={channelId}";
            var newVideos = CheckFeed(url, source);
            _logger.Debug($"{JobName} - Collected {newVideos.Count} new videos");

            foreach (var video in newVideos)
            {
                _logger.Debug($"{JobName} - {video.AuthorName} '{video.Title}' was found");
                _articles.New(video);

                // NOTE(review): this relies on video.ID being populated at this point (either by
                // the model itself or by _articles.New) - verify against the repository.
                _queue.New(new DiscordQueueModel
                {
                    ArticleID = video.ID
                });
            }
        }

        _logger.Information($"{JobName} - Done");
    }

    /// <summary>
    /// Parses the page at <paramref name="url" /> and returns the YouTube channel ID found in its header.
    /// </summary>
    /// <param name="url">Address of the channel page to inspect.</param>
    /// <returns>The channel ID, or an empty string (after logging an error) when none was found.</returns>
    private string GetChannelId(string url)
    {
        // Collect the Channel ID so it can be stored for later runs.
        var pageReader = new HtmlPageReader(new HtmlPageReaderOptions
        {
            Url = url
        });
        pageReader.Parse();

        var id = pageReader.Data.Header.YoutubeChannelID ?? "";
        if (id == "")
        {
            _logger.Error(new Exception($"{JobName} - Unable to find the Youtube Channel ID for the requested url."), url);
        }

        return id;
    }

    /// <summary>
    /// Loads the syndication feed at <paramref name="url" /> and builds an article for every entry
    /// whose link is not already stored.
    /// </summary>
    /// <param name="url">Feed address to load.</param>
    /// <param name="source">Source the feed belongs to; its ID is stamped on every article.</param>
    /// <returns>The articles built for unknown videos, in feed order. Nothing is persisted here.</returns>
    private List<ArticlesModel> CheckFeed(string url, SourceModel source)
    {
        var videos = new List<ArticlesModel>();

        using var reader = XmlReader.Create(url);
        var feed = SyndicationFeed.Load(reader);

        foreach (var post in feed.Items.ToList())
        {
            // An entry without a link has no URL to store or de-duplicate on.
            var link = post.Links.FirstOrDefault();
            if (link is null)
            {
                _logger.Debug($"{JobName} - Feed entry without a link was skipped.");
                continue;
            }

            var articleUrl = link.Uri.AbsoluteUri;
            if (IsThisUrlKnown(articleUrl))
            {
                continue;
            }

            // The thumbnail and description are read from the video page's parsed header.
            var videoDetails = new HtmlPageReader(new HtmlPageReaderOptions
            {
                Url = articleUrl
            });
            videoDetails.Parse();

            var article = new ArticlesModel
            {
                //Todo add the icon
                AuthorName = post.Authors.FirstOrDefault()?.Name ?? "",
                Title = post.Title.Text,
                Tags = FetchTags(post),
                URL = articleUrl,
                PubDate = post.PublishDate.DateTime,
                Thumbnail = videoDetails.Data.Header.Image,
                Description = videoDetails.Data.Header.Description,
                SourceID = source.ID,
                Video = "true"
            };

            videos.Add(article);

            // Pause between video page requests.
            Thread.Sleep(_options.SleepTimer);
        }

        return videos;
    }

    /// <summary>
    /// Reports whether the articles repository returns a record whose URL equals <paramref name="url" />.
    /// </summary>
    private bool IsThisUrlKnown(string url)
    {
        return _articles.GetByUrl(url).URL == url;
    }

    /// <summary>
    /// Flattens the entry's category names into one string, each name followed by a comma.
    /// </summary>
    /// <param name="post">Feed entry whose categories are read.</param>
    /// <returns>For example <c>"a,b,"</c>; an empty string when the entry has no categories.</returns>
    private static string FetchTags(SyndicationItem post)
    {
        // The trailing comma is kept on purpose so stored values keep their existing format.
        return string.Concat(post.Categories.Select(tag => $"{tag.Name},"));
    }
}