using System.ServiceModel.Syndication;
using System.Xml;
using Newsbot.Collector.Database.Repositories;
using Newsbot.Collector.Domain.Consts;
using Newsbot.Collector.Domain.Entities;
using Newsbot.Collector.Domain.Interfaces;
using Newsbot.Collector.Domain.Models;
using Newsbot.Collector.Services.HtmlParser;
using Serilog;

namespace Newsbot.Collector.Services.Jobs;

/// <summary>
/// Settings passed to <see cref="YoutubeWatcherJob.InitAndExecute"/>.
/// </summary>
public class YoutubeWatcherJobOptions
{
    /// <summary>
    /// Connection string handed to every repository the job creates.
    /// A null value is treated as an empty string.
    /// </summary>
    public string? DatabaseConnectionString { get; set; }

    /// <summary>
    /// Connection string handed to <c>JobLogger.GetLogger</c>.
    /// A null value is treated as an empty string.
    /// </summary>
    public string? OpenTelemetryConnectionString { get; set; }

    /// <summary>
    /// Defaults to 3000. Not read by the job code visible in this file;
    /// NOTE(review): presumably a delay consumed by the caller - confirm unit and usage.
    /// </summary>
    public int SleepTimer { get; set; } = 3000;

    /// <summary>
    /// Defaults to true. Not read by the job code visible in this file;
    /// NOTE(review): presumably checked by whoever schedules the job - confirm.
    /// </summary>
    public bool IsEnabled { get; set; } = true;
}

/// <summary>
/// Checks the YouTube feed of every enabled YouTube source, stores videos that are not
/// yet known as articles and places them on the Discord queue.
/// </summary>
public class YoutubeWatcherJob
{
    private const string JobName = "YoutubeWatcherJob";

    private readonly YoutubeWatcherJobOptions _options;
    private IArticlesRepository _articles;
    private IAuthorTable _author;
    private IIconsRepository _icons;
    private ILogger _logger;
    private IDiscordQueueRepository _queue;
    private ISourcesRepository _source;

    /// <summary>
    /// Creates the job with default options and repositories built from empty connection
    /// strings. <see cref="InitAndExecute"/> replaces the repositories and the logger
    /// before any work is done.
    /// </summary>
    public YoutubeWatcherJob()
    {
        _options = new YoutubeWatcherJobOptions();
        _articles = new ArticlesTable("");
        _author = new AuthorsTable("");
        _queue = new DiscordQueueTable("");
        _source = new SourcesTable("");
        _icons = new IconsTable("");
        _logger = JobLogger.GetLogger("", JobName);
    }

    /// <summary>
    /// Rebuilds the repositories and the logger from <paramref name="options"/> and then
    /// runs the job once.
    /// </summary>
    /// <param name="options">Connection strings for the database and the logger.</param>
    public void InitAndExecute(YoutubeWatcherJobOptions options)
    {
        var connectionString = options.DatabaseConnectionString ?? "";

        _articles = new ArticlesTable(connectionString);
        _author = new AuthorsTable(connectionString);
        _queue = new DiscordQueueTable(connectionString);
        _source = new SourcesTable(connectionString);
        _icons = new IconsTable(connectionString);
        _logger = JobLogger.GetLogger(options.OpenTelemetryConnectionString ?? "", JobName);

        Execute();
    }

    /// <summary>
    /// Walks up to 100 YouTube sources, resolves each channel id, collects the videos
    /// that are not stored yet and queues them for Discord.
    /// </summary>
    private void Execute()
    {
        var sources = _source.ListByType(SourceTypes.YouTube, 100);

        foreach (var source in sources)
        {
            if (!source.Enabled)
            {
                _logger.Debug($"{JobName} - {source.Name} was disabled and will be skipped.");
                continue;
            }

            var channelId = source.YoutubeId;
            if (string.IsNullOrEmpty(channelId))
            {
                channelId = GetChannelId(source.Url);
                if (channelId == "")
                {
                    // GetChannelId already logged the failure. Storing an empty id or
                    // requesting a feed without a channel id would only fail further down.
                    continue;
                }

                _source.UpdateYoutubeId(source.Id, channelId);
            }

            // Make sure we have an icon for the channel.
            var icon = _icons.GetBySourceId(source.Id);
            if (icon.Id == Guid.Empty)
            {
                _logger.Debug("{JobName} - No icon record found for source '{SourceName}'", JobName, source.Name);
            }

            _logger.Information($"{JobName} - Checking '{source.Name}'");
            var url = $"https://www.youtube.com/feeds/videos.xml?channel_id={channelId}";

            var newVideos = FindMissingPosts(url, source);
            _logger.Debug($"{JobName} - Collected {newVideos.Count} new videos");

            foreach (var video in newVideos)
            {
                // The job runs synchronously, so the asynchronous lookup is blocked on here.
                var author = _author.GetById(video.AuthorId).Result;
                if (author is null)
                {
                    _logger.Warning("Missing author record for article id {VideoId}", video.Id);
                }
                else
                {
                    _logger.Debug("{JobName} - {ResultName} '{VideoTitle}' was found", JobName, author.Name, video.Title);
                }

                _articles.New(video);
                _queue.New(new DiscordQueueEntity
                {
                    ArticleId = video.Id
                });
            }
        }

        _logger.Information($"{JobName} - Done");
    }

    /// <summary>
    /// Parses the page at <paramref name="url"/> and reads the YouTube channel id from its header data.
    /// </summary>
    /// <param name="url">Address of the channel page to parse.</param>
    /// <returns>The channel id, or an empty string (after logging an error) when the page exposes none.</returns>
    private string GetChannelId(string url)
    {
        // Collect the channel id so the caller can store it for later runs.
        var pageReader = new HtmlPageReader(new HtmlPageReaderOptions
        {
            Url = url
        });
        pageReader.Parse();

        var id = pageReader.Data.Header.YoutubeChannelID ?? "";
        if (id == "")
        {
            _logger.Error(new Exception($"{JobName} - Unable to find the Youtube Channel ID for the requested url"), "");
        }

        return id;
    }

    /// <summary>
    /// Loads the syndication feed at <paramref name="url"/> and builds an article for every
    /// entry that <see cref="CheckFeedItem"/> does not reject.
    /// </summary>
    /// <param name="url">Address of the channel feed.</param>
    /// <param name="source">Source the feed belongs to; its id is stamped on every article.</param>
    /// <returns>The articles that still have to be stored, in feed order.</returns>
    private List<ArticlesEntity> FindMissingPosts(string url, SourceEntity source)
    {
        var videos = new List<ArticlesEntity>();

        using var reader = XmlReader.Create(url);
        var feed = SyndicationFeed.Load(reader);

        foreach (var post in feed.Items)
        {
            var article = CheckFeedItem(post, source.Id);
            if (article is not null)
            {
                videos.Add(article);
            }
        }

        return videos;
    }

    /// <summary>
    /// Turns a feed entry into an article unless its URL is already stored.
    /// </summary>
    /// <param name="post">Feed entry to inspect.</param>
    /// <param name="sourceId">Id of the source the entry was collected from.</param>
    /// <returns>
    /// The new article, or null when the entry has no link or author, or when its URL is already known.
    /// </returns>
    private ArticlesEntity? CheckFeedItem(SyndicationItem post, Guid sourceId)
    {
        // The first link and the first author are required below; skip entries without them
        // instead of failing the whole run on an index error.
        if (post.Links.Count == 0 || post.Authors.Count == 0)
        {
            _logger.Warning("{JobName} - Skipping a feed entry that has no link or no author", JobName);
            return null;
        }

        var articleUrl = post.Links[0].Uri.AbsoluteUri;
        if (IsThisUrlKnown(articleUrl))
        {
            return null;
        }

        // Thumbnail and description are read from the video page header data.
        var videoDetails = new HtmlPageReader(new HtmlPageReaderOptions
        {
            Url = articleUrl
        });
        videoDetails.Parse();

        // The job runs synchronously, so the asynchronous call is blocked on here.
        var feedAuthor = post.Authors[0];
        var author = _author.CreateIfMissingAsync(new AuthorEntity
        {
            Image = feedAuthor.Uri,
            Name = feedAuthor.Name
        }).Result;

        return new ArticlesEntity
        {
            // TODO: add the icon
            AuthorId = author.Id,
            Title = post.Title.Text,
            Tags = FetchTags(post),
            Url = articleUrl,
            PubDate = post.PublishDate.DateTime,
            Thumbnail = videoDetails.Data.Header.Image,
            Description = videoDetails.Data.Header.Description,
            SourceId = sourceId,
            Video = "true"
        };
    }

    /// <summary>
    /// Reports whether the articles repository returns a record whose URL equals <paramref name="url"/>.
    /// </summary>
    private bool IsThisUrlKnown(string url)
    {
        var existing = _articles.GetByUrl(url);
        return existing.Url == url;
    }

    /// <summary>
    /// Joins the category names of <paramref name="post"/> into one string, each name
    /// followed by a comma (so a non-empty result ends with a comma).
    /// </summary>
    private static string FetchTags(SyndicationItem post)
    {
        return string.Concat(post.Categories.Select(tag => $"{tag.Name},"));
    }
}