From adb4799206ee94f7086f56be4147e50e487cc5f5 Mon Sep 17 00:00:00 2001 From: James Tombleson Date: Fri, 31 Mar 2023 23:00:15 -0700 Subject: [PATCH] Features/missing files (#14) * Added jobs Controller to trigger collection. * Added backgroundjobs to move them out of program.cs * new column to track youtube ID values and adding a sourceid column on the icon for linking * Added icon table repo * added interface for IconsRepo * hey the missing config models * adding section const keys to pull blocks of configs * Added youtubewatcher to the code but not ready to enable it in the background. More testing needed. * Test... improvements? --- Newsbot.Collector.Api/BackgroundJobs.cs | 26 +++ .../Controllers/JobsController.cs | 47 ++++++ .../20230316185340_sources_youtube_id.sql | 13 ++ .../20230326223545_icon_sourceid.sql | 13 ++ .../Repositories/IconsTable.cs | 65 ++++++++ .../Consts/ConfigSectionsConst.cs | 16 ++ .../Interfaces/IIconsRepository.cs | 11 ++ .../Config/ConfigSectionConnectionStrings.cs | 7 + .../ConfigSectionNotificationsDiscord.cs | 6 + .../Models/Config/ConfigSectionRedditModel.cs | 9 ++ .../Models/Config/ConfigSectionRssModel.cs | 6 + .../Config/ConfigSectionYoutubeModel.cs | 7 + .../HtmlParser/BrowserClient.cs | 30 ++++ Newsbot.Collector.Services/Jobs/JobLogger.cs | 22 +++ .../Jobs/YoutubeWatcherJob.cs | 152 ++++++++++++++++++ .../Services/BrowserClientTests.cs | 29 ++++ Newsbot.Collector.Tests/TestHelper.cs | 5 + 17 files changed, 464 insertions(+) create mode 100644 Newsbot.Collector.Api/BackgroundJobs.cs create mode 100644 Newsbot.Collector.Api/Controllers/JobsController.cs create mode 100644 Newsbot.Collector.Database/Migrations/20230316185340_sources_youtube_id.sql create mode 100644 Newsbot.Collector.Database/Migrations/20230326223545_icon_sourceid.sql create mode 100644 Newsbot.Collector.Database/Repositories/IconsTable.cs create mode 100644 Newsbot.Collector.Domain/Consts/ConfigSectionsConst.cs create mode 100644 
Newsbot.Collector.Domain/Interfaces/IIconsRepository.cs
 create mode 100644 Newsbot.Collector.Domain/Models/Config/ConfigSectionConnectionStrings.cs
 create mode 100644 Newsbot.Collector.Domain/Models/Config/ConfigSectionNotificationsDiscord.cs
 create mode 100644 Newsbot.Collector.Domain/Models/Config/ConfigSectionRedditModel.cs
 create mode 100644 Newsbot.Collector.Domain/Models/Config/ConfigSectionRssModel.cs
 create mode 100644 Newsbot.Collector.Domain/Models/Config/ConfigSectionYoutubeModel.cs
 create mode 100644 Newsbot.Collector.Services/HtmlParser/BrowserClient.cs
 create mode 100644 Newsbot.Collector.Services/Jobs/JobLogger.cs
 create mode 100644 Newsbot.Collector.Services/Jobs/YoutubeWatcherJob.cs
 create mode 100644 Newsbot.Collector.Tests/Services/BrowserClientTests.cs
 create mode 100644 Newsbot.Collector.Tests/TestHelper.cs

diff --git a/Newsbot.Collector.Api/BackgroundJobs.cs b/Newsbot.Collector.Api/BackgroundJobs.cs
new file mode 100644
index 0000000..95312d4
--- /dev/null
+++ b/Newsbot.Collector.Api/BackgroundJobs.cs
@@ -0,0 +1,48 @@
+using Hangfire;
+using Newsbot.Collector.Domain.Consts;
+using Newsbot.Collector.Domain.Models.Config;
+using Newsbot.Collector.Services.Jobs;
+
+namespace Newsbot.Collector.Api;
+
+/// <summary>
+///     Registers the recurring Hangfire jobs so the schedule does not live in Program.cs.
+/// </summary>
+public class BackgroundJobs
+{
+    /// <summary>
+    ///     Adds or updates the recurring RSS collection and Discord alert jobs.
+    /// </summary>
+    /// <param name="configuration">
+    ///     Application configuration containing the sections named in <see cref="ConfigSectionsConst" />.
+    /// </param>
+    public static void SetupRecurringJobs(IConfiguration configuration)
+    {
+        // GetValue<T> only converts a single scalar value and returns null for a section that has
+        // children, so the option objects are bound with GetSection(...).Get<T>() instead.
+        var connectionStrings = configuration
+            .GetSection(ConfigSectionsConst.ConnectionStrings)
+            .Get<ConfigSectionConnectionStrings>();
+        var rssConfig = configuration
+            .GetSection(ConfigSectionsConst.Rss)
+            .Get<ConfigSectionRssModel>();
+        var discordConfig = configuration
+            .GetSection(ConfigSectionsConst.NotificationsDiscord)
+            .Get<ConfigSectionNotificationsDiscord>();
+
+        // Minute 15 of every hour.
+        RecurringJob.AddOrUpdate<RssWatcherJob>("RSS", x => x.InitAndExecute(new RssWatcherJobOptions
+        {
+            ConnectionStrings = connectionStrings,
+            Config = rssConfig
+        }), "15 0-23 * * *");
+
+        // Every ten minutes, starting at minute 5.
+        RecurringJob.AddOrUpdate<DiscordNotificationJob>("Discord Alerts", x =>
+            x.InitAndExecute(new DiscordNotificationJobOptions
+            {
+                ConnectionStrings = connectionStrings,
+                Config = discordConfig
+            }), "5/10 * * * *");
+    }
+}
\ No newline at end of file
diff --git a/Newsbot.Collector.Api/Controllers/JobsController.cs
b/Newsbot.Collector.Api/Controllers/JobsController.cs
new file mode 100644
index 0000000..e53f989
--- /dev/null
+++ b/Newsbot.Collector.Api/Controllers/JobsController.cs
@@ -0,0 +1,61 @@
+using Hangfire;
+using Microsoft.AspNetCore.Mvc;
+using Microsoft.Extensions.Options;
+using Newsbot.Collector.Database.Repositories;
+using Newsbot.Collector.Domain.Interfaces;
+using Newsbot.Collector.Domain.Models.Config;
+using Newsbot.Collector.Services.Jobs;
+
+namespace Newsbot.Collector.Api.Controllers;
+
+/// <summary>
+///     Endpoints that queue a collection job immediately instead of waiting for its schedule.
+/// </summary>
+[ApiController]
+[Route("api/jobs")]
+public class JobsController
+{
+    private readonly ConfigSectionConnectionStrings _connectionStrings;
+    private readonly ConfigSectionRssModel _rssConfig;
+    private readonly ILogger<JobsController> _logger;
+    private readonly ISourcesRepository _sources;
+
+    public JobsController(ILogger<JobsController> logger,
+        IOptions<ConfigSectionConnectionStrings> connectionStrings,
+        IOptions<ConfigSectionRssModel> rss)
+    {
+        _logger = logger;
+        _connectionStrings = connectionStrings.Value;
+        _rssConfig = rss.Value;
+        _sources = new SourcesTable(_connectionStrings.Database ?? "");
+    }
+
+    /// <summary>
+    ///     Queues a one-off run of the RSS watcher.
+    /// </summary>
+    /// <remarks>
+    ///     NOTE(review): the method is named CheckReddit but is routed as <c>check/rss</c> and queues the RSS
+    ///     job; the name is kept so existing callers keep compiling.
+    /// </remarks>
+    [HttpPost("check/rss")]
+    public void CheckReddit()
+    {
+        BackgroundJob.Enqueue<RssWatcherJob>(x => x.InitAndExecute(new RssWatcherJobOptions
+        {
+            ConnectionStrings = _connectionStrings,
+            Config = _rssConfig
+        }));
+    }
+
+    /// <summary>
+    ///     Queues a one-off run of the YouTube watcher.
+    /// </summary>
+    [HttpPost("check/youtube")]
+    public void CheckYoutube()
+    {
+        BackgroundJob.Enqueue<YoutubeWatcherJob>(x => x.InitAndExecute(new YoutubeWatcherJobOptions
+        {
+            ConnectionStrings = _connectionStrings
+        }));
+    }
+}
\ No newline at end of file
diff --git a/Newsbot.Collector.Database/Migrations/20230316185340_sources_youtube_id.sql b/Newsbot.Collector.Database/Migrations/20230316185340_sources_youtube_id.sql
new file mode 100644
index 0000000..2123faa
--- /dev/null
+++ b/Newsbot.Collector.Database/Migrations/20230316185340_sources_youtube_id.sql
@@ -0,0 +1,13 @@
+-- +goose Up
+-- +goose StatementBegin
+SELECT 'up SQL query';
+ALTER TABLE sources
+    ADD COLUMN YoutubeId TEXT;
+-- +goose StatementEnd
+
+-- +goose Down
+-- +goose StatementBegin
+SELECT 'down SQL query';
+ALTER TABLE sources
+    DROP COLUMN YoutubeId;
+-- +goose StatementEnd
diff --git a/Newsbot.Collector.Database/Migrations/20230326223545_icon_sourceid.sql b/Newsbot.Collector.Database/Migrations/20230326223545_icon_sourceid.sql
new file mode 100644
index 0000000..54f220f
--- /dev/null
+++ b/Newsbot.Collector.Database/Migrations/20230326223545_icon_sourceid.sql
@@ -0,0 +1,13 @@
+-- +goose Up
+-- +goose StatementBegin
+SELECT 'up SQL query';
+ALTER TABLE icons
+    ADD COLUMN SourceId uuid;
+-- +goose StatementEnd
+
+-- +goose Down
+-- +goose StatementBegin
+SELECT 'down SQL query';
+ALTER TABLE icons
+    DROP COLUMN SourceId;
+-- +goose StatementEnd
diff --git a/Newsbot.Collector.Database/Repositories/IconsTable.cs b/Newsbot.Collector.Database/Repositories/IconsTable.cs
new file mode 100644
index 0000000..3a12682
--- /dev/null
+++ b/Newsbot.Collector.Database/Repositories/IconsTable.cs
@@ -0,0 +1,80 @@
+using System.Data;
+using Dapper;
+using Microsoft.Extensions.Configuration;
+using Newsbot.Collector.Domain.Interfaces;
+using Newsbot.Collector.Domain.Models;
+using Npgsql;
+
+namespace Newsbot.Collector.Database.Repositories;
+
+/// <summary>
+///     Dapper-based access to the <c>icons</c> table in PostgreSQL.
+/// </summary>
+public class IconsTable : IIconsRepository
+{
+    private readonly string _connectionString;
+
+    public IconsTable(string connectionString)
+    {
+        _connectionString = connectionString;
+    }
+
+    public IconsTable(IConfiguration configuration)
+    {
+        // A missing connection string becomes "" so construction never throws.
+        _connectionString = configuration.GetConnectionString("database") ?? "";
+    }
+
+    /// <summary>
+    ///     Inserts <paramref name="model" /> after assigning it a newly generated Id.
+    /// </summary>
+    public void New(IconModel model)
+    {
+        model.Id = Guid.NewGuid();
+
+        using var conn = OpenConnection(_connectionString);
+        const string query =
+            "Insert Into icons (id, filename, site, sourceid) values (@Id,@FileName, @Site, @SourceId)";
+        conn.Execute(query, model);
+    }
+
+    /// <summary>
+    ///     Returns the icon with the given id, or an empty <see cref="IconModel" /> when no row matches.
+    /// </summary>
+    public IconModel GetById(Guid id)
+    {
+        return QuerySingleIcon("Select * From icons where ID = @id Limit 1;", id);
+    }
+
+    /// <summary>
+    ///     Returns the icon linked to the given source, or an empty <see cref="IconModel" /> when no row matches.
+    /// </summary>
+    public IconModel GetBySourceId(Guid id)
+    {
+        return QuerySingleIcon("Select * From icons where sourceid = @id Limit 1;", id);
+    }
+
+    // Runs a single-row lookup whose only parameter is @id.
+    private IconModel QuerySingleIcon(string query, Guid id)
+    {
+        using var conn = OpenConnection(_connectionString);
+        return conn.Query<IconModel>(query, new { id }).FirstOrDefault() ?? new IconModel();
+    }
+
+    // Opens a connection for the string that was passed in; the caller disposes it.
+    private static IDbConnection OpenConnection(string connectionString)
+    {
+        var conn = new NpgsqlConnection(connectionString);
+        try
+        {
+            conn.Open();
+            return conn;
+        }
+        catch
+        {
+            // Do not leak the connection object when the server cannot be reached.
+            conn.Dispose();
+            throw;
+        }
+    }
+}
\ No newline at end of file
diff --git a/Newsbot.Collector.Domain/Consts/ConfigSectionsConst.cs b/Newsbot.Collector.Domain/Consts/ConfigSectionsConst.cs
new file mode 100644
index 0000000..ac40320
--- /dev/null
+++ b/Newsbot.Collector.Domain/Consts/ConfigSectionsConst.cs
@@ -0,0 +1,16 @@
+namespace Newsbot.Collector.Domain.Consts;
+
+///
+/// This class contains the keys to find the objects in the config to load.
+///
+public static class ConfigSectionsConst
+{
+    public const string ConnectionStrings = "ConnectionStrings";
+    public const string FinalFantasyXiv = "FinalFantasyXiv";
+    public const string Reddit = "Reddit";
+    public const string Rss = "Rss";
+    public const string Twitch = "Twitch";
+    public const string Youtube = "Youtube";
+
+    public const string NotificationsDiscord = "Notifications:Discord";
+}
\ No newline at end of file
diff --git a/Newsbot.Collector.Domain/Interfaces/IIconsRepository.cs b/Newsbot.Collector.Domain/Interfaces/IIconsRepository.cs
new file mode 100644
index 0000000..c502f48
--- /dev/null
+++ b/Newsbot.Collector.Domain/Interfaces/IIconsRepository.cs
@@ -0,0 +1,18 @@
+using Newsbot.Collector.Domain.Models;
+
+namespace Newsbot.Collector.Domain.Interfaces;
+
+/// <summary>
+///     Storage contract for icon records.
+/// </summary>
+public interface IIconsRepository
+{
+    /// <summary>Stores a new icon record.</summary>
+    void New(IconModel model);
+
+    /// <summary>Looks up an icon by its own id.</summary>
+    IconModel GetById(Guid id);
+
+    /// <summary>Looks up the icon linked to a source id.</summary>
+    IconModel GetBySourceId(Guid id);
+}
\ No newline at end of file
diff --git a/Newsbot.Collector.Domain/Models/Config/ConfigSectionConnectionStrings.cs b/Newsbot.Collector.Domain/Models/Config/ConfigSectionConnectionStrings.cs
new file mode 100644
index 0000000..e5902d8
--- /dev/null
+++ b/Newsbot.Collector.Domain/Models/Config/ConfigSectionConnectionStrings.cs
@@ -0,0 +1,7 @@
+namespace Newsbot.Collector.Domain.Models.Config;
+
+public class ConfigSectionConnectionStrings
+{
+    public string? Database { get; init; }
+    public string?
OpenTelemetry { get; init; } // YoutubeWatcherJob passes this (or "" when null) to JobLogger.GetLogger
+}
\ No newline at end of file
diff --git a/Newsbot.Collector.Domain/Models/Config/ConfigSectionNotificationsDiscord.cs b/Newsbot.Collector.Domain/Models/Config/ConfigSectionNotificationsDiscord.cs
new file mode 100644
index 0000000..d3043e5
--- /dev/null
+++ b/Newsbot.Collector.Domain/Models/Config/ConfigSectionNotificationsDiscord.cs
@@ -0,0 +1,6 @@
+namespace Newsbot.Collector.Domain.Models.Config;
+
+public class ConfigSectionNotificationsDiscord // presumably bound from the "Notifications:Discord" section - confirm
+{
+    public bool IsEnabled { get; set; } // NOTE(review): no reader of this flag is visible in this patch
+}
\ No newline at end of file
diff --git a/Newsbot.Collector.Domain/Models/Config/ConfigSectionRedditModel.cs b/Newsbot.Collector.Domain/Models/Config/ConfigSectionRedditModel.cs
new file mode 100644
index 0000000..b33f56b
--- /dev/null
+++ b/Newsbot.Collector.Domain/Models/Config/ConfigSectionRedditModel.cs
@@ -0,0 +1,9 @@
+namespace Newsbot.Collector.Domain.Models.Config;
+
+public class ConfigSectionRedditModel // presumably bound from the "Reddit" section - confirm
+{
+    public bool IsEnabled { get; set; }
+    public bool PullHot { get; set; } // NOTE(review): the Pull* flags have no visible reader in this patch
+    public bool PullNsfw { get; set; }
+    public bool PullTop { get; set; }
+}
\ No newline at end of file
diff --git a/Newsbot.Collector.Domain/Models/Config/ConfigSectionRssModel.cs b/Newsbot.Collector.Domain/Models/Config/ConfigSectionRssModel.cs
new file mode 100644
index 0000000..597d14d
--- /dev/null
+++ b/Newsbot.Collector.Domain/Models/Config/ConfigSectionRssModel.cs
@@ -0,0 +1,6 @@
+namespace Newsbot.Collector.Domain.Models.Config;
+
+public class ConfigSectionRssModel // JobsController forwards this to RssWatcherJobOptions.Config
+{
+    public bool IsEnabled { get; set; }
+}
\ No newline at end of file
diff --git a/Newsbot.Collector.Domain/Models/Config/ConfigSectionYoutubeModel.cs b/Newsbot.Collector.Domain/Models/Config/ConfigSectionYoutubeModel.cs
new file mode 100644
index 0000000..b6cb788
--- /dev/null
+++ b/Newsbot.Collector.Domain/Models/Config/ConfigSectionYoutubeModel.cs
@@ -0,0 +1,7 @@
+namespace Newsbot.Collector.Domain.Models.Config;
+
+public class ConfigSectionYoutubeModel // presumably bound from the "Youtube" section - confirm
+{
+    public bool IsEnabled { get;
set; }
+    public bool DebugMode { get; set; }
+}
\ No newline at end of file
diff --git a/Newsbot.Collector.Services/HtmlParser/BrowserClient.cs b/Newsbot.Collector.Services/HtmlParser/BrowserClient.cs
new file mode 100644
index 0000000..a31359f
--- /dev/null
+++ b/Newsbot.Collector.Services/HtmlParser/BrowserClient.cs
@@ -0,0 +1,56 @@
+using OpenQA.Selenium;
+using OpenQA.Selenium.Firefox;
+
+namespace Newsbot.Collector.Services.HtmlParser;
+
+/// <summary>
+///     Loads pages in a Firefox instance so markup produced by JavaScript can be read.
+/// </summary>
+public class BrowserClient : IDisposable
+{
+    private readonly IWebDriver _driver;
+    private bool _disposed;
+
+    public BrowserClient()
+    {
+        _driver = new FirefoxDriver();
+    }
+
+    /// <summary>
+    ///     Ends the browser session and releases the driver. Calling it again does nothing.
+    /// </summary>
+    public void Dispose()
+    {
+        if (_disposed) return;
+        _disposed = true;
+
+        try
+        {
+            // Quit closes every window and ends the session, so no separate Close() call is made;
+            // a throwing Close() would skip the cleanup below.
+            _driver.Quit();
+        }
+        finally
+        {
+            _driver.Dispose();
+        }
+    }
+
+    /// <summary>
+    ///     Navigates to <paramref name="url" />, waits, and returns the page source as the browser sees it.
+    /// </summary>
+    /// <param name="url">Address to load.</param>
+    /// <param name="sleep">Milliseconds to wait after navigation before reading the source.</param>
+    /// <exception cref="ArgumentOutOfRangeException"><paramref name="sleep" /> is negative.</exception>
+    public string GetPageSource(string url, int sleep = 5000)
+    {
+        // Thread.Sleep treats -1 as "wait forever", which would hang the calling job.
+        if (sleep < 0) throw new ArgumentOutOfRangeException(nameof(sleep), sleep, "sleep must not be negative.");
+
+        _driver.Navigate().GoToUrl(url);
+
+        // Give the page some time to finish loading js
+        Thread.Sleep(sleep);
+        return _driver.PageSource;
+    }
+}
\ No newline at end of file
diff --git a/Newsbot.Collector.Services/Jobs/JobLogger.cs b/Newsbot.Collector.Services/Jobs/JobLogger.cs
new file mode 100644
index 0000000..b2f3a35
--- /dev/null
+++ b/Newsbot.Collector.Services/Jobs/JobLogger.cs
@@ -0,0 +1,32 @@
+using Serilog;
+
+namespace Newsbot.Collector.Services.Jobs;
+
+/// <summary>
+///     Builds the Serilog logger used by the background jobs.
+/// </summary>
+public static class JobLogger
+{
+    /// <summary>
+    ///     Creates a console logger, adding an OpenTelemetry sink when an endpoint is supplied.
+    /// </summary>
+    /// <param name="connectionString">OpenTelemetry endpoint; null, empty or blank means console only.</param>
+    /// <param name="jobName">Value reported in the <c>Job</c> resource attribute.</param>
+    /// <returns>The new logger, which is also installed as the global <see cref="Log.Logger" />.</returns>
+    public static ILogger GetLogger(string connectionString, string jobName)
+    {
+        var configuration = new LoggerConfiguration().WriteTo.Console();
+
+        // A null or blank endpoint cannot be exported to, so it is treated like "".
+        if (!string.IsNullOrWhiteSpace(connectionString))
+            configuration = configuration.WriteTo.OpenTelemetry(
+                connectionString,
+                resourceAttributes: new Dictionary<string, object>
+                {
+                    { "service.name", "newsbot-collector-api" },
+                    { "Job", jobName }
+                });
+
+        return Log.Logger = configuration.CreateLogger();
+    }
+}
\ No newline at end of file
diff --git a/Newsbot.Collector.Services/Jobs/YoutubeWatcherJob.cs b/Newsbot.Collector.Services/Jobs/YoutubeWatcherJob.cs
new file mode 100644
index 0000000..315e2fc
--- /dev/null
+++ b/Newsbot.Collector.Services/Jobs/YoutubeWatcherJob.cs
@@ -0,0 +1,169 @@
+using System.ServiceModel.Syndication;
+using System.Xml;
+using Newsbot.Collector.Database.Repositories;
+using Newsbot.Collector.Domain.Consts;
+using Newsbot.Collector.Domain.Interfaces;
+using Newsbot.Collector.Domain.Models;
+using Newsbot.Collector.Domain.Models.Config;
+using Newsbot.Collector.Services.HtmlParser;
+using Serilog;
+
+namespace Newsbot.Collector.Services.Jobs;
+
+/// <summary>
+///     Settings handed to <see cref="YoutubeWatcherJob.InitAndExecute" />.
+/// </summary>
+public class YoutubeWatcherJobOptions
+{
+    public ConfigSectionConnectionStrings? ConnectionStrings { get; set; }
+
+    /// <summary>Milliseconds to pause after each newly found video.</summary>
+    public int SleepTimer { get; set; } = 3000;
+}
+
+/// <summary>
+///     Reads the YouTube feed of every stored YouTube source and saves videos that are not known yet.
+/// </summary>
+public class YoutubeWatcherJob
+{
+    private IArticlesRepository _articles;
+    private IIconsRepository _icons;
+    private ILogger _logger;
+    private YoutubeWatcherJobOptions _options;
+    private IDiscordQueueRepository _queue;
+    private ISourcesRepository _source;
+
+    public YoutubeWatcherJob()
+    {
+        _options = new YoutubeWatcherJobOptions();
+        _articles = new ArticlesTable("");
+        _queue = new DiscordQueueTable("");
+        _source = new SourcesTable("");
+        _icons = new IconsTable("");
+        _logger = JobLogger.GetLogger("", "YoutubeWatcherJob");
+    }
+
+    /// <summary>
+    ///     Rebuilds the repositories and logger from <paramref name="options" /> and runs the job once.
+    /// </summary>
+    public void InitAndExecute(YoutubeWatcherJobOptions options)
+    {
+        options.ConnectionStrings ??= new ConfigSectionConnectionStrings();
+        var database = options.ConnectionStrings.Database ?? "";
+
+        // Keep the caller's options; otherwise SleepTimer always stays at the constructor default.
+        _options = options;
+        _articles = new ArticlesTable(database);
+        _queue = new DiscordQueueTable(database);
+        _source = new SourcesTable(database);
+        _icons = new IconsTable(database);
+        _logger = JobLogger.GetLogger(options.ConnectionStrings.OpenTelemetry ?? "", "YoutubeWatcherJob");
+
+        Execute();
+    }
+
+    private void Execute()
+    {
+        var sources = _source.ListByType(SourceTypes.YouTube, 100);
+
+        foreach (var source in sources) CheckSource(source);
+    }
+
+    // Resolves the channel id for one source, then stores every video in its feed that is new.
+    private void CheckSource(SourceModel source)
+    {
+        var channelId = source.YoutubeId;
+
+        // The YoutubeId column is nullable, so rows created before it existed hold null, not "".
+        if (string.IsNullOrEmpty(channelId))
+        {
+            channelId = GetChannelId(source.Url);
+
+            // Without a channel id there is no feed to read; GetChannelId already logged the failure.
+            if (channelId == "") return;
+
+            _source.UpdateYoutubeId(source.ID, channelId);
+        }
+
+        // Make sure we have a Icon for the channel
+        var icon = _icons.GetBySourceId(source.ID);
+        if (icon.Id == Guid.Empty)
+        {
+            // TODO: collect and store the channel icon.
+        }
+
+        var url = $"https://www.youtube.com/feeds/videos.xml?channel_id={channelId}";
+
+        var newVideos = CheckFeed(url, source);
+        foreach (var video in newVideos) _articles.New(video);
+    }
+
+    // Reads the channel id from the header data of the channel page; returns "" when it is missing.
+    private string GetChannelId(string url)
+    {
+        // Collect the Channel ID and store it for later.
+        var pageReader = new HtmlPageReader(new HtmlPageReaderOptions
+        {
+            Url = url
+        });
+        pageReader.Parse();
+
+        var id = pageReader.Data.Header.YoutubeChannelID ?? "";
+        if (id == "") _logger.Error("Unable to find the Youtube Channel ID for the requested url. {Url}", url);
+
+        return id;
+    }
+
+    // Downloads the feed and builds an article for every entry whose link is not stored yet.
+    private List<ArticlesModel> CheckFeed(string url, SourceModel source)
+    {
+        var videos = new List<ArticlesModel>();
+
+        using var reader = XmlReader.Create(url);
+        var feed = SyndicationFeed.Load(reader);
+        foreach (var post in feed.Items)
+        {
+            // An entry without a link can be neither stored nor checked for duplicates.
+            var articleUrl = post.Links.FirstOrDefault()?.Uri?.AbsoluteUri;
+            if (articleUrl is null || IsThisUrlKnown(articleUrl)) continue;
+
+            var videoDetails = new HtmlPageReader(new HtmlPageReaderOptions
+            {
+                Url = articleUrl
+            });
+            videoDetails.Parse();
+
+            var article = new ArticlesModel
+            {
+                //Todo add the icon
+                AuthorName = post.Authors.FirstOrDefault()?.Name ?? "",
+                Title = post.Title?.Text ?? "",
+                Tags = FetchTags(post),
+                URL = articleUrl,
+                PubDate = post.PublishDate.DateTime,
+                Thumbnail = videoDetails.Data.Header.Image,
+                Description = videoDetails.Data.Header.Description,
+                SourceID = source.ID,
+                Video = "true"
+            };
+
+            videos.Add(article);
+
+            // Pause between video page requests.
+            Thread.Sleep(_options.SleepTimer);
+        }
+
+        return videos;
+    }
+
+    private bool IsThisUrlKnown(string url)
+    {
+        return _articles.GetByUrl(url).URL == url;
+    }
+
+    // Joins the category names; every name, including the last, is followed by a comma.
+    private static string FetchTags(SyndicationItem post)
+    {
+        return string.Concat(post.Categories.Select(tag => $"{tag.Name},"));
+    }
+}
\ No newline at end of file
diff --git a/Newsbot.Collector.Tests/Services/BrowserClientTests.cs b/Newsbot.Collector.Tests/Services/BrowserClientTests.cs
new file mode 100644
index 0000000..2a4e054
--- /dev/null
+++ b/Newsbot.Collector.Tests/Services/BrowserClientTests.cs
@@ -0,0 +1,29 @@
+using Newsbot.Collector.Services.HtmlParser;
+
+namespace Newsbot.Collector.Tests.Services;
+
+public class BrowserClientTests
+{
+    [Fact]
+    public void LoadsBrowser()
+    {
+        using var client = new BrowserClient();
+        var pageSource = client.GetPageSource("https://www.google.com");
+        if (pageSource == "") Assert.Fail("failed to return page source");
+    }
+
+    [Fact]
+    public void CanLoadHeadersFromSource()
+    {
+        // Load the channel page in the browser, then parse the captured markup.
+        using var browser = new BrowserClient();
+        var html = browser.GetPageSource("https://www.youtube.com/gamegrumps");
+        var readerOptions = new HtmlPageReaderOptions { SourceCode = html };
+        var reader = new HtmlPageReader(readerOptions);
+        reader.Parse();
+
+        var channelId = reader.Data.Header.YoutubeChannelID;
+        if (channelId is not null) return;
+        Assert.Fail("Failed to find the YoutubeChannelId");
+    }
+}
\ No newline at end of file
diff --git a/Newsbot.Collector.Tests/TestHelper.cs b/Newsbot.Collector.Tests/TestHelper.cs
new file mode 100644
index 0000000..1b666fb
--- /dev/null
+++ b/Newsbot.Collector.Tests/TestHelper.cs
@@ -0,0 +1,5 @@
+namespace Newsbot.Collector.Tests;
+
+public static class TestHelper
+{
+}
\ No newline at end of file