From 9be985da0acefcbc01dcba316f607119bd059bf7 Mon Sep 17 00:00:00 2001 From: James Tombleson Date: Fri, 31 Mar 2023 22:49:39 -0700 Subject: [PATCH] Features/adding youtube (#13) * Found the meta tags on youtube... in the body and updated the client to pull them out. * Updated namespace on test * I think formatting cleaned this up * Seed migrations have been cleaned up to get my configs out and moving them to a script. * Updates to the ISourcesRepository.cs to allow for new calls to the db. * formatter * Db models updated. Icon now can track sourceID and source can have a youtube id. * Updated api logger to ignore otel if no connection string given. * updated docker init so I can run migrations from the image * seed was updated to reflect the new api changes * Updated the SourcesController.cs to grab icon data. * Added reddit const values * Minor changes to HtmlPageReader.cs * Jobs are now pulling in the config section to bundle values. * Removed youtube api, not needed anymore. * test updates --- Dockerfile | 2 +- .../Controllers/SourcesController.cs | 91 +++++++++++++++++-- Newsbot.Collector.Api/Program.cs | 50 +++++----- .../Migrations/20220529082459_seed.sql | 54 ++++------- .../Migrations/20230305204112_seed.sql | 20 ---- .../Repositories/SourcesTable.cs | 86 ++++++++++-------- .../Consts/ConfigRedditConst.cs | 9 -- .../Interfaces/ISourcesRepository.cs | 7 +- .../Models/DatabaseModel.cs | 6 +- .../HtmlParser/HeadParserClient.cs | 85 +++++++++-------- .../HtmlParser/HtmlPageReader.cs | 51 +++++------ .../Jobs/DiscordNotificationJob.cs | 29 ++++-- .../Jobs/GithubWatcherJob.cs | 38 ++++---- .../Jobs/RssWatcherJob.cs | 72 +++++++-------- .../Newsbot.Collector.Services.csproj | 19 ++-- .../Discord/DiscordWebhookClient.cs | 18 ++-- .../Jobs/DiscordNotificationJobTest.cs | 5 + .../Jobs/GithubWatcherJobTests.cs | 17 +++- .../Jobs/RssWatcherJobTest.cs | 44 +++++---- .../Newsbot.Collector.Tests.csproj | 8 ++ .../Services/HtmlPageReaderTests.cs | 64 ++++++++++--- .../Tables/SourcesTableTests.cs | 4 +- Newsbot.Collector.Tests/UnitTest1.cs | 10 -- seed.ps1 | 33 ++++--- 24 files changed, 471 insertions(+), 351 deletions(-) delete mode 100644 Newsbot.Collector.Database/Migrations/20230305204112_seed.sql delete mode 100644 Newsbot.Collector.Domain/Consts/ConfigRedditConst.cs delete mode 100644 Newsbot.Collector.Tests/UnitTest1.cs diff --git a/Dockerfile b/Dockerfile index 6ed4123..1326dd3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -31,4 +31,4 @@ COPY --from=publish /app/build /app COPY --from=build ./app/Newsbot.Collector.Database/Migrations/ /app/migrations COPY --from=goose /go/bin/goose /app -ENTRYPOINT [ "dotnet", "Newsbot.Collector.Api.dll" ] \ No newline at end of file +CMD [ "dotnet", "Newsbot.Collector.Api.dll" ] \ No newline at end of file diff --git a/Newsbot.Collector.Api/Controllers/SourcesController.cs b/Newsbot.Collector.Api/Controllers/SourcesController.cs index 15bec9d..ad09835 100644 --- a/Newsbot.Collector.Api/Controllers/SourcesController.cs +++ b/Newsbot.Collector.Api/Controllers/SourcesController.cs @@ -5,6 +5,7 @@ using Newsbot.Collector.Domain.Consts; using Newsbot.Collector.Domain.Dto; using Newsbot.Collector.Domain.Interfaces; using Newsbot.Collector.Domain.Models; +using Newsbot.Collector.Services.HtmlParser; namespace Newsbot.Collector.Api.Controllers; @@ -12,6 +13,7 @@ namespace Newsbot.Collector.Api.Controllers; [Route("api/sources")] public class SourcesController : ControllerBase { + private readonly IIconsRepository _icons; private readonly ILogger _logger; //private readonly ConnectionStrings _settings; @@ -22,6 +24,7 @@ public class SourcesController : ControllerBase _logger = logger; //_settings = settings.Value; _sources = new SourcesTable(settings.Value.Database); + _icons = new IconsTable(settings.Value.Database); } [HttpGet(Name = "GetSources")] @@ -43,11 +46,19 @@ public class SourcesController : ControllerBase } [HttpPost("new/reddit")] - public SourceDto NewReddit(string name, string url) + public SourceDto NewReddit(string name) { var res = _sources.GetByNameAndType(name, SourceTypes.Reddit); if (res.ID != Guid.Empty) return SourceDto.Convert(res); + var uri = new Uri($"https://reddit.com/r/{name}"); + + var pageReader = new HtmlPageReader(new HtmlPageReaderOptions + { + Url = uri.ToString() + }); + pageReader.Parse(); + var item = _sources.New(new SourceModel { Site = SourceTypes.Reddit, @@ -55,9 +66,18 @@ public class SourcesController : ControllerBase Type = SourceTypes.Reddit, Source = "feed", Enabled = true, - Url = url, - Tags = $"{SourceTypes.Reddit}, {name}" + Url = uri.ToString(), + Tags = $"{SourceTypes.Reddit},{name}" }); + + // Not all subreddits have an Icon, so we only want to add a record when it has one. + if (pageReader.Data.Header.Image != "") + _icons.New(new IconModel + { + Id = Guid.NewGuid(), + FileName = pageReader.Data.Header.Image, + SourceId = item.ID + }); return SourceDto.Convert(item); } @@ -75,27 +95,41 @@ public class SourcesController : ControllerBase Source = "feed", Enabled = true, Url = url, - Tags = $"{SourceTypes.Rss}, {name}" + Tags = $"{SourceTypes.Rss},{name}" }; var item = _sources.New(m); return SourceDto.Convert(item); } [HttpPost("new/youtube")] - public SourceDto NewYoutube(string name, string url) + public SourceDto NewYoutube(string url) { - var res = _sources.GetByNameAndType(name, SourceTypes.YouTube); + var res = _sources.GetByUrl(url); if (res.ID != Guid.Empty) return SourceDto.Convert(res); + var htmlClient = new HtmlPageReader(new HtmlPageReaderOptions + { + Url = url + }); + htmlClient.Parse(); + var item = _sources.New(new SourceModel { Site = SourceTypes.YouTube, Type = SourceTypes.YouTube, - Name = name, + Name = htmlClient.Data.Header.Title, Source = "feed", - Url = url, + Url = "feed", Enabled = true, - Tags = $"{SourceTypes.YouTube}, {name}" + Tags = $"{SourceTypes.YouTube},{htmlClient.Data.Header.Title}", + YoutubeId = htmlClient.Data.Header.YoutubeChannelID ?? "" + }); + + _icons.New(new IconModel + { + Id = Guid.NewGuid(), + FileName = htmlClient.Data.Header.Image, + SourceId = item.ID }); return SourceDto.Convert(item); @@ -115,11 +149,48 @@ public class SourcesController : ControllerBase Url = $"https://twitch.tv/{name}", Source = "api", Enabled = true, - Tags = $"{SourceTypes.Twitch}, {name}" + Tags = $"{SourceTypes.Twitch},{name}" }); return SourceDto.Convert(item); } + [HttpPost("new/github")] + public SourceDto NewGithub(string url) + { + if (!url.Contains("github.com")) return new SourceDto(); + + var res = _sources.GetByUrl(url); + if (res.ID != Guid.Empty) return SourceDto.Convert(res); + + var slice = url.Split('/'); + + var pageReader = new HtmlPageReader(new HtmlPageReaderOptions + { + Url = url + }); + pageReader.Parse(); + + var item = _sources.New(new SourceModel + { + Site = SourceTypes.GitHub, + Type = SourceTypes.GitHub, + Name = $"{slice[3]}/{slice[4]}", + Url = url, + Source = "feed", + Enabled = true, + Tags = $"{SourceTypes.GitHub}, {slice[3]}, {slice[4]}" + }); + + _icons.New(new IconModel + { + Id = Guid.NewGuid(), + FileName = pageReader.Data.Header.Image, + SourceId = item.ID + }); + + return SourceDto.Convert(item); + } + [HttpGet("{id}")] public SourceDto GetById(Guid id) { diff --git a/Newsbot.Collector.Api/Program.cs b/Newsbot.Collector.Api/Program.cs index 3222673..4c6292f 100644 --- a/Newsbot.Collector.Api/Program.cs +++ b/Newsbot.Collector.Api/Program.cs @@ -2,10 +2,12 @@ using Hangfire; using Hangfire.MemoryStorage; using HealthChecks.UI.Client; using Microsoft.AspNetCore.Diagnostics.HealthChecks; +using Newsbot.Collector.Api; using Newsbot.Collector.Domain.Consts; using Newsbot.Collector.Domain.Models; -using Newsbot.Collector.Services.Jobs; +using Newsbot.Collector.Domain.Models.Config; using Serilog; + using ILogger = Serilog.ILogger; var builder = WebApplication.CreateBuilder(args); @@ -17,15 +19,7 @@ builder.Host.UseSerilog(); var config = GetConfiguration(); builder.Configuration.AddConfiguration(config); -Log.Logger = new LoggerConfiguration() - .WriteTo.Console() - .WriteTo.OpenTelemetry( - config.GetValue(ConfigConnectionStringConst.OpenTelemetry) ?? "", - resourceAttributes: new Dictionary - { - { "service.name", "newsbot-collector-api" } - }) - .CreateLogger(); +Log.Logger = GetLogger(config); Log.Information("Starting up"); // Configure Hangfire @@ -43,6 +37,11 @@ builder.Services.AddSwaggerGen(); builder.Services.Configure(config.GetSection("ConnectionStrings")); +builder.Services.Configure(config.GetSection(ConfigSectionsConst.ConnectionStrings)); +builder.Services.Configure(config.GetSection(ConfigSectionsConst.Rss)); +builder.Services.Configure(config.GetSection(ConfigSectionsConst.Youtube)); +//builder.Services.Configure< + var app = builder.Build(); // Configure the HTTP request pipeline. @@ -55,7 +54,7 @@ if (config.GetValue("EnableSwagger")) app.UseHttpsRedirection(); app.UseHangfireDashboard(); -SetupRecurringJobs(config, Log.Logger); +BackgroundJobs.SetupRecurringJobs(config); app.UseAuthorization(); @@ -77,21 +76,22 @@ static IConfiguration GetConfiguration() .Build(); } -static void SetupRecurringJobs(IConfiguration configuration, ILogger logger) +static ILogger GetLogger(IConfiguration configuration) { - //RecurringJob.AddOrUpdate("Example", x => x.InitAndExecute(new HelloWorldJobOptions - //{ - // Message = "Hello from the background!" - //}), "0/1 * * * *"); + var otel = configuration.GetValue(ConfigConnectionStringConst.OpenTelemetry) ?? ""; - RecurringJob.AddOrUpdate("RSS", x => x.InitAndExecute(new RssWatcherJobOptions - { - ConnectionString = configuration.GetSection(ConfigConnectionStringConst.Database).Value ?? "" - }), "15 0-23 * * *"); + if (otel == "") + return Log.Logger = new LoggerConfiguration() + .WriteTo.Console() + .CreateLogger(); - RecurringJob.AddOrUpdate("Discord Alerts", x => - x.InitAndExecute(new DiscordNotificationJobOptions - { - DatabaseConnectionString = configuration.GetSection(ConfigConnectionStringConst.Database).Value ?? "" - }), "5/10 * * * *"); + return Log.Logger = new LoggerConfiguration() + .WriteTo.Console() + .WriteTo.OpenTelemetry( + otel, + resourceAttributes: new Dictionary + { + { "service.name", "newsbot-collector-api" } + }) + .CreateLogger(); } \ No newline at end of file diff --git a/Newsbot.Collector.Database/Migrations/20220529082459_seed.sql b/Newsbot.Collector.Database/Migrations/20220529082459_seed.sql index 279f8cd..7b257c0 100644 --- a/Newsbot.Collector.Database/Migrations/20220529082459_seed.sql +++ b/Newsbot.Collector.Database/Migrations/20220529082459_seed.sql @@ -6,45 +6,27 @@ SELECT 'up SQL query'; CREATE EXTENSION IF NOT EXISTS "uuid-ossp"; -- Final Fantasy XIV Entries -INSERT INTO sources VALUES -(uuid_generate_v4(), 'ffxiv', 'Final Fantasy XIV - NA', 'scrape', 'ffxiv', 'a', TRUE, 'https://na.finalfantasyxiv.com/lodestone/', 'ffxiv, final, fantasy, xiv, na, lodestone'); -INSERT INTO sources VALUES -(uuid_generate_v4(), 'ffxiv', 'Final Fantasy XIV - JP', 'scrape', 'ffxiv', 'a', FALSE, 'https://jp.finalfantasyxiv.com/lodestone/', 'ffxiv, final, fantasy, xiv, jp, lodestone'); -INSERT INTO sources VALUES -(uuid_generate_v4(), 'ffxiv', 'Final Fantasy XIV - EU', 'scrape', 'ffxiv', 'a', FALSE, 'https://eu.finalfantasyxiv.com/lodestone/', 'ffxiv, final, fantasy, xiv, eu, lodestone'); -INSERT INTO sources VALUES -(uuid_generate_v4(), 'ffxiv', 'Final Fantasy XIV - FR', 'scrape', 'ffxiv', 'a', FALSE, 'https://fr.finalfantasyxiv.com/lodestone/', 'ffxiv, final, fantasy, xiv, fr, lodestone'); -INSERT INTO sources VALUES -(uuid_generate_v4(), 'ffxiv', 'Final Fantasy XIV - DE', 'scrape', 'ffxiv', 'a', FALSE, 'https://de.finalfantasyxiv.com/lodestone/', 'ffxiv, final, fantasy, xiv, de, lodestone'); - --- Reddit Entries -INSERT INTO sources VALUES -(uuid_generate_v4(), 'reddit', 'dadjokes', 'feed', 'reddit', 'a', TRUE, 'https://reddit.com/r/dadjokes', 'reddit, dadjokes'); -INSERT INTO sources VALUES -(uuid_generate_v4(), 'reddit', 'steamdeck', 'feed', 'reddit', 'a', TRUE, 'https://reddit.com/r/steamdeck', 'reddit, steam deck, steam, deck'); - --- Youtube Entries -INSERT INTO sources VALUES -(uuid_generate_v4(), 'youtube', 'Game Grumps', 'feed', 'youtube', 'a', TRUE, 'https://www.youtube.com/user/GameGrumps', 'youtube, game grumps, game, grumps'); - --- RSS Entries -INSERT INTO sources VALUES -(uuid_generate_v4(), 'steampowered', 'steam deck', 'feed', 'rss', 'a', TRUE, 'https://store.steampowered.com/feeds/news/app/1675200/?cc=US&l=english&snr=1_2108_9__2107', 'rss, steampowered, steam, deck, steam deck'); - --- Twitch Entries -INSERT INTO sources VALUES -(uuid_generate_v4(), 'twitch', 'Nintendo', 'api', 'twitch', 'a', TRUE, 'https://twitch.tv/nintendo', 'twitch, nintendo'); - +INSERT INTO sources +VALUES (uuid_generate_v4(), 'ffxiv', 'Final Fantasy XIV - NA', 'scrape', 'ffxiv', 'a', TRUE, + 'https://na.finalfantasyxiv.com/lodestone/', 'ffxiv, final, fantasy, xiv, na, lodestone'); +INSERT INTO sources +VALUES (uuid_generate_v4(), 'ffxiv', 'Final Fantasy XIV - JP', 'scrape', 'ffxiv', 'a', FALSE, + 'https://jp.finalfantasyxiv.com/lodestone/', 'ffxiv, final, fantasy, xiv, jp, lodestone'); +INSERT INTO sources +VALUES (uuid_generate_v4(), 'ffxiv', 'Final Fantasy XIV - EU', 'scrape', 'ffxiv', 'a', FALSE, + 'https://eu.finalfantasyxiv.com/lodestone/', 'ffxiv, final, fantasy, xiv, eu, lodestone'); +INSERT INTO sources +VALUES (uuid_generate_v4(), 'ffxiv', 'Final Fantasy XIV - FR', 'scrape', 'ffxiv', 'a', FALSE, + 'https://fr.finalfantasyxiv.com/lodestone/', 'ffxiv, final, fantasy, xiv, fr, lodestone'); +INSERT INTO sources +VALUES (uuid_generate_v4(), 'ffxiv', 'Final Fantasy XIV - DE', 'scrape', 'ffxiv', 'a', FALSE, + 'https://de.finalfantasyxiv.com/lodestone/', 'ffxiv, final, fantasy, xiv, de, lodestone'); -- +goose StatementEnd -- +goose Down -- +goose StatementBegin --SELECT 'down SQL query'; - -DELETE FROM sources where source = 'reddit' and name = 'dadjokes'; -DELETE FROM sources where source = 'reddit' and name = 'steamdeck'; -DELETE FROM sources where source = 'ffxiv'; -DELETE FROM sources WHERE source = 'twitch' and name = 'Nintendo'; -DELETE FROM sources WHERE source = 'youtube' and name = 'Game Grumps'; -DELETE FROM SOURCES WHERE source = 'rss' and name = 'steam deck'; +DELETE +FROM sources +where source = 'ffxiv'; -- +goose StatementEnd diff --git a/Newsbot.Collector.Database/Migrations/20230305204112_seed.sql b/Newsbot.Collector.Database/Migrations/20230305204112_seed.sql deleted file mode 100644 index 2429119..0000000 --- a/Newsbot.Collector.Database/Migrations/20230305204112_seed.sql +++ /dev/null @@ -1,20 +0,0 @@ --- +goose Up --- +goose StatementBegin -INSERT INTO sources VALUES ( - uuid_generate_v4(), - 'rss', - 'Let''s Mosley', - 'feed', - 'rss', - 'podcast', - TRUE, - 'https://anchor.fm/s/6c7aa4c4/podcast/rss', - 'rss,let''s mosley,fitnes,coach', - FALSE); - --- +goose StatementEnd - --- +goose Down --- +goose StatementBegin -DELETE FROM sources Where type = 'rss' And Name = 'Let''s Mosley' --- +goose StatementEnd diff --git a/Newsbot.Collector.Database/Repositories/SourcesTable.cs b/Newsbot.Collector.Database/Repositories/SourcesTable.cs index 4657f68..62e5e69 100644 --- a/Newsbot.Collector.Database/Repositories/SourcesTable.cs +++ b/Newsbot.Collector.Database/Repositories/SourcesTable.cs @@ -9,7 +9,7 @@ namespace Newsbot.Collector.Database.Repositories; public class SourcesTable : ISourcesRepository { - private string _connectionString; + private readonly string _connectionString; public SourcesTable(string connectionString) { @@ -19,25 +19,16 @@ public class SourcesTable : ISourcesRepository public SourcesTable(IConfiguration configuration) { var connstr = configuration.GetConnectionString("database"); - if (connstr is null) - { - connstr = ""; - } + if (connstr is null) connstr = ""; _connectionString = connstr; } - private IDbConnection OpenConnection(string connectionString) - { - var conn = new NpgsqlConnection(_connectionString); - conn.Open(); - return conn; - } - public SourceModel New(SourceModel model) { model.ID = Guid.NewGuid(); using var conn = OpenConnection(_connectionString); - var query = "Insert Into Sources (ID, Site, Name, Source, Type, Value, Enabled, Url, Tags) Values (@id ,@site,@name,@source,@type,@value,@enabled,@url,@tags);"; + var query = + "Insert Into Sources (ID, Site, Name, Source, Type, Value, Enabled, Url, Tags, YoutubeId) Values (@id ,@site,@name,@source,@type,@value,@enabled,@url,@tags,@youtubeid);"; conn.Execute(query, new { id = model.ID, @@ -48,7 +39,8 @@ public class SourcesTable : ISourcesRepository model.Value, model.Enabled, model.Url, - model.Tags + model.Tags, + model.YoutubeId }); return model; } @@ -61,10 +53,7 @@ public class SourcesTable : ISourcesRepository { id = ID }); - if (res.Count() == 0) - { - return new SourceModel(); - } + if (res.Count() == 0) return new SourceModel(); return res.First(); } @@ -83,10 +72,7 @@ public class SourcesTable : ISourcesRepository name = Name }); - if (res.Count() == 0) - { - return new SourceModel(); - } + if (res.Count() == 0) return new SourceModel(); return res.First(); } @@ -96,14 +82,24 @@ public class SourcesTable : ISourcesRepository var query = "Select * from Sources WHERE name = @name and type = @type;"; var res = conn.Query(query, new { - name = name, - type = type + name, type }); - if (res.Count() == 0) + if (res.Count() == 0) return new SourceModel(); + return res.First(); + } + + public SourceModel GetByUrl(string url) + { + using var conn = OpenConnection(_connectionString); + var query = "Select * from Sources WHERE url = @url;"; + var res = conn.Query(query, new { - return new SourceModel(); - } + url + }); + + if (res.ToList().Count == 0) return new SourceModel(); + return res.First(); } @@ -115,8 +111,7 @@ public class SourcesTable : ISourcesRepository Fetch Next @count Rows Only;"; return conn.Query(query, new { - page = page * count, - count = count + page = page * count, count }).ToList(); } @@ -126,8 +121,7 @@ public class SourcesTable : ISourcesRepository var query = "Select * From Sources where Source = @source Limit @limit;"; return conn.Query(query, new { - source = source, - limit = limit + source, limit }).ToList(); } @@ -137,28 +131,44 @@ public class SourcesTable : ISourcesRepository var query = "Select * From Sources where Type = @type Limit @limit;"; return conn.Query(query, new { - type = type, - limit = limit + type, limit }).ToList(); } - public int Disable(Guid ID) + + public int Disable(Guid id) { using var conn = OpenConnection(_connectionString); var query = "Update Sources Set Enabled = FALSE where ID = @id;"; return conn.Execute(query, new { - id = ID + id }); } - - public int Enable(Guid ID) + + public int Enable(Guid id) { using var conn = OpenConnection(_connectionString); var query = "Update Sources Set Enabled = TRUE where ID = @id;"; return conn.Execute(query, new { - id = ID + id }); } + public int UpdateYoutubeId(Guid id, string youtubeId) + { + using var conn = OpenConnection(_connectionString); + var query = "Update Sources Set youtubeid = @youtubeId where ID = @id;"; + return conn.Execute(query, new + { + id, youtubeId + }); + } + + private IDbConnection OpenConnection(string connectionString) + { + var conn = new NpgsqlConnection(_connectionString); + conn.Open(); + return conn; + } } \ No newline at end of file diff --git a/Newsbot.Collector.Domain/Consts/ConfigRedditConst.cs b/Newsbot.Collector.Domain/Consts/ConfigRedditConst.cs deleted file mode 100644 index a40031b..0000000 --- a/Newsbot.Collector.Domain/Consts/ConfigRedditConst.cs +++ /dev/null @@ -1,9 +0,0 @@ -namespace Newsbot.Collector.Domain.Consts; - -public class ConfigRedditConst -{ - public const string IsEnabled = "Reddit:IsEnabled"; - public const string PullHot = "Reddit:PullHot"; - public const string PullNsfw = "Reddit:PullNsfw"; - public const string PullTop = "Reddit:PullTop"; -} \ No newline at end of file diff --git a/Newsbot.Collector.Domain/Interfaces/ISourcesRepository.cs b/Newsbot.Collector.Domain/Interfaces/ISourcesRepository.cs index 1fa8527..8db6a82 100644 --- a/Newsbot.Collector.Domain/Interfaces/ISourcesRepository.cs +++ b/Newsbot.Collector.Domain/Interfaces/ISourcesRepository.cs @@ -1,4 +1,3 @@ -using System.Globalization; using Newsbot.Collector.Domain.Models; namespace Newsbot.Collector.Domain.Interfaces; @@ -10,9 +9,11 @@ public interface ISourcesRepository public SourceModel GetByID(string ID); public SourceModel GetByName(string name); public SourceModel GetByNameAndType(string name, string type); + SourceModel GetByUrl(string url); public List List(int page, int count); public List ListBySource(string source, int limit); public List ListByType(string type, int limit = 25); - public int Disable(Guid ID); - public int Enable(Guid ID); + public int Disable(Guid id); + public int Enable(Guid id); + public int UpdateYoutubeId(Guid id, string youtubeId); } \ No newline at end of file diff --git a/Newsbot.Collector.Domain/Models/DatabaseModel.cs b/Newsbot.Collector.Domain/Models/DatabaseModel.cs index 8b48619..e8c5080 100644 --- a/Newsbot.Collector.Domain/Models/DatabaseModel.cs +++ b/Newsbot.Collector.Domain/Models/DatabaseModel.cs @@ -42,9 +42,10 @@ public class DiscordWebHookModel public class IconModel { - public Guid ID { get; set; } + public Guid Id { get; set; } public string FileName { get; set; } = ""; public string Site { get; set; } = ""; + public Guid SourceId { get; set; } } public class SettingModel @@ -61,7 +62,7 @@ public class SourceModel public string Site { get; set; } = ""; public string Name { get; set; } = ""; - // Source use to deinfe the worker to query with but moving to Type as it was not used really. + // Source use to define the worker to query with but moving to Type as it was not used really. public string Source { get; set; } = ""; public string Type { get; set; } = ""; public string Value { get; set; } = ""; @@ -69,6 +70,7 @@ public class SourceModel public string Url { get; set; } = ""; public string Tags { get; set; } = ""; public bool Deleted { get; set; } + public string YoutubeId { get; set; } = ""; } public class SubscriptionModel diff --git a/Newsbot.Collector.Services/HtmlParser/HeadParserClient.cs b/Newsbot.Collector.Services/HtmlParser/HeadParserClient.cs index 9224c23..2296adb 100644 --- a/Newsbot.Collector.Services/HtmlParser/HeadParserClient.cs +++ b/Newsbot.Collector.Services/HtmlParser/HeadParserClient.cs @@ -5,12 +5,11 @@ namespace Newsbot.Collector.Services.HtmlParser; public class HeadParserClient { - private const string XPathMetaTag = "//head/meta"; + private const string XPathHeadMetaTag = "//head/meta"; + private const string XPathBodyMetaTag = "//body/meta"; private const string XPathLinkTag = "//head/link"; - public HeadParserModel Data { get; set; } - - private string _htmlContent; + private readonly string _htmlContent; public HeadParserClient(string htmlContent, bool useBrowser = false) { @@ -18,6 +17,8 @@ public class HeadParserClient Data = new HeadParserModel(); } + public HeadParserModel Data { get; set; } + public void Parse() { Data.Title = GetMetaTitle(); @@ -36,9 +37,23 @@ public class HeadParserClient var htmlDoc = new HtmlDocument(); htmlDoc.LoadHtml(_htmlContent); - var tags = htmlDoc.DocumentNode.SelectNodes(XPathMetaTag).ToList(); + var allTags = new List(); - return tags; + var headerTags = htmlDoc.DocumentNode.SelectNodes(XPathHeadMetaTag).ToList(); + allTags.AddRange(headerTags); + + try + { + var bodyTags = htmlDoc.DocumentNode.SelectNodes(XPathBodyMetaTag).ToList(); + allTags.AddRange(bodyTags); + } + catch + { + // no tags found in the body and that's ok. + // we check the body thanks to Youtube. + } + + return allTags; } private List CollectLinkTags() @@ -53,13 +68,13 @@ public class HeadParserClient { foreach (var meta in html) { + if (meta.Attributes.Count == 0) continue; + ; //Console.WriteLine($"Name={meta.Attributes[0].Name} & Value={meta.Attributes[0].Value}"); - if (meta.Attributes[0].Value.Contains(Tag) == false) - { - continue; - } + if (meta.Attributes[0].Value.Contains(Tag) == false) continue; return meta.Attributes[1].Value; } + return ""; } @@ -68,91 +83,86 @@ public class HeadParserClient foreach (var tag in tags) { var res = GetTagValue(tag, htmlTags); - if (res == "") - { - continue; - } + if (res == "") continue; return res; } + return ""; } public string GetMetaTitle() { var htmlTags = CollectMetaTags(); - string[] tags = new string[] { "twitter:title", "og:title", "title" }; + string[] tags = { "twitter:title", "og:title", "title" }; return FindFirstResult(tags, htmlTags); } public string GetMetaDescription() { var htmlTags = CollectMetaTags(); - string[] tags = new string[] { "twitter:description", "og:description", "description" }; + string[] tags = { "twitter:description", "og:description", "description" }; return FindFirstResult(tags, htmlTags); } public string GetMetaImage() { var htmlTags = CollectMetaTags(); - string[] tags = new string[] { "twitter:image", "og:image", "image" }; + string[] tags = { "twitter:image", "og:image", "image" }; return FindFirstResult(tags, htmlTags); } public string GetMetaUrl() { var htmlTags = CollectMetaTags(); - string[] tags = new string[] { "twitter:url", "og:url", "url" }; + string[] tags = { "twitter:url", "og:url", "url" }; return FindFirstResult(tags, htmlTags); } public string GetMetaPageType() { var htmlTags = CollectMetaTags(); - string[] tags = new string[] { "og:type", "type" }; + string[] tags = { "og:type", "type" }; return FindFirstResult(tags, htmlTags); } public string GetMetaColorTheme() { var htmlTags = CollectMetaTags(); - string[] tags = new string[] { "theme-color" }; + string[] tags = { "theme-color" }; return FindFirstResult(tags, htmlTags); } public string GetYouTubeChannelId() { var htmlTags = CollectMetaTags(); - string[] tags = new string[] { "channelId" }; - return FindFirstResult(tags, htmlTags); + string[] tags = { "og:url", "channelId" }; + var results = FindFirstResult(tags, htmlTags); + var id = results.Replace("https://www.youtube.com/channel/", ""); + return id; } /// - /// This will parse the headers looking for known keys that will contain a RSS feed link. - /// If the feed is not found, this will throw an exception (MissingHeaderValueException). + /// This will parse the headers looking for known keys that will contain a RSS feed link. + /// If the feed is not found, this will throw an exception (MissingHeaderValueException). /// /// public string GetSiteFeed() { var htmlTags = CollectLinkTags(); - var tags = new string[] { "alternate" }; + var tags = new[] { "alternate" }; try { var attr = FindFirstAttribute(tags, htmlTags); foreach (var item in attr) { - if (item.Name != "href") - { - continue; - } + if (item.Name != "href") continue; var uri = item.Value; - if (uri.StartsWith("//")) - { - uri = uri.Replace("//", "https://"); - } + if (uri.StartsWith("//")) uri = uri.Replace("//", "https://"); return uri; } + return ""; } catch @@ -165,7 +175,6 @@ public class HeadParserClient private HtmlAttributeCollection FindFirstAttribute(string[] tags, List htmlTags) { foreach (var tag in tags) - { try { var res = GetValidAttribute(tag, htmlTags); @@ -175,7 +184,7 @@ public class HeadParserClient { // Nothing was found in the given tag but we will keep looking till we finish all the entries. } - } + throw new MissingHeaderValueException("Unable to find the requested value"); } @@ -183,12 +192,10 @@ public class HeadParserClient { foreach (var meta in html) { - if (meta.Attributes[0].Value.Contains(Tag) == false) - { - continue; - } + if (meta.Attributes[0].Value.Contains(Tag) == false) continue; return meta.Attributes; } + throw new MissingHeaderValueException("Site does not expose requested tag."); } } \ No newline at end of file diff --git a/Newsbot.Collector.Services/HtmlParser/HtmlPageReader.cs b/Newsbot.Collector.Services/HtmlParser/HtmlPageReader.cs index d138171..7b36f3d 100644 --- a/Newsbot.Collector.Services/HtmlParser/HtmlPageReader.cs +++ b/Newsbot.Collector.Services/HtmlParser/HtmlPageReader.cs @@ -1,25 +1,32 @@ using HtmlAgilityPack; -using Newsbot.Collector.Domain.Exceptions; namespace Newsbot.Collector.Services.HtmlParser; +public class HtmlPageReaderOptions +{ + public string? Url { get; init; } + public string? SourceCode { get; init; } +} + public class HtmlPageReader { + private readonly HeadParserClient _headClient; + private readonly string _siteContent; - public HtmlData Data { get; set; } - - private HeadParserClient _headClient; - - private string _siteContent; - - public HtmlPageReader(string pageUrl) + public HtmlPageReader(HtmlPageReaderOptions options) { - _siteContent = ReadSiteContent(pageUrl); - _headClient = new HeadParserClient(_siteContent); + if (options.SourceCode is not null) _siteContent = options.SourceCode; + if (options.Url is not null) _siteContent = ReadSiteContent(options.Url); + + if (_siteContent is null) throw new Exception("SiteContent was not filled and expected."); + + _headClient = new HeadParserClient(_siteContent); Data = new HtmlData(); } + public HtmlData Data { get; set; } + public void Parse() { _headClient.Parse(); @@ -32,7 +39,7 @@ public class HtmlPageReader var html = client.GetStringAsync(url); html.Wait(); - var content = html.Result.ToString(); + var content = html.Result; return content; } @@ -47,24 +54,14 @@ public class HtmlPageReader htmlDoc.LoadHtml(_siteContent); var links = htmlDoc.DocumentNode.SelectNodes("//div[contains(@class, 'article-text')]").ToList(); - if (links.Count == 0) - { - throw new Exception("Unable to parse body. Tag is unkown."); - } + if (links.Count == 0) throw new Exception("Unable to parse body. Tag is unknown."); - if (links.Count >= 2) - { - throw new Exception("Too many results back for the body"); - } + if (links.Count >= 2) throw new Exception("Too many results back for the body"); - var content = new List(); - foreach (var item in links[0].ChildNodes) - { - if (item.Name == "p") - { - content.Add(item.InnerText); - } - } + //var content = new List(); + //foreach (var item in links[0].ChildNodes) + // if (item.Name == "p") + // content.Add(item.InnerText); return links; } diff --git a/Newsbot.Collector.Services/Jobs/DiscordNotificationJob.cs b/Newsbot.Collector.Services/Jobs/DiscordNotificationJob.cs index f7ab662..de59554 100644 --- a/Newsbot.Collector.Services/Jobs/DiscordNotificationJob.cs +++ b/Newsbot.Collector.Services/Jobs/DiscordNotificationJob.cs @@ -1,6 +1,7 @@ using Newsbot.Collector.Database.Repositories; using Newsbot.Collector.Domain.Interfaces; using Newsbot.Collector.Domain.Models; +using Newsbot.Collector.Domain.Models.Config; using Newsbot.Collector.Services.Notifications.Discord; using Serilog; @@ -8,7 +9,9 @@ namespace Newsbot.Collector.Services.Jobs; public class DiscordNotificationJobOptions { - public string? DatabaseConnectionString { get; set; } + public ConfigSectionConnectionStrings? ConnectionStrings { get; set; } + public ConfigSectionNotificationsDiscord? Config { get; set; } + } public class DiscordNotificationJob @@ -19,6 +22,7 @@ public class DiscordNotificationJob private IDiscordWebHooksRepository _webhook; private ISourcesRepository _sources; private ISubscriptionRepository _subs; + private IIconsRepository _icons; public DiscordNotificationJob() { @@ -27,16 +31,22 @@ public class DiscordNotificationJob _webhook = new DiscordWebhooksTable(""); _sources = new SourcesTable(""); _subs = new SubscriptionsTable(""); + _icons = new IconsTable(""); } public void InitAndExecute(DiscordNotificationJobOptions options) { - _queue = new DiscordQueueTable(options.DatabaseConnectionString ?? ""); - _article = new ArticlesTable(options.DatabaseConnectionString ?? ""); - _webhook = new DiscordWebhooksTable(options.DatabaseConnectionString ?? ""); - _sources = new SourcesTable(options.DatabaseConnectionString ?? ""); - _subs = new SubscriptionsTable(options.DatabaseConnectionString ?? ""); + options.ConnectionStrings ??= new ConfigSectionConnectionStrings(); + options.Config ??= new ConfigSectionNotificationsDiscord(); + + _queue = new DiscordQueueTable(options.ConnectionStrings.Database ?? ""); + _article = new ArticlesTable(options.ConnectionStrings.Database ?? ""); + _webhook = new DiscordWebhooksTable(options.ConnectionStrings.Database ?? ""); + _sources = new SourcesTable(options.ConnectionStrings.Database ?? ""); + _subs = new SubscriptionsTable(options.ConnectionStrings.Database ?? ""); + _icons = new IconsTable(options.ConnectionStrings.Database ?? ""); + Execute(); } @@ -59,6 +69,8 @@ public class DiscordNotificationJob continue; } + var sourceIcon = _icons.GetBySourceId(sourceDetails.ID); + // Find all the subscriptions for that source var allSubscriptions = _subs.ListBySourceID(sourceDetails.ID); @@ -74,7 +86,7 @@ public class DiscordNotificationJob var client = new DiscordWebhookClient(discordDetails.Url); try { - client.SendMessage(GenerateDiscordMessage(sourceDetails, articleDetails)); + client.SendMessage(GenerateDiscordMessage(sourceDetails, articleDetails, sourceIcon)); } catch (Exception e) { @@ -89,7 +101,7 @@ public class DiscordNotificationJob } } - public DiscordMessage GenerateDiscordMessage(SourceModel source, ArticlesModel article) + public DiscordMessage GenerateDiscordMessage(SourceModel source, ArticlesModel article, IconModel icon) { var embed = new DiscordMessageEmbed { @@ -99,6 +111,7 @@ public class DiscordNotificationJob Author = new DiscordMessageEmbedAuthor { Name = article.AuthorName, + IconUrl = icon.FileName }, Footer = new DiscordMessageEmbedFooter { diff --git a/Newsbot.Collector.Services/Jobs/GithubWatcherJob.cs b/Newsbot.Collector.Services/Jobs/GithubWatcherJob.cs index be61d52..4ae819f 100644 --- a/Newsbot.Collector.Services/Jobs/GithubWatcherJob.cs +++ b/Newsbot.Collector.Services/Jobs/GithubWatcherJob.cs @@ -1,18 +1,23 @@ using System.ServiceModel.Syndication; using System.Xml; using Newsbot.Collector.Database.Repositories; +using Newsbot.Collector.Domain.Consts; using Newsbot.Collector.Domain.Interfaces; using Newsbot.Collector.Domain.Models; +using Newsbot.Collector.Domain.Models.Config; using Newsbot.Collector.Services.HtmlParser; namespace Newsbot.Collector.Services.Jobs; public class GithubWatcherJobOptions { - public string ConnectionString { get; set; } = ""; + public ConfigSectionConnectionStrings? ConnectionStrings { get; set; } + + //public string ConnectionString { get; set; } = ""; public bool FeaturePullReleases { get; set; } = false; + public bool FeaturePullCommits { get; set; } = false; - public bool PullIssues { get; set; } = false; + //public bool PullIssues { get; set; } = false; } public class GithubWatcherJob @@ -28,24 +33,24 @@ public class GithubWatcherJob _source = new SourcesTable(""); } - public void Init(GithubWatcherJobOptions options) - { - _articles = new ArticlesTable(options.ConnectionString); - _queue = new DiscordQueueTable(options.ConnectionString); - _source = new SourcesTable(options.ConnectionString); - } - public void InitAndExecute(GithubWatcherJobOptions options) { - Init(options); + options.ConnectionStrings ??= new ConfigSectionConnectionStrings(); + _articles = new ArticlesTable(options.ConnectionStrings.Database ?? ""); + _queue = new DiscordQueueTable(options.ConnectionStrings.Database ?? ""); + _source = new SourcesTable(options.ConnectionStrings.Database ?? ""); + Execute(); } private void Execute() { + _source.ListBySource(SourceTypes.GitHub, 25); + // query sources for things to pull var items = new List(); + items.AddRange(Collect(new Uri("https://github.com/jtom38/dvb"))); // query */commits/master.atom @@ -56,7 +61,7 @@ public class GithubWatcherJob { var items = new List(); - Guid placeHolderId = Guid.NewGuid(); + var placeHolderId = Guid.NewGuid(); // query */release.atom items.AddRange(CollectItems($"{url.AbsoluteUri}/releases.atom", placeHolderId)); items.AddRange(CollectItems($"{url.AbsoluteUri}/master.atom", placeHolderId)); @@ -75,12 +80,12 @@ public class GithubWatcherJob { var itemUrl = item.Links[0].Uri.AbsoluteUri; var exits = _articles.GetByUrl(itemUrl); - if (exits.ID != Guid.Empty) - { - continue; - } + if (exits.ID != Guid.Empty) continue; - var parser = new HtmlPageReader(itemUrl); + var parser = new HtmlPageReader(new HtmlPageReaderOptions + { + Url = itemUrl + }); parser.Parse(); try @@ -104,6 +109,7 @@ public class GithubWatcherJob Console.WriteLine(e); } } + return items; } } \ No newline at end of file diff --git a/Newsbot.Collector.Services/Jobs/RssWatcherJob.cs b/Newsbot.Collector.Services/Jobs/RssWatcherJob.cs index 4436837..65cff03 100644 --- a/Newsbot.Collector.Services/Jobs/RssWatcherJob.cs +++ b/Newsbot.Collector.Services/Jobs/RssWatcherJob.cs @@ -4,6 +4,7 @@ using Newsbot.Collector.Database.Repositories; using Newsbot.Collector.Domain.Consts; using Newsbot.Collector.Domain.Interfaces; using Newsbot.Collector.Domain.Models; +using Newsbot.Collector.Domain.Models.Config; using Newsbot.Collector.Services.HtmlParser; using Serilog; @@ -11,13 +12,18 @@ namespace Newsbot.Collector.Services.Jobs; public class RssWatcherJobOptions { - public string? ConnectionString { get; init; } - public string? OpenTelemetry { get; init; } + //public string? ConnectionString { get; init; } + //public string? OpenTelemetry { get; init; } + + public ConfigSectionConnectionStrings? ConnectionStrings { get; set; } + public ConfigSectionRssModel? Config { get; set; } } // This class was made to work with Hangfire and it does not support constructors. public class RssWatcherJob { + private const string JobName = "RssWatcherJob"; + private IArticlesRepository _articles; private ILogger _logger; private IDiscordQueueRepository _queue; @@ -28,62 +34,53 @@ public class RssWatcherJob _articles = new ArticlesTable(""); _queue = new DiscordQueueTable(""); _source = new SourcesTable(""); - _logger = GetLogger(""); + _logger = JobLogger.GetLogger("", JobName); } public void InitAndExecute(RssWatcherJobOptions options) { - Init(options); + options.ConnectionStrings ??= new ConfigSectionConnectionStrings(); + options.Config ??= new ConfigSectionRssModel(); - _logger.Information("RssWatcherJob - Job was triggered"); - _logger.Information("RssWatcherJob - Setting up the job"); + _articles = new ArticlesTable(options.ConnectionStrings.Database ?? ""); + _queue = new DiscordQueueTable(options.ConnectionStrings.Database ?? ""); + _source = new SourcesTable(options.ConnectionStrings.Database ?? ""); + _logger = JobLogger.GetLogger(options.ConnectionStrings.OpenTelemetry ?? "", JobName); + + _logger.Information($"{JobName} - Job was triggered"); + if (!options.Config.IsEnabled) + { + _logger.Information($"{JobName} - Going to exit because feature flag is off."); + return; + } + + _logger.Information($"{JobName} - Setting up the job"); Execute(); } - private ILogger GetLogger(string connectionString) - { - return Log.Logger = new LoggerConfiguration() - .WriteTo.Console() - .WriteTo.OpenTelemetry( - connectionString, - resourceAttributes: new Dictionary - { - { "service.name", "newsbot-collector-api" }, - { "Job", "RssWatcherJob" } - }) - .CreateLogger(); - } - - public void Init(RssWatcherJobOptions options) - { - _articles = new ArticlesTable(options.ConnectionString ?? ""); - _queue = new DiscordQueueTable(options.ConnectionString ?? ""); - _source = new SourcesTable(options.ConnectionString ?? ""); - _logger = GetLogger(options.OpenTelemetry ?? ""); - } - public void Execute() { var articles = new List(); - _logger.Information("RssWatcherJob - Requesting sources"); + _logger.Information($"{JobName} - Requesting sources"); var sources = _source.ListByType(SourceTypes.Rss); - _logger.Information($"RssWatcherJob - Got {sources.Count} back"); + _logger.Information($"{JobName} - Got {sources.Count} back"); + foreach (var source in sources) { - _logger.Information($"RssWatcherJob - Starting to process '{source.Name}'"); - _logger.Information("RssWatcherJob - Starting to request feed to be processed"); + _logger.Information($"{JobName} - Starting to process '{source.Name}'"); + _logger.Information($"{JobName} - Starting to request feed to be processed"); var results = Collect(source.Url, source.ID); - _logger.Information($"RssWatcherJob - Collected {results.Count} posts"); + _logger.Information($"{JobName} - Collected {results.Count} posts"); articles.AddRange(results); } - _logger.Information("RssWatcherJob - Sending posts over to the database"); + _logger.Information($"{JobName} - Sending posts over to the database"); UpdateDatabase(articles); - _logger.Information("RssWatcherJob - Done!"); + _logger.Information($"{JobName} - Done!"); } public List Collect(string url, Guid sourceId, int sleep = 3000) @@ -101,7 +98,10 @@ public class RssWatcherJob // If we have, skip and save the site bandwidth if (IsThisUrlKnown(articleUrl)) continue; - var meta = new HtmlPageReader(articleUrl); + var meta = new HtmlPageReader(new HtmlPageReaderOptions + { + Url = articleUrl + }); meta.Parse(); var article = new ArticlesModel diff --git a/Newsbot.Collector.Services/Newsbot.Collector.Services.csproj b/Newsbot.Collector.Services/Newsbot.Collector.Services.csproj index 6501693..98a3e25 100644 --- a/Newsbot.Collector.Services/Newsbot.Collector.Services.csproj +++ b/Newsbot.Collector.Services/Newsbot.Collector.Services.csproj @@ -1,19 +1,18 @@ - - + + - - - - - - - - + + + + + + + diff --git a/Newsbot.Collector.Services/Notifications/Discord/DiscordWebhookClient.cs b/Newsbot.Collector.Services/Notifications/Discord/DiscordWebhookClient.cs index 54642e2..015fb2b 100644 --- a/Newsbot.Collector.Services/Notifications/Discord/DiscordWebhookClient.cs +++ b/Newsbot.Collector.Services/Notifications/Discord/DiscordWebhookClient.cs @@ -8,12 +8,11 @@ namespace Newsbot.Collector.Services.Notifications.Discord; public class DiscordWebhookClient : IDiscordNotificatioClient { - - private string[] _webhooks; + private readonly string[] _webhooks; public DiscordWebhookClient(string webhook) { - _webhooks = new string[] { webhook }; + _webhooks = new[] { webhook }; } public DiscordWebhookClient(string[] webhooks) @@ -23,25 +22,22 @@ public class DiscordWebhookClient : IDiscordNotificatioClient public void SendMessage(DiscordMessage payload) { - if (payload.Embeds is not null) - { - MessageValidation.IsEmbedFooterValid(payload.Embeds); - } + if (payload.Embeds is not null) MessageValidation.IsEmbedFooterValid(payload.Embeds); foreach (var webhook in _webhooks) { - var jsonRaw = JsonConvert.SerializeObject(payload, Newtonsoft.Json.Formatting.None, new JsonSerializerSettings { NullValueHandling = NullValueHandling.Ignore }); + var jsonRaw = JsonConvert.SerializeObject(payload, Formatting.None, + new JsonSerializerSettings { NullValueHandling = NullValueHandling.Ignore }); using StringContent jsonContent = new(jsonRaw, Encoding.UTF8, "application/json"); using var client = new HttpClient(); var resp = client.PostAsync(webhook, jsonContent); resp.Wait(); + // can be 204 or a message, might be 200 + Console.WriteLine(resp.Result.StatusCode); if (resp.Result.StatusCode != HttpStatusCode.NoContent) - { throw new Exception("Message was not accepted by the sever."); - } } } - } \ No newline at end of file diff --git a/Newsbot.Collector.Tests/Jobs/DiscordNotificationJobTest.cs b/Newsbot.Collector.Tests/Jobs/DiscordNotificationJobTest.cs index 38175ac..d579f8b 100644 --- a/Newsbot.Collector.Tests/Jobs/DiscordNotificationJobTest.cs +++ b/Newsbot.Collector.Tests/Jobs/DiscordNotificationJobTest.cs @@ -34,6 +34,11 @@ public class DiscordNotificationJobTest Thumbnail = "https://cdn.arstechnica.net/wp-content/uploads/2023/03/GettyImages-944827400-800x534.jpg", Description = "Please work", AuthorName = "No one knows" + }, + new IconModel + { + Id = Guid.NewGuid(), + FileName = "https://www.redditstatic.com/desktop2x/img/favicon/android-icon-192x192.png" }); webhookClient.SendMessage(msg); } diff --git a/Newsbot.Collector.Tests/Jobs/GithubWatcherJobTests.cs b/Newsbot.Collector.Tests/Jobs/GithubWatcherJobTests.cs index b916d73..445bae9 100644 --- a/Newsbot.Collector.Tests/Jobs/GithubWatcherJobTests.cs +++ b/Newsbot.Collector.Tests/Jobs/GithubWatcherJobTests.cs @@ -1,4 +1,6 @@ using Microsoft.Extensions.Configuration; +using Newsbot.Collector.Domain.Models; +using Newsbot.Collector.Domain.Models.Config; using Newsbot.Collector.Services.Jobs; namespace Newsbot.Collector.Tests.Jobs; @@ -7,8 +9,12 @@ public class GithubWatcherJobTests { private IConfiguration GetConfiguration() { - var inMemorySettings = new Dictionary { - {"ConnectionStrings:database", "Host=localhost;Username=postgres;Password=postgres;Database=postgres;sslmode=disable"} + var inMemorySettings = new Dictionary + { + { + "ConnectionStrings:database", + "Host=localhost;Username=postgres;Password=postgres;Database=postgres;sslmode=disable" + } }; IConfiguration configuration = new ConfigurationBuilder() @@ -26,9 +32,12 @@ public class GithubWatcherJobTests public void CanPullAFeed() { var client = new GithubWatcherJob(); - client.Init(new GithubWatcherJobOptions + client.InitAndExecute(new GithubWatcherJobOptions { - ConnectionString = ConnectionString(), + ConnectionStrings = new ConfigSectionConnectionStrings + { + Database = ConnectionString() + }, FeaturePullCommits = true, FeaturePullReleases = true }); diff --git a/Newsbot.Collector.Tests/Jobs/RssWatcherJobTest.cs b/Newsbot.Collector.Tests/Jobs/RssWatcherJobTest.cs index b169cd6..7981329 100644 --- a/Newsbot.Collector.Tests/Jobs/RssWatcherJobTest.cs +++ b/Newsbot.Collector.Tests/Jobs/RssWatcherJobTest.cs @@ -1,14 +1,27 @@ using Microsoft.Extensions.Configuration; +using Newsbot.Collector.Domain.Models.Config; using Newsbot.Collector.Services.Jobs; +using Xunit.Abstractions; namespace Newsbot.Collector.Tests.Jobs; public class RssWatcherJobTest { + private readonly ITestOutputHelper _testOutputHelper; + + public RssWatcherJobTest(ITestOutputHelper testOutputHelper) + { + _testOutputHelper = testOutputHelper; + } + private IConfiguration GetConfiguration() { - var inMemorySettings = new Dictionary { - {"ConnectionStrings:database", "Host=localhost;Username=postgres;Password=postgres;Database=postgres;sslmode=disable"} + var inMemorySettings = new Dictionary + { + { + "ConnectionStrings:database", + "Host=localhost;Username=postgres;Password=postgres;Database=postgres;sslmode=disable" + } }; IConfiguration configuration = new ConfigurationBuilder() @@ -25,9 +38,9 @@ public class RssWatcherJobTest [Fact] public void CanFindItemsNoDb() { - var url = "https://www.engadget.com/rss.xml"; + const string url = "https://www.engadget.com/rss.xml"; var client = new RssWatcherJob(); - var items = client.Collect(url, Guid.NewGuid(), 0); + client.Collect(url, Guid.NewGuid(), 0); } [Fact] @@ -35,7 +48,13 @@ public class RssWatcherJobTest { var url = "https://www.engadget.com/rss.xml"; var client = new RssWatcherJob(); - client.Init(ConnectionString()); + client.InitAndExecute(new RssWatcherJobOptions + { + ConnectionStrings = new ConfigSectionConnectionStrings + { + Database = ConnectionString() + } + }); var items = client.Collect(url, Guid.NewGuid(), 0); client.UpdateDatabase(items); } @@ -44,20 +63,13 @@ public class RssWatcherJobTest public void CanReadHtmlDrivenFeedPage() { var url = "https://www.howtogeek.com/feed/"; - var client = new RssWatcherJob(); - client.Init(ConnectionString()); - var items = client.Collect(url, Guid.NewGuid(), 0); - Console.WriteLine('k'); - } - - [Fact] - public void InitAndExecuteTest() - { var client = new RssWatcherJob(); client.InitAndExecute(new RssWatcherJobOptions { - ConnectionString = ConnectionString() + ConnectionStrings = new ConfigSectionConnectionStrings + { + Database = ConnectionString() + } }); - } } \ No newline at end of file diff --git a/Newsbot.Collector.Tests/Newsbot.Collector.Tests.csproj b/Newsbot.Collector.Tests/Newsbot.Collector.Tests.csproj index f8c1e88..e34e404 100644 --- a/Newsbot.Collector.Tests/Newsbot.Collector.Tests.csproj +++ b/Newsbot.Collector.Tests/Newsbot.Collector.Tests.csproj @@ -27,4 +27,12 @@ + + + true + PreserveNewest + PreserveNewest + + + diff --git a/Newsbot.Collector.Tests/Services/HtmlPageReaderTests.cs b/Newsbot.Collector.Tests/Services/HtmlPageReaderTests.cs index f1961bf..62d8e3a 100644 --- a/Newsbot.Collector.Tests/Services/HtmlPageReaderTests.cs +++ b/Newsbot.Collector.Tests/Services/HtmlPageReaderTests.cs @@ -7,32 +7,74 @@ public class HtmlPageReaderTests [Fact] public void BaseSiteContainsRssFeed() { - var client = new HtmlPageReader("https://dotnettutorials.net/"); + var client = new HtmlPageReader(new HtmlPageReaderOptions + { + Url = "https://dotnettutorials.net/" + }); var headClient = new HeadParserClient(client.GetSiteContent()); var feedUri = headClient.GetSiteFeed(); - if (feedUri == "") - { - Assert.Fail("Failed to find the RSS feed"); - } + if (feedUri == "") Assert.Fail("Failed to find the RSS feed"); } [Fact] public void SiteDoesNotReturnRssFeed() { - var client = new HtmlPageReader("https://www.engadget.com/"); + var client = new HtmlPageReader(new HtmlPageReaderOptions + { + Url = "https://www.engadget.com/" + }); var headClient = new HeadParserClient(client.GetSiteContent()); var feedUri = headClient.GetSiteFeed(); - if (feedUri == "") - { - Assert.Fail(""); - } + if (feedUri == "") Assert.Fail(""); } [Fact] public void CanFindBodyOfTheArticle() { - var client = new HtmlPageReader("https://www.engadget.com/apple-is-convinced-my-dog-is-stalking-me-143100932.html"); + var client = new HtmlPageReader(new HtmlPageReaderOptions + { + Url = "https://www.engadget.com/apple-is-convinced-my-dog-is-stalking-me-143100932.html" + }); var c = client.CollectPostContent(); Console.WriteLine(c); } + + [Fact] + public void FindYoutubeChannelId() + { + var url = "https://www.youtube.com/@CityPlannerPlays"; + //var b = new BrowserClient(); + //var pageSource = b.GetPageSource(url); + + var client = new HtmlPageReader(new HtmlPageReaderOptions + { + Url = url + }); + client.Parse(); + if (client.Data.Header.YoutubeChannelID is null) Assert.Fail("missing youtube id"); + } + + [Fact] + public void CanExtractHeadersFromReddit() + { + var url = "https://www.reddit.com/"; + var client = new HtmlPageReader(new HtmlPageReaderOptions + { + Url = url + }); + client.Parse(); + if (client.Data.Header.Image == "") Assert.Fail("missing an expected image from the reddit header."); + } + + [Fact] + public void CanExtractHeadersFromSubreddit() + { + var url = "https://www.reddit.com/r/ffxiv"; + var client = new HtmlPageReader(new HtmlPageReaderOptions + { + Url = url + }); + client.Parse(); + if (client.Data.Header.Image == "") Assert.Fail("missing an expected image from the reddit header."); + } } \ No newline at end of file diff --git a/Newsbot.Collector.Tests/Tables/SourcesTableTests.cs b/Newsbot.Collector.Tests/Tables/SourcesTableTests.cs index bb2c7bb..c514cba 100644 --- a/Newsbot.Collector.Tests/Tables/SourcesTableTests.cs +++ b/Newsbot.Collector.Tests/Tables/SourcesTableTests.cs @@ -1,8 +1,8 @@ - - using Newsbot.Collector.Database.Repositories; using Newsbot.Collector.Domain.Models; +namespace Newsbot.Collector.Tests.Tables; + public class SourcesTableTests { [Fact] diff --git a/Newsbot.Collector.Tests/UnitTest1.cs b/Newsbot.Collector.Tests/UnitTest1.cs deleted file mode 100644 index bf54d54..0000000 --- a/Newsbot.Collector.Tests/UnitTest1.cs +++ /dev/null @@ -1,10 +0,0 @@ -namespace Newsbot.Collector.Tests; - -public class UnitTest1 -{ - [Fact] - public void Test1() - { - - } -} \ No newline at end of file diff --git a/seed.ps1 b/seed.ps1 index d992a14..2afcaf9 100644 --- a/seed.ps1 +++ b/seed.ps1 @@ -6,20 +6,19 @@ param ( $ErrorActionPreference = 'Stop' -function NewRedditSource { +function New-RedditSource { param ( - [string] $Name, [string] $Url ) $urlEncoded = [uri]::EscapeDataString($Url) - $param = "name=$Name&url=$urlEncoded" + $param = "url=$urlEncoded" $uri = "$ApiServer/api/sources/new/reddit?$param" $res = Invoke-RestMethod -Method Post -Uri $uri return $res } -function NewRssSource { +function New-RssSource { param ( [string] $Name, [string] $Url @@ -31,19 +30,18 @@ function NewRssSource { return $res } -function NewYoutubeSource { +function New-YoutubeSource { param ( - [string] $Name, - [string] $Url + [Parameter(Required)][string] $Url ) $urlEncoded = [uri]::EscapeDataString($Url) - [string] $param = "name=$Name&url=$urlEncoded" + [string] $param = "url=$urlEncoded" [string] $uri = "$ApiServer/api/sources/new/youtube?$param" $res = Invoke-RestMethod -Method Post -Uri $uri return $res } -function NewTwitchSource { +function New-TwitchSource { param ( [string] $Name ) @@ -81,18 +79,18 @@ function New-Subscription { # Load Secrets file $secrets = Get-Content $JsonSecrets -Raw | ConvertFrom-Json -$redditDadJokes = NewRedditSource -Name "dadjokes" -Url "https://reddit.com/r/dadjokes" -$redditSteamDeck = NewRedditSource -Name "steamdeck" -Url "https://reddit.com/r/steamdeck" +$redditDadJokes = New-RedditSource -Name "dadjokes" +$redditSteamDeck = New-RedditSource -Name "steamdeck" -$rssSteamDeck = NewRssSource -Name "Steampowered - Steam Deck" -Url "https://store.steampowered.com/feeds/news/app/1675200/?cc=US&l=english&snr=1_2108_9__2107" -$rssFaysHaremporium = NewRssSource -Name "Fay's Haremporium" -Url "https://blog.nyxstudios.moe/rss/" -$rssPodcastLetsMosley = NewRssSource -Name "Let's Mosley" -Url "https://anchor.fm/s/6c7aa4c4/podcast/rss" +$rssSteamDeck = New-RssSource -Name "Steampowered - Steam Deck" -Url "https://store.steampowered.com/feeds/news/app/1675200/?cc=US&l=english&snr=1_2108_9__2107" +$rssFaysHaremporium = New-RssSource -Name "Fay's Haremporium" -Url "https://blog.nyxstudios.moe/rss/" +$rssPodcastLetsMosley = New-RssSource -Name "Let's Mosley" -Url "https://anchor.fm/s/6c7aa4c4/podcast/rss" -$youtubeGameGrumps = NewYoutubeSource -Name "Game Grumps" -Url "https://www.youtube.com/user/GameGrumps" -$youtubeCityPlannerPlays = NewYoutubeSource -Name "City Planner Plays" -Url "https://www.youtube.com/c/cityplannerplays" +$youtubeGameGrumps = New-YoutubeSource -Url "https://www.youtube.com/user/GameGrumps" +$youtubeCityPlannerPlays = New-YoutubeSource -Url "https://www.youtube.com/c/cityplannerplays" +$youtubeLinusTechTips = New-YoutubeSource -Url "https://www.youtube.com/@LinusTechTips" $twitchNintendo = NewTwitchSource -Name "Nintendo" -$twitchNintendo.id $miharuMonitor = New-DiscordWebhook -Server "Miharu Monitor" -Channel "dev" -Url $secrets.MiharuMonitor.dev01 @@ -103,4 +101,5 @@ New-Subscription -SourceId $rssFaysHaremporium.id -DiscordWebhookId $miharuMonit New-Subscription -SourceId $rssPodcastLetsMosley.id -DiscordWebhookId $miharuMonitor.id New-Subscription -SourceId $youtubeGameGrumps.id -DiscordWebhookId $miharuMonitor.id New-Subscription -SourceId $youtubeCityPlannerPlays.id -DiscordWebhookId $miharuMonitor.id +New-Subscription -SourceId $youtubeLinusTechTips.id -DiscordWebhookId $miharuMonitor.id New-Subscription -SourceId $twitchNintendo.id -DiscordWebhookId $miharuMonitor.id