From 521940ca4f7b3044b4a68720ad18f80e7c263a89 Mon Sep 17 00:00:00 2001 From: James Tombleson Date: Sun, 26 Feb 2023 09:40:04 -0800 Subject: [PATCH] Features/more rss improvements (#6) * exposing connectionStrings to controllers * First controller added to start testing * corrected param to be page not age * new model to map connection strings to for the controllers * HelloWorldJob uses options now to make hangfire happy * improved the html reader to find some rss feeds and start to extract the body of the content * moved html parser to its own namespace and make a sub client to process the header * helpful vsc changes * updated rss watcher to include the sourceId so it can be added to the db call * updated tests to reflect changes * updated gitignore to avoid trash and moved over my makefile * More routes and added serilog * adding more database calls for the controllers * Updated interfaces for the tables * Added Serilog to jobs * removed default files * Added more routes and added DTO * Added DTO objects and SourceType Consts for easy usage * updated discord model name to follow the pattern * updated formatting * new dto objects and Subscriptions repo interface * added subscription db and api calls * focusing on the twitter tags as most sites focus on them * updated test to pull a html based feed --- .gitignore | 2 + .vscode/launch.json | 6 + .vscode/settings.json | 15 +- .../Controllers/ArticlesController.cs | 65 ++++++ .../Controllers/DiscordWebHooksController.cs | 66 +++++++ .../Controllers/SourcesController.cs | 133 +++++++++++++ .../Controllers/SubscriptionsController.cs | 98 +++++++++ .../Controllers/WeatherForecastController.cs | 32 --- .../Newsbot.Collector.Api.csproj | 2 + Newsbot.Collector.Api/Program.cs | 38 ++-- Newsbot.Collector.Api/WeatherForecast.cs | 12 -- .../Repositories/ArticlesTable.cs | 22 ++- ...bhooksTable.cs => DiscordWebhooksTable.cs} | 50 ++--- .../Repositories/SourcesTable.cs | 9 +- .../Repositories/SubscriptionsTable.cs | 80 +++++--- 
.../Consts/ConfigConnectionStringsConst.cs | 9 + .../Consts/ConfigRedditConst.cs | 9 + .../Consts/ConfigTwitchConst.cs | 11 ++ .../Consts/ConfigYoutubeConst.cs | 10 + .../{SourcesConst.cs => SourcesTypes.cs} | 0 .../Dto/ArticleDetailsDto.cs | 41 ++++ Newsbot.Collector.Domain/Dto/ArticleDto.cs | 39 ++++ .../Dto/DiscordWebHookDto.cs | 24 +++ Newsbot.Collector.Domain/Dto/SourceDto.cs | 34 ++++ .../Dto/SubscriptionDetailsDto.cs | 20 ++ .../Dto/SubscriptionDto.cs | 20 ++ .../Exceptions/MissingHeaderValueException.cs | 18 ++ .../Interfaces/IArticlesRepository.cs | 3 +- .../Interfaces/IDiscordWebHooksRepository.cs | 19 ++ .../Interfaces/ISourcesRepository.cs | 2 +- .../Interfaces/ISubscriptionsRepository.cs | 17 ++ Newsbot.Collector.Domain/Models/Config.cs | 5 + .../Models/DatabaseModel.cs | 2 +- Newsbot.Collector.Services/HtmlMeta.cs | 129 ------------ .../HtmlParser/HeadParserClient.cs | 186 ++++++++++++++++++ .../HtmlParser/HtmlPageReader.cs | 71 +++++++ .../HtmlParser/HtmlParserModels.cs | 18 ++ .../Jobs/GithubWatcherJob.cs | 42 ++++ .../Jobs/HelloWorldJob.cs | 16 +- .../Jobs/RssWatcherJob.cs | 32 +-- .../Newsbot.Collector.Services.csproj | 1 + .../Jobs/RssWatcherJobTest.cs | 26 ++- .../Services/HtmlPageReaderTests.cs | 38 ++++ makefile | 25 +++ 44 files changed, 1233 insertions(+), 264 deletions(-) create mode 100644 Newsbot.Collector.Api/Controllers/ArticlesController.cs create mode 100644 Newsbot.Collector.Api/Controllers/DiscordWebHooksController.cs create mode 100644 Newsbot.Collector.Api/Controllers/SourcesController.cs create mode 100644 Newsbot.Collector.Api/Controllers/SubscriptionsController.cs delete mode 100644 Newsbot.Collector.Api/Controllers/WeatherForecastController.cs delete mode 100644 Newsbot.Collector.Api/WeatherForecast.cs rename Newsbot.Collector.Database/Repositories/{WebhooksTable.cs => DiscordWebhooksTable.cs} (65%) create mode 100644 Newsbot.Collector.Domain/Consts/ConfigConnectionStringsConst.cs create mode 100644 
Newsbot.Collector.Domain/Consts/ConfigRedditConst.cs create mode 100644 Newsbot.Collector.Domain/Consts/ConfigTwitchConst.cs create mode 100644 Newsbot.Collector.Domain/Consts/ConfigYoutubeConst.cs rename Newsbot.Collector.Domain/Consts/{SourcesConst.cs => SourcesTypes.cs} (100%) create mode 100644 Newsbot.Collector.Domain/Dto/ArticleDetailsDto.cs create mode 100644 Newsbot.Collector.Domain/Dto/ArticleDto.cs create mode 100644 Newsbot.Collector.Domain/Dto/DiscordWebHookDto.cs create mode 100644 Newsbot.Collector.Domain/Dto/SourceDto.cs create mode 100644 Newsbot.Collector.Domain/Dto/SubscriptionDetailsDto.cs create mode 100644 Newsbot.Collector.Domain/Dto/SubscriptionDto.cs create mode 100644 Newsbot.Collector.Domain/Exceptions/MissingHeaderValueException.cs create mode 100644 Newsbot.Collector.Domain/Interfaces/IDiscordWebHooksRepository.cs create mode 100644 Newsbot.Collector.Domain/Interfaces/ISubscriptionsRepository.cs delete mode 100644 Newsbot.Collector.Services/HtmlMeta.cs create mode 100644 Newsbot.Collector.Services/HtmlParser/HeadParserClient.cs create mode 100644 Newsbot.Collector.Services/HtmlParser/HtmlPageReader.cs create mode 100644 Newsbot.Collector.Services/HtmlParser/HtmlParserModels.cs create mode 100644 Newsbot.Collector.Services/Jobs/GithubWatcherJob.cs create mode 100644 Newsbot.Collector.Tests/Services/HtmlPageReaderTests.cs create mode 100644 makefile diff --git a/.gitignore b/.gitignore index e62a942..e5466fc 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,8 @@ ## ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore +out/ + # User-specific files *.rsuser *.suo diff --git a/.vscode/launch.json b/.vscode/launch.json index c90f1a3..522946f 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -24,6 +24,12 @@ }, "sourceFileMap": { "/Views": "${workspaceFolder}/Views" + }, + "logging": { + "engineLogging": false, + "moduleLoad": false, + "exceptions": false, + "browserStdOut": false } }, { diff 
--git a/.vscode/settings.json b/.vscode/settings.json index b285125..8ec43a2 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -4,5 +4,18 @@ "**/bin": false }, "csharp.inlayHints.types.enabled": true, - "omnisharp.enableImportCompletion": true + "omnisharp.enableImportCompletion": true, + "sqltools.connections": [ + { + "previewLimit": 50, + "server": "localhost", + "port": 5432, + "driver": "PostgreSQL", + "name": "localhost", + "database": "postgres", + "username": "postgres", + "password": "postgres" + } + ], + "editor.formatOnType": true } \ No newline at end of file diff --git a/Newsbot.Collector.Api/Controllers/ArticlesController.cs b/Newsbot.Collector.Api/Controllers/ArticlesController.cs new file mode 100644 index 0000000..b5fefc3 --- /dev/null +++ b/Newsbot.Collector.Api/Controllers/ArticlesController.cs @@ -0,0 +1,65 @@ +using Microsoft.AspNetCore.Mvc; +using Microsoft.Extensions.Options; +using Newsbot.Collector.Database.Repositories; +using Newsbot.Collector.Domain.Dto; +using Newsbot.Collector.Domain.Interfaces; +using Newsbot.Collector.Domain.Models; + +namespace Newsbot.Collector.Api.Controllers; + +[ApiController] +[Route("api/articles")] +public class ArticlesController : ControllerBase +{ + private readonly ILogger _logger; + private readonly ConnectionStrings _settings; + private readonly IArticlesRepository _articles; + private readonly ISourcesRepository _sources; + + public ArticlesController(ILogger logger, IOptions settings) + { + _logger = logger; + _settings = settings.Value; + _articles = new ArticlesTable(_settings.Database); + _sources = new SourcesTable(_settings.Database); + } + + [HttpGet(Name = "GetArticles")] + public IEnumerable Get() + { + var res = new List(); + var items = _articles.List(0, 25); + foreach (var item in items) + { + res.Add(ArticleDto.Convert(item)); + } + return res; + } + + [HttpGet("{id}")] + public ArticleDto GetById(Guid id) + { + var item = _articles.GetById(id); + return 
ArticleDto.Convert(item); + } + + [HttpGet("{id}/details")] + public ArticleDetailsDto GetDetailsById(Guid id) + { + var item = _articles.GetById(id); + var sourceItem = _sources.GetByID(item.SourceID); + return ArticleDetailsDto.Convert(item, sourceItem); + } + + [HttpGet("by/{sourceid}")] + public IEnumerable GetBySourceID(Guid sourceid, int page = 0, int count = 25) + { + var res = new List(); + var items = _articles.ListBySourceId(sourceid, page, count); + foreach (var item in items) + { + res.Add(ArticleDto.Convert(item)); + } + return res; + } +} \ No newline at end of file diff --git a/Newsbot.Collector.Api/Controllers/DiscordWebHooksController.cs b/Newsbot.Collector.Api/Controllers/DiscordWebHooksController.cs new file mode 100644 index 0000000..33807c1 --- /dev/null +++ b/Newsbot.Collector.Api/Controllers/DiscordWebHooksController.cs @@ -0,0 +1,66 @@ +using System.Net; +using Microsoft.AspNetCore.Mvc; +using Microsoft.Extensions.Options; +using Newsbot.Collector.Database.Repositories; +using Newsbot.Collector.Domain.Interfaces; +using Newsbot.Collector.Domain.Models; + +namespace Newsbot.Collector.Api.Controllers; + +[ApiController] +[Route("api/discord/webhooks")] +public class DiscordWebHookController : ControllerBase +{ + private readonly ILogger _logger; + private readonly ConnectionStrings _settings; + private readonly IDiscordWebHooksRepository _webhooks; + + public DiscordWebHookController(ILogger logger, IOptions settings) + { + _logger = logger; + _settings = settings.Value; + _webhooks = new DiscordWebhooksTable(_settings.Database); + } + + [HttpGet(Name = "GetDiscordWebhooks")] + public IEnumerable Get(int page) + { + return _webhooks.List(page); + } + + [HttpPost(Name = "New")] + public DiscordWebHookModel New(string url, string server, string channel) + { + return _webhooks.New(new DiscordWebHookModel + { + Url = url, + Server = server, + Channel = channel, + Enabled = true, + }); + } + + [HttpGet("by/serverAndChannel")] + public IEnumerable 
GetByServerAndChannel(string server, string channel) + { + return _webhooks.ListByServerAndChannel(server, channel, 25); + } + + [HttpGet("{id}")] + public DiscordWebHookModel GetById(Guid id) + { + return _webhooks.GetByID(id); + } + + [HttpPost("{id}/disable")] + public void DisableById(Guid id) + { + _webhooks.Disable(id); + } + + [HttpPost("{id}/enable")] + public void EnableById(Guid id) + { + _webhooks.Enable(id); + } +} \ No newline at end of file diff --git a/Newsbot.Collector.Api/Controllers/SourcesController.cs b/Newsbot.Collector.Api/Controllers/SourcesController.cs new file mode 100644 index 0000000..5e8a039 --- /dev/null +++ b/Newsbot.Collector.Api/Controllers/SourcesController.cs @@ -0,0 +1,133 @@ +using Microsoft.AspNetCore.Mvc; +using Microsoft.Extensions.Options; +using Newsbot.Collector.Database.Repositories; +using Newsbot.Collector.Domain.Consts; +using Newsbot.Collector.Domain.Dto; +using Newsbot.Collector.Domain.Interfaces; +using Newsbot.Collector.Domain.Models; + +namespace Newsbot.Collector.Api.Controllers; + +[ApiController] +[Route("api/sources")] +public class SourcesController : ControllerBase +{ + private readonly ILogger _logger; + private readonly ConnectionStrings _settings; + private readonly ISourcesRepository _sources; + + public SourcesController(ILogger logger, IOptions settings) + { + _logger = logger; + _settings = settings.Value; + _sources = new SourcesTable(_settings.Database); + } + + [HttpGet(Name = "GetSources")] + public IEnumerable Get(int page) + { + var res = new List(); + var temp = _sources.List(page, 25); + foreach (var item in temp) + { + res.Add(SourceDto.Convert(item)); + } + return res; + } + + [HttpGet("by/type")] + public IEnumerable GetByType(string type) + { + var res = new List(); + var temp = _sources.ListByType(type); + foreach (var item in temp) + { + res.Add(SourceDto.Convert(item)); + } + return res; + } + + [HttpPost("new/reddit")] + public SourceDto NewReddit(string name, string url) + { + var 
item = _sources.New(new SourceModel + { + Site = SourceTypes.Reddit, + Name = name, + Type = SourceTypes.Reddit, + Source = "feed", + Enabled = true, + Url = url, + Tags = $"{SourceTypes.Reddit}, {name}" + }); + return SourceDto.Convert(item); + } + + [HttpPost("new/rss")] + public SourceDto NewRss(string name, string url) + { + var item = _sources.New(new SourceModel + { + Site = SourceTypes.Rss, + Name = name, + Type = SourceTypes.Rss, + Source = "feed", + Enabled = true, + Url = url, + Tags = $"{SourceTypes.Rss}, {name}" + }); + return SourceDto.Convert(item); + } + + [HttpPost("new/youtube")] + public SourceDto NewYoutube(string name, string url) + { + var item = _sources.New(new SourceModel + { + Site = SourceTypes.YouTube, + Type = SourceTypes.YouTube, + Name = name, + Source = "feed", + Url = url, + Enabled = true, + Tags = $"{SourceTypes.YouTube}, {name}" + }); + + return SourceDto.Convert(item); + } + + [HttpPost("new/twitch")] + public SourceDto NewTwitch(string name) + { + var item = _sources.New(new SourceModel + { + Site = SourceTypes.Twitch, + Type = SourceTypes.Twitch, + Name = name, + Url = $"https://twitch.tv/{name}", + Source = "api", + Enabled = true, + Tags = $"{SourceTypes.Twitch}, {name}" + }); + return SourceDto.Convert(item); + } + + [HttpGet("{id}")] + public SourceDto GetById(Guid id) + { + var item = _sources.GetByID(id); + return SourceDto.Convert(item); + } + + [HttpPost("{id}/disable")] + public void Disable(Guid id) + { + _sources.Disable(id); + } + + [HttpPost("{id}/enable")] + public void Enable(Guid id) + { + _sources.Enable(id); + } +} \ No newline at end of file diff --git a/Newsbot.Collector.Api/Controllers/SubscriptionsController.cs b/Newsbot.Collector.Api/Controllers/SubscriptionsController.cs new file mode 100644 index 0000000..38eaccd --- /dev/null +++ b/Newsbot.Collector.Api/Controllers/SubscriptionsController.cs @@ -0,0 +1,98 @@ +using Microsoft.AspNetCore.Mvc; +using Microsoft.Extensions.Options; +using 
Newsbot.Collector.Database.Repositories; +using Newsbot.Collector.Domain.Dto; +using Newsbot.Collector.Domain.Interfaces; +using Newsbot.Collector.Domain.Models; + +namespace Newsbot.Collector.Api.Controllers; + +[ApiController] +[Route("api/subscriptions")] +public class SubscriptionsController : ControllerBase +{ + private readonly ILogger _logger; + private readonly ConnectionStrings _settings; + private readonly ISubscriptionRepository _subscription; + private readonly IDiscordWebHooksRepository _discord; + private readonly ISourcesRepository _sources; + public SubscriptionsController(ILogger logger, IOptions settings) + { + _logger = logger; + _settings = settings.Value; + _subscription = new SubscriptionsTable(_settings.Database); + _discord = new DiscordWebhooksTable(_settings.Database); + _sources = new SourcesTable(_settings.Database); + } + + [HttpGet(Name = "ListSubscriptions")] + public IEnumerable List(int page) + { + var res = new List(); + var items = _subscription.List(page); + foreach (var item in items) + { + res.Add(SubscriptionDto.Convert(item)); + } + return res; + } + + [HttpGet("{id}")] + public SubscriptionDto GetById(Guid id) + { + return SubscriptionDto.Convert(_subscription.GetById(id)); + } + + [HttpGet("{id}/details")] + public SubscriptionDetailsDto GetDetailsById(Guid id) + { + var sub = _subscription.GetById(id); + var webhook = _discord.GetByID(sub.DiscordWebHookID); + var source = _sources.GetByID(sub.SourceID); + + return SubscriptionDetailsDto.Convert(sub, source, webhook); + } + + [HttpPost("{id}/delete")] + public void DeleteById(Guid id) + { + _subscription.Delete(id); + } + + [HttpGet("by/discordid")] + public IEnumerable GetByDiscordId(Guid id) + { + var res = new List(); + var items = _subscription.ListByWebhook(id); + foreach (var item in items) + { + res.Add(SubscriptionDto.Convert(item)); + } + return res; + } + + [HttpGet("by/sourceid")] + public IEnumerable GetBySourceId(Guid id) + { + var res = new List(); + var items 
= _subscription.ListBySourceID(id); + foreach (var item in items) + { + res.Add(SubscriptionDto.Convert(item)); + } + return res; + } + + [HttpPost("new")] + public SubscriptionDto New(Guid sourceId, Guid discordId) + { + var item = _subscription.New(new SubscriptionModel + { + ID = Guid.NewGuid(), + SourceID = sourceId, + DiscordWebHookID = discordId + }); + + return SubscriptionDto.Convert(item); + } +} \ No newline at end of file diff --git a/Newsbot.Collector.Api/Controllers/WeatherForecastController.cs b/Newsbot.Collector.Api/Controllers/WeatherForecastController.cs deleted file mode 100644 index 8a25c93..0000000 --- a/Newsbot.Collector.Api/Controllers/WeatherForecastController.cs +++ /dev/null @@ -1,32 +0,0 @@ -using Microsoft.AspNetCore.Mvc; - -namespace Newsbot.Collector.Api.Controllers; - -[ApiController] -[Route("[controller]")] -public class WeatherForecastController : ControllerBase -{ - private static readonly string[] Summaries = new[] - { - "Freezing", "Bracing", "Chilly", "Cool", "Mild", "Warm", "Balmy", "Hot", "Sweltering", "Scorching" - }; - - private readonly ILogger _logger; - - public WeatherForecastController(ILogger logger) - { - _logger = logger; - } - - [HttpGet(Name = "GetWeatherForecast")] - public IEnumerable Get() - { - return Enumerable.Range(1, 5).Select(index => new WeatherForecast - { - Date = DateOnly.FromDateTime(DateTime.Now.AddDays(index)), - TemperatureC = Random.Shared.Next(-20, 55), - Summary = Summaries[Random.Shared.Next(Summaries.Length)] - }) - .ToArray(); - } -} diff --git a/Newsbot.Collector.Api/Newsbot.Collector.Api.csproj b/Newsbot.Collector.Api/Newsbot.Collector.Api.csproj index f0697ff..f56a537 100644 --- a/Newsbot.Collector.Api/Newsbot.Collector.Api.csproj +++ b/Newsbot.Collector.Api/Newsbot.Collector.Api.csproj @@ -11,6 +11,8 @@ + + diff --git a/Newsbot.Collector.Api/Program.cs b/Newsbot.Collector.Api/Program.cs index d0b2e00..4d766d0 100644 --- a/Newsbot.Collector.Api/Program.cs +++ 
b/Newsbot.Collector.Api/Program.cs @@ -1,24 +1,37 @@ using Hangfire; using Hangfire.MemoryStorage; +using Serilog; using Newsbot.Collector.Services.Jobs; using Newsbot.Collector.Domain.Models; +using Newsbot.Collector.Domain.Consts; + +Log.Logger = new LoggerConfiguration() + .WriteTo.Console() + .CreateLogger(); + +Log.Information("Starting up"); var builder = WebApplication.CreateBuilder(args); -// Add services to the container. +// Define Logger +builder.Host.UseSerilog(); // <-- Add this line // Build the conifg var config = GetConfiguration(); builder.Configuration.AddConfiguration(config); +// Configure Hangfire builder.Services.AddHangfire(f => f.UseMemoryStorage()); builder.Services.AddHangfireServer(); +GlobalConfiguration.Configuration.UseSerilogLogProvider(); builder.Services.AddControllers(); // Learn more about configuring Swagger/OpenAPI at https://aka.ms/aspnetcore/swashbuckle builder.Services.AddEndpointsApiExplorer(); builder.Services.AddSwaggerGen(); +builder.Services.Configure(config.GetSection("ConnectionStrings")); + var app = builder.Build(); // Configure the HTTP request pipeline. @@ -31,7 +44,7 @@ if (app.Environment.IsDevelopment()) app.UseHttpsRedirection(); app.UseHangfireDashboard(); -SetupRecurringJobs(config); +SetupRecurringJobs(config, Log.Logger); app.UseAuthorization(); @@ -39,6 +52,7 @@ app.MapControllers(); app.Run(); + static IConfiguration GetConfiguration() { return new ConfigurationBuilder() @@ -47,24 +61,14 @@ static IConfiguration GetConfiguration() .Build(); } -static void SetupRecurringJobs(IConfiguration configuration) +static void SetupRecurringJobs(IConfiguration configuration, Serilog.ILogger logger) { - var databaseConnectionString = configuration.GetConnectionString("database"); - if (databaseConnectionString is null) - { - databaseConnectionString = ""; - } - RecurringJob.AddOrUpdate("Example", x => x.InitAndExecute(new HelloWorldJobOptions { Message = "Hello from the background!" 
- }), "0/2 * * * *"); - //RecurringJob.AddOrUpdate("RSS", x => x.InitAndExecute(config), "15 0-23 * * *"); - - var c = new RssWatcherJob(); - BackgroundJob.Enqueue(() => c.InitAndExecute(new RssWatcherJobOptions + }), "0/1 * * * *"); + RecurringJob.AddOrUpdate("RSS", x => x.InitAndExecute(new RssWatcherJobOptions { - ConnectionString = databaseConnectionString - })); - + ConnectionString = configuration.GetSection(ConfigConnectionStringConst.Database).Value ?? "" + }), "15 0-23 * * *"); } diff --git a/Newsbot.Collector.Api/WeatherForecast.cs b/Newsbot.Collector.Api/WeatherForecast.cs deleted file mode 100644 index 48c6e29..0000000 --- a/Newsbot.Collector.Api/WeatherForecast.cs +++ /dev/null @@ -1,12 +0,0 @@ -namespace Newsbot.Collector.Api; - -public class WeatherForecast -{ - public DateOnly Date { get; set; } - - public int TemperatureC { get; set; } - - public int TemperatureF => 32 + (int)(TemperatureC / 0.5556); - - public string? Summary { get; set; } -} diff --git a/Newsbot.Collector.Database/Repositories/ArticlesTable.cs b/Newsbot.Collector.Database/Repositories/ArticlesTable.cs index 8c8e318..d0821db 100644 --- a/Newsbot.Collector.Database/Repositories/ArticlesTable.cs +++ b/Newsbot.Collector.Database/Repositories/ArticlesTable.cs @@ -41,7 +41,12 @@ public class ArticlesTable : IArticlesRepository var res = conn.Query(@"select * from articles Order By PubDate Desc Offset @Page - Fetch Next @Count Rows Only", new { Page = page * count, Count = count }).ToList(); + Fetch Next @Count Rows Only", new + { + Page = page * count, + Count = count + }) + .ToList(); return res; } @@ -67,6 +72,21 @@ public class ArticlesTable : IArticlesRepository return res.First(); } + public List ListBySourceId(Guid id, int page, int count) + { + using var conn = OpenConnection(_connectionString); + var query = @"Select * from articles + where sourceid = @sourceid + Offset @page + Fetch next @count rows only"; + return conn.Query(query, new + { + sourceid = id, + page = page * 
count, + count = count + }).ToList(); + } + public ArticlesModel New(ArticlesModel model) { model.ID = Guid.NewGuid(); diff --git a/Newsbot.Collector.Database/Repositories/WebhooksTable.cs b/Newsbot.Collector.Database/Repositories/DiscordWebhooksTable.cs similarity index 65% rename from Newsbot.Collector.Database/Repositories/WebhooksTable.cs rename to Newsbot.Collector.Database/Repositories/DiscordWebhooksTable.cs index 22d34ce..0af712a 100644 --- a/Newsbot.Collector.Database/Repositories/WebhooksTable.cs +++ b/Newsbot.Collector.Database/Repositories/DiscordWebhooksTable.cs @@ -1,21 +1,22 @@ using System.Data; using Dapper; using Microsoft.Extensions.Configuration; +using Newsbot.Collector.Domain.Interfaces; using Newsbot.Collector.Domain.Models; using Npgsql; namespace Newsbot.Collector.Database.Repositories; -public class WebhooksTable +public class DiscordWebhooksTable : IDiscordWebHooksRepository { private string _connectionString; - public WebhooksTable(string connectionString) + public DiscordWebhooksTable(string connectionString) { _connectionString = connectionString; } - public WebhooksTable(IConfiguration configuration) + public DiscordWebhooksTable(IConfiguration configuration) { var connstr = configuration.GetConnectionString("database"); if (connstr is null) @@ -32,66 +33,71 @@ public class WebhooksTable return conn; } - public void New(DiscordWebHook model) + public DiscordWebHookModel New(DiscordWebHookModel model) { + var uid = Guid.NewGuid(); using var conn = OpenConnection(_connectionString); var query = "Insert Into DiscordWebHooks (ID, Url, Server, Channel, Enabled) Values (@id, @url, @server, @channel, @enabled);"; conn.Execute(query, new { - id = model.ID, + id = uid, url = model.Url, server = model.Server, channel = model.Channel, enabled = model.Enabled }); + model.ID = uid; + return model; } - public DiscordWebHook GetByID(Guid ID) + public DiscordWebHookModel GetByID(Guid id) { using var conn = OpenConnection(_connectionString); var query 
= "Select * from DiscordWebHooks Where ID = @id LIMIT 1;"; - return conn.Query(query, new + return conn.Query(query, new { - id = ID + id = id }).First(); } - public DiscordWebHook GetByUrl(string url) + public DiscordWebHookModel GetByUrl(string url) { using var conn = OpenConnection(_connectionString); var query = "Select * From DiscordWebHooks Where url = @url;"; - return conn.QueryFirst(query, new + return conn.QueryFirst(query, new { url = url }); } - public List List(int limit = 25) + public List List(int page, int count = 25) { using var conn = OpenConnection(_connectionString); - var query = "Select * From DiscordWebHooks @limit;"; - return conn.Query(query, new + var query = @"Select * From DiscordWebHooks + Offset @offset Fetch Next @count Rows Only;"; + return conn.Query(query, new { - limit = limit + offset = page * count, + count = count }).ToList(); } - public List ListByServer(string server, int limit = 25) + public List ListByServer(string server, int limit = 25) { using var conn = OpenConnection(_connectionString); var query = "Select * From DiscordWebHooks Where Server = @id Limit @limit;"; - return conn.Query(query, new + return conn.Query(query, new { server = server, limit = limit }).ToList(); } - public List ListByServerAndChannel(string server, string channel, int limit = 25) + public List ListByServerAndChannel(string server, string channel, int limit = 25) { using var conn = OpenConnection(_connectionString); var query = "SELECT * FROM DiscordWebHooks WHERE Server = @server and Channel = @channel Limit @limit;"; - return conn.Query(query, new + return conn.Query(query, new { server = server, channel = channel, @@ -99,23 +105,23 @@ public class WebhooksTable }).ToList(); } - public int Disable(Guid ID) + public int Disable(Guid id) { using var conn = OpenConnection(_connectionString); var query = "Update discordwebhooks Set Enabled = FALSE where ID = @id;"; return conn.Execute(query, new { - id = ID + id = id }); } - public int Enable(Guid 
ID) + public int Enable(Guid id) { using var conn = OpenConnection(_connectionString); var query = "Update discordwebhooks Set Enabled = TRUE where ID = @id;"; return conn.Execute(query, new { - id = ID + id = id }); } } \ No newline at end of file diff --git a/Newsbot.Collector.Database/Repositories/SourcesTable.cs b/Newsbot.Collector.Database/Repositories/SourcesTable.cs index a47b734..e05e9bd 100644 --- a/Newsbot.Collector.Database/Repositories/SourcesTable.cs +++ b/Newsbot.Collector.Database/Repositories/SourcesTable.cs @@ -107,13 +107,16 @@ public class SourcesTable : ISourcesRepository return res.First(); } - public List List(int limit = 25) + public List List(int page = 0, int count = 25) { using var conn = OpenConnection(_connectionString); - var query = "Select * From Sources Limit @limit;"; + var query = @"Select * From Sources + Offset @page + Fetch Next @count Rows Only;"; return conn.Query(query, new { - limit = 25 + page = page * count, + count = count }).ToList(); } diff --git a/Newsbot.Collector.Database/Repositories/SubscriptionsTable.cs b/Newsbot.Collector.Database/Repositories/SubscriptionsTable.cs index bf48c05..5679a7b 100644 --- a/Newsbot.Collector.Database/Repositories/SubscriptionsTable.cs +++ b/Newsbot.Collector.Database/Repositories/SubscriptionsTable.cs @@ -1,12 +1,13 @@ using System.Data; using Dapper; using Microsoft.Extensions.Configuration; +using Newsbot.Collector.Domain.Interfaces; using Newsbot.Collector.Domain.Models; using Npgsql; namespace Newsbot.Collector.Database.Repositories; -public class SubscriptionsTable +public class SubscriptionsTable : ISubscriptionRepository { private string _connectionString; @@ -32,65 +33,100 @@ public class SubscriptionsTable return conn; } - public void New(SubscriptionModel model) + public SubscriptionModel New(SubscriptionModel model) { + model.ID = Guid.NewGuid(); using var conn = OpenConnection(_connectionString); var query = "Insert Into subscriptions (ID, DiscordWebHookId, SourceId) Values 
(@id, @webhookid, @sourceid);"; conn.Execute(query, new { - id = Guid.NewGuid(), + id = model.ID, webhookid = model.DiscordWebHookID, sourceid = model.SourceID }); + return model; } - public List List(int limit = 25) + public List List(int page = 0, int count = 25) { using var conn = OpenConnection(_connectionString); - var query = "Select * From subscriptions Limit @limit;"; + var query = @"Select * From subscriptions + Offset @page Fetch Next @count Rows Only;"; return conn.Query(query, new { - limit = limit, + page = page * count, + count = count }).ToList(); } // todo add paging - public List ListBySourceID(Guid sourceID) + public List ListBySourceID(Guid id, int page = 0, int count = 25) { using var conn = OpenConnection(_connectionString); - var query = "Select * From subscriptions where sourceid = @sourceid"; + var query = @"Select * From subscriptions + Offset @page Fetch Next @count Rows Only + Where sourceid = @sourceid"; return conn.Query(query, new { - sourceid = sourceID + page = page * count, + count = count, + sourceid = id }).ToList(); } - public List GetByWebhookAndSource(Guid webhookId, Guid sourceId) + public List ListByWebhook(Guid id, int page = 0, int count = 25) { using var conn = OpenConnection(_connectionString); - var query = "Select * From subscriptions Where discordwebhookid = @webhookid and sourceid = @sourceid;"; + var query = @"Select * From subscriptions + Offset @page Fetch Next @count Rows Only + Where discordwebhookid = @webhookid"; return conn.Query(query, new + { + page = page * count, + count = count, + webhookid = id, + }).ToList(); + } + + public SubscriptionModel GetById(Guid id) + { + using var conn = OpenConnection(_connectionString); + var query = @"Select * From subscriptions Where id = @id;"; + var res = conn.Query(query, new + { + id = id, + }); + if (res.Count() == 0) + { + return new SubscriptionModel(); + } + return res.First(); + } + + public SubscriptionModel GetByWebhookAndSource(Guid webhookId, Guid sourceId) + 
{ + using var conn = OpenConnection(_connectionString); + var query = @"Select * From subscriptions + Where discordwebhookid = @webhookid + and sourceid = @sourceid;"; + var res = conn.Query(query, new { webhookid = webhookId, sourceid = sourceId, - }).ToList(); - } - - public List ListByWebhook(Guid webhookId) - { - using var conn = OpenConnection(_connectionString); - var query = "Select * From subscriptions Where discordwebhookid = @webhookid"; - return conn.Query(query, new + }); + if (res.Count() == 0) { - webhookid = webhookId, - }).ToList(); + return new SubscriptionModel(); + } + return res.First(); } public void Delete(Guid id) { using var conn = OpenConnection(_connectionString); var query = "Delete From subscriptions Where id = @id;"; - conn.Execute(query, new { + conn.Execute(query, new + { id = id }); } diff --git a/Newsbot.Collector.Domain/Consts/ConfigConnectionStringsConst.cs b/Newsbot.Collector.Domain/Consts/ConfigConnectionStringsConst.cs new file mode 100644 index 0000000..c8a2995 --- /dev/null +++ b/Newsbot.Collector.Domain/Consts/ConfigConnectionStringsConst.cs @@ -0,0 +1,9 @@ +namespace Newsbot.Collector.Domain.Consts; + +/// +/// This class contains const entries to access keys within IConfiguration. 
+/// +public class ConfigConnectionStringConst +{ + public const string Database = "ConnectionStrings:Database"; +} \ No newline at end of file diff --git a/Newsbot.Collector.Domain/Consts/ConfigRedditConst.cs b/Newsbot.Collector.Domain/Consts/ConfigRedditConst.cs new file mode 100644 index 0000000..a40031b --- /dev/null +++ b/Newsbot.Collector.Domain/Consts/ConfigRedditConst.cs @@ -0,0 +1,9 @@ +namespace Newsbot.Collector.Domain.Consts; + +public class ConfigRedditConst +{ + public const string IsEnabled = "Reddit:IsEnabled"; + public const string PullHot = "Reddit:PullHot"; + public const string PullNsfw = "Reddit:PullNsfw"; + public const string PullTop = "Reddit:PullTop"; +} \ No newline at end of file diff --git a/Newsbot.Collector.Domain/Consts/ConfigTwitchConst.cs b/Newsbot.Collector.Domain/Consts/ConfigTwitchConst.cs new file mode 100644 index 0000000..24b02fd --- /dev/null +++ b/Newsbot.Collector.Domain/Consts/ConfigTwitchConst.cs @@ -0,0 +1,11 @@ +namespace Newsbot.Collector.Domain.Consts; + +/// +/// This class contains const entries to access keys within IConfiguration. +/// +public class ConfigTwitchConst +{ + public const string IsEnabled = "Twitch:IsEnabled"; + public const string ClientID = "Twitch:ClientID"; + public const string ClientSecret = "Twitch:ClientSecret"; +} \ No newline at end of file diff --git a/Newsbot.Collector.Domain/Consts/ConfigYoutubeConst.cs b/Newsbot.Collector.Domain/Consts/ConfigYoutubeConst.cs new file mode 100644 index 0000000..41012f7 --- /dev/null +++ b/Newsbot.Collector.Domain/Consts/ConfigYoutubeConst.cs @@ -0,0 +1,10 @@ +namespace Newsbot.Collector.Domain.Consts; + +/// +/// This class contains const entries to access keys within IConfiguration. 
+/// +public class ConfigYoutubeConst +{ + public const string IsEnable = "Youtube:IsEnabled"; + public const string Debug = "Youtube:Debug"; +} \ No newline at end of file diff --git a/Newsbot.Collector.Domain/Consts/SourcesConst.cs b/Newsbot.Collector.Domain/Consts/SourcesTypes.cs similarity index 100% rename from Newsbot.Collector.Domain/Consts/SourcesConst.cs rename to Newsbot.Collector.Domain/Consts/SourcesTypes.cs diff --git a/Newsbot.Collector.Domain/Dto/ArticleDetailsDto.cs b/Newsbot.Collector.Domain/Dto/ArticleDetailsDto.cs new file mode 100644 index 0000000..1624a58 --- /dev/null +++ b/Newsbot.Collector.Domain/Dto/ArticleDetailsDto.cs @@ -0,0 +1,41 @@ +using Newsbot.Collector.Domain.Models; + +namespace Newsbot.Collector.Domain.Dto; + +public class ArticleDetailsDto +{ + public Guid ID { get; set; } + public string[]? Tags { get; set; } + public string? Title { get; set; } + public string? Url { get; set; } + public DateTime PubDate { get; set; } + public string? Video { get; set; } + public int VideoHeight { get; set; } + public int VideoWidth { get; set; } + public string? Thumbnail { get; set; } + public string? Description { get; set; } + public string? AuthorName { get; set; } + public string? AuthorImage { get; set; } + + public SourceDto? 
Source { get; set; } + + public static ArticleDetailsDto Convert(ArticlesModel article, SourceModel source) + { + return new ArticleDetailsDto + { + ID = article.ID, + Tags = article.Tags.Split(','), + Title = article.Title, + Url = article.URL, + PubDate = article.PubDate, + Video = article.Video, + VideoHeight = article.VideoHeight, + VideoWidth = article.VideoWidth, + Thumbnail = article.Thumbnail, + Description = article.Description, + AuthorName = article.AuthorName, + AuthorImage = article.AuthorImage, + Source = SourceDto.Convert(source) + }; + } +} diff --git a/Newsbot.Collector.Domain/Dto/ArticleDto.cs b/Newsbot.Collector.Domain/Dto/ArticleDto.cs new file mode 100644 index 0000000..0532f32 --- /dev/null +++ b/Newsbot.Collector.Domain/Dto/ArticleDto.cs @@ -0,0 +1,39 @@ +using Newsbot.Collector.Domain.Models; + +namespace Newsbot.Collector.Domain.Dto; + +public class ArticleDto +{ + public Guid ID { get; set; } + public Guid SourceID { get; set; } + public string[]? Tags { get; set; } + public string? Title { get; set; } + public string? Url { get; set; } + public DateTime PubDate { get; set; } + public string? Video { get; set; } + public int VideoHeight { get; set; } + public int VideoWidth { get; set; } + public string? Thumbnail { get; set; } + public string? Description { get; set; } + public string? AuthorName { get; set; } + public string? 
AuthorImage { get; set; } + public static ArticleDto Convert(ArticlesModel article) + { + return new ArticleDto + { + ID = article.ID, + SourceID = article.SourceID, + Tags = article.Tags.Split(','), + Title = article.Title, + Url = article.URL, + PubDate = article.PubDate, + Video = article.Video, + VideoHeight = article.VideoHeight, + VideoWidth = article.VideoWidth, + Thumbnail = article.Thumbnail, + Description = article.Description, + AuthorName = article.AuthorName, + AuthorImage = article.AuthorImage, + }; + } +} \ No newline at end of file diff --git a/Newsbot.Collector.Domain/Dto/DiscordWebHookDto.cs b/Newsbot.Collector.Domain/Dto/DiscordWebHookDto.cs new file mode 100644 index 0000000..0eeea4d --- /dev/null +++ b/Newsbot.Collector.Domain/Dto/DiscordWebHookDto.cs @@ -0,0 +1,24 @@ +using Newsbot.Collector.Domain.Models; + +namespace Newsbot.Collector.Domain.Dto; + +public class DiscordWebHookDto +{ + public Guid ID { get; set; } + public string? Url { get; set; } + public string? Server { get; set; } + public string? Channel { get; set; } + public bool Enabled { get; set; } + + public static DiscordWebHookDto Convert(DiscordWebHookModel model) + { + return new DiscordWebHookDto + { + ID = model.ID, + Url = model.Url, + Server = model.Server, + Channel = model.Channel, + Enabled = model.Enabled, + }; + } +} \ No newline at end of file diff --git a/Newsbot.Collector.Domain/Dto/SourceDto.cs b/Newsbot.Collector.Domain/Dto/SourceDto.cs new file mode 100644 index 0000000..8a9e051 --- /dev/null +++ b/Newsbot.Collector.Domain/Dto/SourceDto.cs @@ -0,0 +1,34 @@ +using System.Net.NetworkInformation; +using Newsbot.Collector.Domain.Models; + +namespace Newsbot.Collector.Domain.Dto; + +public class SourceDto +{ + public Guid ID { get; set; } + public string? Site { get; set; } + public string? Name { get; set; } + public string? Source { get; set; } + public string? Type { get; set; } + public string? 
Value { get; set; } + public bool Enabled { get; set; } + public string? Url { get; set; } + public string[]? Tags { get; set; } + public bool Deleted { get; set; } + + public static SourceDto Convert(SourceModel model) { + return new SourceDto + { + ID = model.ID, + Site = model.Site, + Name = model.Name, + Source = model.Source, + Type = model.Type, + Value = model.Value, + Enabled = model.Enabled, + Url = model.Url, + Tags = model.Tags.Split(','), + Deleted = model.Deleted + }; + } +} \ No newline at end of file diff --git a/Newsbot.Collector.Domain/Dto/SubscriptionDetailsDto.cs b/Newsbot.Collector.Domain/Dto/SubscriptionDetailsDto.cs new file mode 100644 index 0000000..70c8665 --- /dev/null +++ b/Newsbot.Collector.Domain/Dto/SubscriptionDetailsDto.cs @@ -0,0 +1,20 @@ +using Newsbot.Collector.Domain.Models; + +namespace Newsbot.Collector.Domain.Dto; + +public class SubscriptionDetailsDto +{ + public Guid ID { get; set; } + public SourceDto? Source { get; set; } + public DiscordWebHookDto? 
DiscordWebHook { get; set; } + + public static SubscriptionDetailsDto Convert(SubscriptionModel subscription, SourceModel source, DiscordWebHookModel discord) + { + return new SubscriptionDetailsDto + { + ID = subscription.ID, + Source = SourceDto.Convert(source), + DiscordWebHook = DiscordWebHookDto.Convert(discord) + }; + } +} \ No newline at end of file diff --git a/Newsbot.Collector.Domain/Dto/SubscriptionDto.cs b/Newsbot.Collector.Domain/Dto/SubscriptionDto.cs new file mode 100644 index 0000000..e0132ce --- /dev/null +++ b/Newsbot.Collector.Domain/Dto/SubscriptionDto.cs @@ -0,0 +1,20 @@ +using Newsbot.Collector.Domain.Models; + +namespace Newsbot.Collector.Domain.Dto; + +public class SubscriptionDto +{ + public Guid ID { get; set; } + public Guid SourceID { get; set; } + public Guid DiscordWebHookID { get; set; } + + public static SubscriptionDto Convert(SubscriptionModel model) + { + return new SubscriptionDto + { + ID = model.ID, + SourceID = model.SourceID, + DiscordWebHookID = model.DiscordWebHookID + }; + } +} \ No newline at end of file diff --git a/Newsbot.Collector.Domain/Exceptions/MissingHeaderValueException.cs b/Newsbot.Collector.Domain/Exceptions/MissingHeaderValueException.cs new file mode 100644 index 0000000..d51b7a1 --- /dev/null +++ b/Newsbot.Collector.Domain/Exceptions/MissingHeaderValueException.cs @@ -0,0 +1,18 @@ +namespace Newsbot.Collector.Domain.Exceptions; + +public class MissingHeaderValueException : Exception +{ + public MissingHeaderValueException() + { + } + + public MissingHeaderValueException(string message) + : base(message) + { + } + + public MissingHeaderValueException(string message, Exception inner) + : base(message, inner) + { + } +} \ No newline at end of file diff --git a/Newsbot.Collector.Domain/Interfaces/IArticlesRepository.cs b/Newsbot.Collector.Domain/Interfaces/IArticlesRepository.cs index 9c1493d..9be0fb4 100644 --- a/Newsbot.Collector.Domain/Interfaces/IArticlesRepository.cs +++ 
b/Newsbot.Collector.Domain/Interfaces/IArticlesRepository.cs @@ -4,7 +4,8 @@ namespace Newsbot.Collector.Domain.Interfaces; public interface IArticlesRepository : ITableRepository { - ListList(int age, int count); + ListList(int page, int count); + ListListBySourceId(Guid id, int page = 0, int count = 25); ArticlesModel GetById(Guid ID); ArticlesModel GetByUrl(string url); ArticlesModel New(ArticlesModel model); diff --git a/Newsbot.Collector.Domain/Interfaces/IDiscordWebHooksRepository.cs b/Newsbot.Collector.Domain/Interfaces/IDiscordWebHooksRepository.cs new file mode 100644 index 0000000..cffbaca --- /dev/null +++ b/Newsbot.Collector.Domain/Interfaces/IDiscordWebHooksRepository.cs @@ -0,0 +1,19 @@ +using Microsoft.VisualBasic; +using Newsbot.Collector.Domain.Models; + +namespace Newsbot.Collector.Domain.Interfaces; + +public interface IDiscordWebHooksRepository +{ + DiscordWebHookModel New(DiscordWebHookModel model); + + DiscordWebHookModel GetByID(Guid id); + DiscordWebHookModel GetByUrl(string url); + + List List(int page, int count = 25); + List ListByServer(string server, int limit); + List ListByServerAndChannel(string server, string channel, int limit); + + int Disable(Guid id); + int Enable(Guid id); +} \ No newline at end of file diff --git a/Newsbot.Collector.Domain/Interfaces/ISourcesRepository.cs b/Newsbot.Collector.Domain/Interfaces/ISourcesRepository.cs index 61cbd92..9eed6ef 100644 --- a/Newsbot.Collector.Domain/Interfaces/ISourcesRepository.cs +++ b/Newsbot.Collector.Domain/Interfaces/ISourcesRepository.cs @@ -10,7 +10,7 @@ public interface ISourcesRepository public SourceModel GetByID(string ID); public SourceModel GetByName(string name); public SourceModel GetByNameAndSource(string name, string source); - public List List(int limit); + public List List(int page, int count); public List ListBySource(string source, int limit); public List ListByType(string type, int limit = 25); public int Disable(Guid ID); diff --git 
a/Newsbot.Collector.Domain/Interfaces/ISubscriptionsRepository.cs b/Newsbot.Collector.Domain/Interfaces/ISubscriptionsRepository.cs new file mode 100644 index 0000000..ecfe54b --- /dev/null +++ b/Newsbot.Collector.Domain/Interfaces/ISubscriptionsRepository.cs @@ -0,0 +1,17 @@ +using Newsbot.Collector.Domain.Models; + +namespace Newsbot.Collector.Domain.Interfaces; + +public interface ISubscriptionRepository +{ + SubscriptionModel New(SubscriptionModel model); + + List List(int page = 0, int count = 25); + List ListBySourceID(Guid id, int page = 0, int count = 25); + List ListByWebhook(Guid id, int page = 0, int count = 25); + + SubscriptionModel GetById(Guid id); + SubscriptionModel GetByWebhookAndSource(Guid webhookId, Guid sourceId); + + void Delete(Guid id); +} \ No newline at end of file diff --git a/Newsbot.Collector.Domain/Models/Config.cs b/Newsbot.Collector.Domain/Models/Config.cs index bee2f48..77dd469 100644 --- a/Newsbot.Collector.Domain/Models/Config.cs +++ b/Newsbot.Collector.Domain/Models/Config.cs @@ -13,4 +13,9 @@ public class RedditConfigModel public bool PullHot { get; set; } public bool PullNsfw { get; set; } public bool PullTop { get; set; } +} + +public class ConnectionStrings +{ + public string Database { get; set; } = ""; } \ No newline at end of file diff --git a/Newsbot.Collector.Domain/Models/DatabaseModel.cs b/Newsbot.Collector.Domain/Models/DatabaseModel.cs index 730f81a..3fd6c5d 100644 --- a/Newsbot.Collector.Domain/Models/DatabaseModel.cs +++ b/Newsbot.Collector.Domain/Models/DatabaseModel.cs @@ -31,7 +31,7 @@ public class DiscordQueueModel public Guid ArticleID { get; set; } } -public class DiscordWebHook +public class DiscordWebHookModel { public Guid ID { get; set; } public string Url { get; set; } = ""; diff --git a/Newsbot.Collector.Services/HtmlMeta.cs b/Newsbot.Collector.Services/HtmlMeta.cs deleted file mode 100644 index 79b3c8d..0000000 --- a/Newsbot.Collector.Services/HtmlMeta.cs +++ /dev/null @@ -1,129 +0,0 @@ -using 
System.Data; -using System.Runtime.Serialization; -using System.Xml; -using HtmlAgilityPack; - -namespace Newsbot.Collector.Services; - -public class HtmlData -{ - public HtmlHeaderData Header { get; set; } = new HtmlHeaderData(); -} - -public class HtmlHeaderData -{ - public HtmlMetaData Meta { get; set; } = new HtmlMetaData(); -} - -public class HtmlMetaData -{ - public string Title { get; set; } = ""; - public string Description { get; set; } = ""; - public string Image { get; set; } = ""; - public string Url { get; set; } = ""; - public string PageType { get; set; } = ""; - //public string Color { get; set; } -} - -public class HtmlPageReader -{ - - public HtmlData Data { get; set; } - - private const string XPathMetaTag = "//head/meta"; - - private string _siteContent; - - public HtmlPageReader(string pageUrl) - { - _siteContent = ReadSiteContent(pageUrl); - var tags = CollectMetaTags(); - - Data = new HtmlData(); - Data.Header.Meta.Title = GetMetaTitle(); - Data.Header.Meta.Description = GetDescription(); - Data.Header.Meta.Image = GetImage(); - Data.Header.Meta.Url = GetUrl(); - Data.Header.Meta.PageType = GetPageType(); - } - - private string ReadSiteContent(string url) - { - using var client = new HttpClient(); - var html = client.GetStringAsync(url); - html.Wait(); - - var content = html.Result.ToString(); - return content; - } - - private List CollectMetaTags() - { - var htmlDoc = new HtmlDocument(); - htmlDoc.LoadHtml(_siteContent); - - var tags = htmlDoc.DocumentNode.SelectNodes(XPathMetaTag).ToList(); - - return tags; - } - - public string GetTagValue(string Tag) - { - var tags = CollectMetaTags(); - - foreach (var meta in tags) - { - //Console.WriteLine($"Name={meta.Attributes[0].Name} & Value={meta.Attributes[0].Value}"); - if (meta.Attributes[0].Value.Contains(Tag) == false) - { - continue; - } - return meta.Attributes[1].Value; - } - return ""; - } - - private string FindFirstResult(string[] tags) - { - foreach (var tag in tags) - { - var res = 
GetTagValue(tag); - if (res == "") - { - continue; - } - return res; - } - return ""; - } - - public string GetMetaTitle() - { - string[] tags = new string[] { "og:title", "twitter:title", "title" }; - return FindFirstResult(tags); - } - - public string GetDescription() - { - string[] tags = new string[] { "description", "og:description" }; - return FindFirstResult(tags); - } - - public string GetImage() - { - string[] tags = new string[] { "image", "og:image", "twitter:image" }; - return FindFirstResult(tags); - } - - public string GetUrl() - { - string[] tags = new string[] { "url", "og:url", "twitter:url" }; - return FindFirstResult(tags); - } - - public string GetPageType() - { - string[] tags = new string[] { "og:type", "type" }; - return FindFirstResult(tags); - } -} \ No newline at end of file diff --git a/Newsbot.Collector.Services/HtmlParser/HeadParserClient.cs b/Newsbot.Collector.Services/HtmlParser/HeadParserClient.cs new file mode 100644 index 0000000..9b0f4a4 --- /dev/null +++ b/Newsbot.Collector.Services/HtmlParser/HeadParserClient.cs @@ -0,0 +1,186 @@ +using HtmlAgilityPack; +using Newsbot.Collector.Domain.Exceptions; + +namespace Newsbot.Collector.Services.HtmlParser; + +public class HeadParserClient +{ + private const string XPathMetaTag = "//head/meta"; + private const string XPathLinkTag = "//head/link"; + + public HeadParserModel Data { get; set; } + + private string _htmlContent; + + public HeadParserClient(string htmlContent) + { + _htmlContent = htmlContent; + Data = new HeadParserModel(); + } + + public void Parse() + { + Data.Title = GetMetaTitle(); + Data.Description = GetMetaDescription(); + Data.Image = GetMetaImage(); + Data.Url = GetMetaUrl(); + Data.PageType = GetMetaPageType(); + Data.ColorTheme = GetMetaColorTheme(); + + Data.FeedUri = GetSiteFeed(); + } + + private List CollectMetaTags() + { + var htmlDoc = new HtmlDocument(); + htmlDoc.LoadHtml(_htmlContent); + + var tags = htmlDoc.DocumentNode.SelectNodes(XPathMetaTag).ToList(); 
+ + return tags; + } + + private List CollectLinkTags() + { + var htmlDoc = new HtmlDocument(); + htmlDoc.LoadHtml(_htmlContent); + var links = htmlDoc.DocumentNode.SelectNodes(XPathLinkTag).ToList(); + return links; + } + + private string GetTagValue(string Tag, List html) + { + foreach (var meta in html) + { + //Console.WriteLine($"Name={meta.Attributes[0].Name} & Value={meta.Attributes[0].Value}"); + if (meta.Attributes[0].Value.Contains(Tag) == false) + { + continue; + } + return meta.Attributes[1].Value; + } + return ""; + } + + private string FindFirstResult(string[] tags, List htmlTags) + { + foreach (var tag in tags) + { + var res = GetTagValue(tag, htmlTags); + if (res == "") + { + continue; + } + return res; + } + return ""; + } + + public string GetMetaTitle() + { + var htmlTags = CollectMetaTags(); + string[] tags = new string[] { "twitter:title", "og:title", "title" }; + return FindFirstResult(tags, htmlTags); + } + + public string GetMetaDescription() + { + var htmlTags = CollectMetaTags(); + string[] tags = new string[] { "twitter:description", "og:description", "description" }; + return FindFirstResult(tags, htmlTags); + } + + public string GetMetaImage() + { + var htmlTags = CollectMetaTags(); + string[] tags = new string[] { "twitter:image", "og:image", "image" }; + return FindFirstResult(tags, htmlTags); + } + + public string GetMetaUrl() + { + var htmlTags = CollectMetaTags(); + string[] tags = new string[] { "twitter:url", "og:url", "url" }; + return FindFirstResult(tags, htmlTags); + } + + public string GetMetaPageType() + { + var htmlTags = CollectMetaTags(); + string[] tags = new string[] { "og:type", "type" }; + return FindFirstResult(tags, htmlTags); + } + + public string GetMetaColorTheme() + { + var htmlTags = CollectMetaTags(); + string[] tags = new string[] { "theme-color" }; + return FindFirstResult(tags, htmlTags); + } + + /// + /// This will parse the headers looking for known keys that will contain a RSS feed link. 
+ /// If the feed is not found, this will throw an exception (MissingHeaderValueException). + /// + /// + public string GetSiteFeed() + { + var htmlTags = CollectLinkTags(); + var tags = new string[] { "alternate" }; + try + { + var attr = FindFirstAttribute(tags, htmlTags); + foreach (var item in attr) + { + if (item.Name != "href") + { + continue; + } + + var uri = item.Value; + if (uri.StartsWith("//")) + { + uri = uri.Replace("//", "https://"); + } + + return uri; + } + return ""; + } + catch + { + // not found + return ""; + } + } + + private HtmlAttributeCollection FindFirstAttribute(string[] tags, List htmlTags) + { + foreach (var tag in tags) + { + try + { + var res = GetValidAttribute(tag, htmlTags); + return res; + } + catch (MissingHeaderValueException) + { + // Nothing was found in the given tag but we will keep looking till we finish all the entries. + } + } + throw new MissingHeaderValueException("Unable to find the requested value"); + } + + private HtmlAttributeCollection GetValidAttribute(string Tag, List html) + { + foreach (var meta in html) + { + if (meta.Attributes[0].Value.Contains(Tag) == false) + { + continue; + } + return meta.Attributes; + } + throw new MissingHeaderValueException("Site does not expose requested tag."); + } +} \ No newline at end of file diff --git a/Newsbot.Collector.Services/HtmlParser/HtmlPageReader.cs b/Newsbot.Collector.Services/HtmlParser/HtmlPageReader.cs new file mode 100644 index 0000000..d138171 --- /dev/null +++ b/Newsbot.Collector.Services/HtmlParser/HtmlPageReader.cs @@ -0,0 +1,71 @@ +using HtmlAgilityPack; +using Newsbot.Collector.Domain.Exceptions; + +namespace Newsbot.Collector.Services.HtmlParser; + +public class HtmlPageReader +{ + + public HtmlData Data { get; set; } + + private HeadParserClient _headClient; + + private string _siteContent; + + public HtmlPageReader(string pageUrl) + { + _siteContent = ReadSiteContent(pageUrl); + _headClient = new HeadParserClient(_siteContent); + + Data = new 
HtmlData(); + } + + public void Parse() + { + _headClient.Parse(); + Data.Header = _headClient.Data; + } + + private string ReadSiteContent(string url) + { + using var client = new HttpClient(); + var html = client.GetStringAsync(url); + html.Wait(); + + var content = html.Result.ToString(); + return content; + } + + public string GetSiteContent() + { + return _siteContent; + } + + public List CollectPostContent() + { + var htmlDoc = new HtmlDocument(); + htmlDoc.LoadHtml(_siteContent); + var links = htmlDoc.DocumentNode.SelectNodes("//div[contains(@class, 'article-text')]").ToList(); + + if (links.Count == 0) + { + throw new Exception("Unable to parse body. Tag is unkown."); + } + + if (links.Count >= 2) + { + throw new Exception("Too many results back for the body"); + } + + var content = new List(); + foreach (var item in links[0].ChildNodes) + { + if (item.Name == "p") + { + content.Add(item.InnerText); + } + } + + return links; + } +} \ No newline at end of file diff --git a/Newsbot.Collector.Services/HtmlParser/HtmlParserModels.cs b/Newsbot.Collector.Services/HtmlParser/HtmlParserModels.cs new file mode 100644 index 0000000..3823bc5 --- /dev/null +++ b/Newsbot.Collector.Services/HtmlParser/HtmlParserModels.cs @@ -0,0 +1,18 @@ +namespace Newsbot.Collector.Services.HtmlParser; + +public class HeadParserModel +{ + public string Title { get; set; } = ""; + public string Description { get; set; } = ""; + public string Image { get; set; } = ""; + public string Url { get; set; } = ""; + public string PageType { get; set; } = ""; + public string ColorTheme { get; set; } = ""; + + public string? 
FeedUri { get; set; } +} + +public class HtmlData +{ + public HeadParserModel Header { get; set; } = new HeadParserModel(); +} \ No newline at end of file diff --git a/Newsbot.Collector.Services/Jobs/GithubWatcherJob.cs b/Newsbot.Collector.Services/Jobs/GithubWatcherJob.cs new file mode 100644 index 0000000..41cac9b --- /dev/null +++ b/Newsbot.Collector.Services/Jobs/GithubWatcherJob.cs @@ -0,0 +1,42 @@ +using Newsbot.Collector.Database.Repositories; +using Newsbot.Collector.Domain.Interfaces; + +namespace Newsbot.Collector.Services.Jobs; + +public class GithubWatcherJobOptions +{ + public string ConnectionString { get; set; } = ""; + public bool FeaturePullReleases { get; set; } = false; + public bool FeaturePullCommits { get; set; } = false; +} + +public class GithubWatcherJob +{ + private IArticlesRepository _articles; + private IDiscordQueueRepository _queue; + private ISourcesRepository _source; + + public GithubWatcherJob() + { + _articles = new ArticlesTable(""); + _queue = new DiscordQueueTable(""); + _source = new SourcesTable(""); + } + + private void Init(GithubWatcherJobOptions options) + { + _articles = new ArticlesTable(options.ConnectionString); + _queue = new DiscordQueueTable(options.ConnectionString); + _source = new SourcesTable(options.ConnectionString); + } + + public void InitAndExecute(GithubWatcherJobOptions options) + { + Init(options); + + // query sources for things to pull + + // query */release.atom + // query */commits.atom + } +} \ No newline at end of file diff --git a/Newsbot.Collector.Services/Jobs/HelloWorldJob.cs b/Newsbot.Collector.Services/Jobs/HelloWorldJob.cs index 8f2f2e3..9152c9b 100644 --- a/Newsbot.Collector.Services/Jobs/HelloWorldJob.cs +++ b/Newsbot.Collector.Services/Jobs/HelloWorldJob.cs @@ -1,5 +1,4 @@ - -using Microsoft.Extensions.Configuration; +using Serilog; namespace Newsbot.Collector.Services.Jobs; @@ -10,21 +9,22 @@ public class HelloWorldJobOptions public class HelloWorldJob { - private HelloWorldJobOptions 
_options; - - public HelloWorldJob(HelloWorldJobOptions options) + private string _message; + + public HelloWorldJob() { - _options = options; + _message = ""; } public void InitAndExecute(HelloWorldJobOptions options) { - _options = options; + _message = options.Message; Execute(); } private void Execute() { - Console.WriteLine(_options.Message); + Log.Information("Starting 'HelloWorldJob'"); + Log.Information(_message); } } \ No newline at end of file diff --git a/Newsbot.Collector.Services/Jobs/RssWatcherJob.cs b/Newsbot.Collector.Services/Jobs/RssWatcherJob.cs index 80173c9..2cb2861 100644 --- a/Newsbot.Collector.Services/Jobs/RssWatcherJob.cs +++ b/Newsbot.Collector.Services/Jobs/RssWatcherJob.cs @@ -1,4 +1,3 @@ -using System.Runtime.InteropServices; using System.ServiceModel.Syndication; using System.Xml; using Microsoft.Extensions.Configuration; @@ -6,6 +5,8 @@ using Newsbot.Collector.Database.Repositories; using Newsbot.Collector.Domain.Consts; using Newsbot.Collector.Domain.Interfaces; using Newsbot.Collector.Domain.Models; +using Newsbot.Collector.Services.HtmlParser; +using Serilog; namespace Newsbot.Collector.Services.Jobs; @@ -31,24 +32,29 @@ public class RssWatcherJob : IHangfireJob public void InitAndExecute(RssWatcherJobOptions options) { - Console.WriteLine("Job was triggered"); - Console.WriteLine("Setting up the job"); + Log.Information("RssWatcherJob - Job was triggered"); + Log.Information("RssWatcherJob - Setting up the job"); Init(options.ConnectionString); var articles = new List(); - Console.WriteLine("Requesting sources"); + Log.Information("RssWatcherJob - Requesting sources"); var sources = _source.ListByType(SourceTypes.Rss); - Console.WriteLine($"Got {sources.Count()} back"); + Log.Information($"RssWatcherJob - Got {sources.Count()} back"); foreach (var source in sources) { - Console.WriteLine("Starting to request feed to be processed"); - var results = Collect(source.Url); + Log.Information($"RssWatcherJob - Starting to proces 
'{source.Name}'"); + Log.Information("RssWatcherJob - Starting to request feed to be processed"); + var results = Collect(source.Url, source.ID); + Log.Information($"RssWatcherJob - Collected {results.Count()} posts"); articles.AddRange(results); } + Log.Information("RssWatcherJob - Sending posts over to the database"); UpdateDatabase(articles); + + Log.Information("RssWatcherJob - Done!"); } public void InitAndExecute(IConfiguration config) @@ -66,7 +72,7 @@ public class RssWatcherJob : IHangfireJob var sources = _source.ListByType(SourceTypes.Rss); foreach (var source in sources) { - var results = Collect(source.Url); + var results = Collect(source.Url, source.ID); articles.AddRange(results); } @@ -81,7 +87,7 @@ public class RssWatcherJob : IHangfireJob _source = new SourcesTable(connectionString); } - public List Collect(string url, int sleep = 3000) + public List Collect(string url, Guid SourceID, int sleep = 3000) { var CollectedPosts = new List(); @@ -100,6 +106,7 @@ public class RssWatcherJob : IHangfireJob } var meta = new HtmlPageReader(articleUrl); + meta.Parse(); var article = new ArticlesModel { @@ -107,8 +114,9 @@ public class RssWatcherJob : IHangfireJob Tags = FetchTags(post), URL = articleUrl, PubDate = post.PublishDate.DateTime, - Thumbnail = meta.Data.Header.Meta.Image, - Description = meta.Data.Header.Meta.Description, + Thumbnail = meta.Data.Header.Image, + Description = meta.Data.Header.Description, + SourceID = SourceID }; CollectedPosts.Add(article); @@ -123,7 +131,7 @@ public class RssWatcherJob : IHangfireJob { foreach (var item in items) { - if (IsThisUrlKnown(item.URL) == false) + if (IsThisUrlKnown(item.URL) == true) { continue; } diff --git a/Newsbot.Collector.Services/Newsbot.Collector.Services.csproj b/Newsbot.Collector.Services/Newsbot.Collector.Services.csproj index 36f4513..56f2c55 100644 --- a/Newsbot.Collector.Services/Newsbot.Collector.Services.csproj +++ b/Newsbot.Collector.Services/Newsbot.Collector.Services.csproj @@ -7,6 
+7,7 @@ + diff --git a/Newsbot.Collector.Tests/Jobs/RssWatcherJobTest.cs b/Newsbot.Collector.Tests/Jobs/RssWatcherJobTest.cs index e17f15e..b169cd6 100644 --- a/Newsbot.Collector.Tests/Jobs/RssWatcherJobTest.cs +++ b/Newsbot.Collector.Tests/Jobs/RssWatcherJobTest.cs @@ -27,7 +27,7 @@ public class RssWatcherJobTest { var url = "https://www.engadget.com/rss.xml"; var client = new RssWatcherJob(); - var items = client.Collect(url); + var items = client.Collect(url, Guid.NewGuid(), 0); } [Fact] @@ -36,6 +36,28 @@ public class RssWatcherJobTest var url = "https://www.engadget.com/rss.xml"; var client = new RssWatcherJob(); client.Init(ConnectionString()); - client.Collect(url, 0); + var items = client.Collect(url, Guid.NewGuid(), 0); + client.UpdateDatabase(items); + } + + [Fact] + public void CanReadHtmlDrivenFeedPage() + { + var url = "https://www.howtogeek.com/feed/"; + var client = new RssWatcherJob(); + client.Init(ConnectionString()); + var items = client.Collect(url, Guid.NewGuid(), 0); + Console.WriteLine('k'); + } + + [Fact] + public void InitAndExecuteTest() + { + var client = new RssWatcherJob(); + client.InitAndExecute(new RssWatcherJobOptions + { + ConnectionString = ConnectionString() + }); + } } \ No newline at end of file diff --git a/Newsbot.Collector.Tests/Services/HtmlPageReaderTests.cs b/Newsbot.Collector.Tests/Services/HtmlPageReaderTests.cs new file mode 100644 index 0000000..f1961bf --- /dev/null +++ b/Newsbot.Collector.Tests/Services/HtmlPageReaderTests.cs @@ -0,0 +1,38 @@ +using Newsbot.Collector.Services.HtmlParser; + +namespace Newsbot.Collector.Tests.Services; + +public class HtmlPageReaderTests +{ + [Fact] + public void BaseSiteContainsRssFeed() + { + var client = new HtmlPageReader("https://dotnettutorials.net/"); + var headClient = new HeadParserClient(client.GetSiteContent()); + var feedUri = headClient.GetSiteFeed(); + if (feedUri == "") + { + Assert.Fail("Failed to find the RSS feed"); + } + } + + [Fact] + public void 
SiteDoesNotReturnRssFeed() + { + var client = new HtmlPageReader("https://www.engadget.com/"); + var headClient = new HeadParserClient(client.GetSiteContent()); + var feedUri = headClient.GetSiteFeed(); + if (feedUri == "") + { + Assert.Fail(""); + } + } + + [Fact] + public void CanFindBodyOfTheArticle() + { + var client = new HtmlPageReader("https://www.engadget.com/apple-is-convinced-my-dog-is-stalking-me-143100932.html"); + var c = client.CollectPostContent(); + Console.WriteLine(c); + } +} \ No newline at end of file diff --git a/makefile b/makefile new file mode 100644 index 0000000..a22c8a3 --- /dev/null +++ b/makefile @@ -0,0 +1,25 @@ +.PHONY: help +help: ## Shows this help command + @egrep -h '\s##\s' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}' + +build: ## builds the application with the current go runtime + dotnet restore + dotnet build + +pub: ## Generate artifacts + dotnet restore + dotnet publish -o out + rm ./out/appsettings.json + +docker-build: ## Generates the docker image + docker build -t "newsbot.collector" . + docker image ls | grep newsbot.collector + +docker-run: ## Runs the docker compose + docker compose up + +migrate-dev: ## Apply sql migrations to dev db + goose -dir "./Newsbot.Collector.Database/Migrations" postgres "host=localhost user=postgres password=postgres dbname=postgres sslmode=disable" up + +migrate-dev-down: ## revert sql migrations to dev db + goose -dir "./Newsbot.Collector.Database/Migrations" postgres "host=localhost user=postgres password=postgres dbname=postgres sslmode=disable" down