Features/more rss improvements (#6)

* exposing connectionStrings to controllers

* First controller added to start testing

* corrected param to be page not age

* new model to map connection strings to for the controllers

* HelloWorldJob uses options now to make hangfire happy

* improved the html reader to find some rss feeds and start to extract the body of the content

* moved html parser to its own namespace and made a sub client to process the header

* helpful vsc changes

* updated rss watcher to include the sourceId so it can be added to the db call

* updated tests to reflect changes

* updated gitignore to avoid trash and moved over my makefile

* More routes and added serilog

* adding more database calls for the controllers

* Updated interfaces for the tables

* Added Serilog to jobs

* removed default files

* Added more routes and added DTO

* Added DTO objects and SourceType Consts for easy usage

* updated discord model name to follow the pattern

* updated formatting

* new dto objects and Subscriptions repo interface

* added subscription db and api calls

* focusing on the twitter tags as most sites focus on them

* updated test to pull an HTML-based feed
This commit is contained in:
James Tombleson 2023-02-26 09:40:04 -08:00 committed by GitHub
parent 17e97b4e09
commit 521940ca4f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
44 changed files with 1233 additions and 264 deletions

2
.gitignore vendored
View File

@ -3,6 +3,8 @@
## ##
## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
out/
# User-specific files # User-specific files
*.rsuser *.rsuser
*.suo *.suo

6
.vscode/launch.json vendored
View File

@ -24,6 +24,12 @@
}, },
"sourceFileMap": { "sourceFileMap": {
"/Views": "${workspaceFolder}/Views" "/Views": "${workspaceFolder}/Views"
},
"logging": {
"engineLogging": false,
"moduleLoad": false,
"exceptions": false,
"browserStdOut": false
} }
}, },
{ {

15
.vscode/settings.json vendored
View File

@ -4,5 +4,18 @@
"**/bin": false "**/bin": false
}, },
"csharp.inlayHints.types.enabled": true, "csharp.inlayHints.types.enabled": true,
"omnisharp.enableImportCompletion": true "omnisharp.enableImportCompletion": true,
"sqltools.connections": [
{
"previewLimit": 50,
"server": "localhost",
"port": 5432,
"driver": "PostgreSQL",
"name": "localhost",
"database": "postgres",
"username": "postgres",
"password": "postgres"
}
],
"editor.formatOnType": true
} }

View File

@ -0,0 +1,65 @@
using Microsoft.AspNetCore.Mvc;
using Microsoft.Extensions.Options;
using Newsbot.Collector.Database.Repositories;
using Newsbot.Collector.Domain.Dto;
using Newsbot.Collector.Domain.Interfaces;
using Newsbot.Collector.Domain.Models;
namespace Newsbot.Collector.Api.Controllers;
/// <summary>
/// HTTP endpoints under <c>api/articles</c> that return stored articles as DTOs.
/// </summary>
[ApiController]
[Route("api/articles")]
public class ArticlesController : ControllerBase
{
    private readonly ILogger<ArticlesController> _logger;
    private readonly ConnectionStrings _settings;
    private readonly IArticlesRepository _articles;
    private readonly ISourcesRepository _sources;

    /// <summary>
    /// Keeps the injected logger and connection strings, then creates the article and
    /// source repositories from the configured database connection string.
    /// </summary>
    /// <param name="logger">Logger for this controller.</param>
    /// <param name="settings">Options wrapper holding the connection strings.</param>
    public ArticlesController(ILogger<ArticlesController> logger, IOptions<ConnectionStrings> settings)
    {
        _logger = logger;
        _settings = settings.Value;

        var database = _settings.Database;
        _articles = new ArticlesTable(database);
        _sources = new SourcesTable(database);
    }

    /// <summary>
    /// Returns page 0 of the article list using a fixed page size of 25.
    /// </summary>
    [HttpGet(Name = "GetArticles")]
    public IEnumerable<ArticleDto> Get()
    {
        return _articles
            .List(0, 25)
            .Select(article => ArticleDto.Convert(article))
            .ToList();
    }

    /// <summary>
    /// Returns the article with the given ID.
    /// </summary>
    /// <param name="id">Article ID taken from the route.</param>
    [HttpGet("{id}")]
    public ArticleDto GetById(Guid id)
    {
        var article = _articles.GetById(id);
        return ArticleDto.Convert(article);
    }

    /// <summary>
    /// Returns the article with the given ID together with the source it belongs to.
    /// </summary>
    /// <param name="id">Article ID taken from the route.</param>
    [HttpGet("{id}/details")]
    public ArticleDetailsDto GetDetailsById(Guid id)
    {
        var article = _articles.GetById(id);

        // The source is looked up through the SourceID stored on the article.
        var source = _sources.GetByID(article.SourceID);

        return ArticleDetailsDto.Convert(article, source);
    }

    /// <summary>
    /// Returns one page of the articles that belong to a source.
    /// </summary>
    /// <param name="sourceid">Source ID taken from the route.</param>
    /// <param name="page">Zero-based page index; defaults to 0.</param>
    /// <param name="count">Page size; defaults to 25.</param>
    [HttpGet("by/{sourceid}")]
    public IEnumerable<ArticleDto> GetBySourceID(Guid sourceid, int page = 0, int count = 25)
    {
        return _articles
            .ListBySourceId(sourceid, page, count)
            .Select(article => ArticleDto.Convert(article))
            .ToList();
    }
}

View File

@ -0,0 +1,66 @@
using System.Net;
using Microsoft.AspNetCore.Mvc;
using Microsoft.Extensions.Options;
using Newsbot.Collector.Database.Repositories;
using Newsbot.Collector.Domain.Interfaces;
using Newsbot.Collector.Domain.Models;
namespace Newsbot.Collector.Api.Controllers;
/// <summary>
/// HTTP endpoints under <c>api/discord/webhooks</c> for listing, creating and
/// enabling/disabling Discord webhook records.
/// </summary>
[ApiController]
[Route("api/discord/webhooks")]
public class DiscordWebHookController : ControllerBase
{
    private readonly ILogger<DiscordWebHookController> _logger;
    private readonly ConnectionStrings _settings;
    private readonly IDiscordWebHooksRepository _webhooks;

    /// <summary>
    /// Keeps the injected logger and connection strings, then creates the webhook
    /// repository from the configured database connection string.
    /// </summary>
    /// <param name="logger">Logger for this controller.</param>
    /// <param name="settings">Options wrapper holding the connection strings.</param>
    public DiscordWebHookController(ILogger<DiscordWebHookController> logger, IOptions<ConnectionStrings> settings)
    {
        _logger = logger;
        _settings = settings.Value;
        _webhooks = new DiscordWebhooksTable(_settings.Database);
    }

    /// <summary>
    /// Returns one page of webhooks.
    /// </summary>
    /// <param name="page">Page index passed straight to the repository.</param>
    [HttpGet(Name = "GetDiscordWebhooks")]
    public IEnumerable<DiscordWebHookModel> Get(int page) => _webhooks.List(page);

    /// <summary>
    /// Stores a new webhook, created in the enabled state, and returns the stored record.
    /// </summary>
    /// <param name="url">Webhook URL.</param>
    /// <param name="server">Server name the webhook belongs to.</param>
    /// <param name="channel">Channel name the webhook posts to.</param>
    [HttpPost(Name = "New")]
    public DiscordWebHookModel New(string url, string server, string channel)
    {
        var webhook = new DiscordWebHookModel
        {
            Url = url,
            Server = server,
            Channel = channel,
            Enabled = true,
        };

        return _webhooks.New(webhook);
    }

    /// <summary>
    /// Returns up to 25 webhooks that match both the server and the channel.
    /// </summary>
    /// <param name="server">Server name to match.</param>
    /// <param name="channel">Channel name to match.</param>
    [HttpGet("by/serverAndChannel")]
    public IEnumerable<DiscordWebHookModel> GetByServerAndChannel(string server, string channel) =>
        _webhooks.ListByServerAndChannel(server, channel, 25);

    /// <summary>
    /// Returns the webhook with the given ID.
    /// </summary>
    /// <param name="id">Webhook ID taken from the route.</param>
    [HttpGet("{id}")]
    public DiscordWebHookModel GetById(Guid id) => _webhooks.GetByID(id);

    /// <summary>
    /// Marks the webhook with the given ID as disabled.
    /// </summary>
    /// <param name="id">Webhook ID taken from the route.</param>
    [HttpPost("{id}/disable")]
    public void DisableById(Guid id) => _webhooks.Disable(id);

    /// <summary>
    /// Marks the webhook with the given ID as enabled.
    /// </summary>
    /// <param name="id">Webhook ID taken from the route.</param>
    [HttpPost("{id}/enable")]
    public void EnableById(Guid id) => _webhooks.Enable(id);
}

View File

@ -0,0 +1,133 @@
using Microsoft.AspNetCore.Mvc;
using Microsoft.Extensions.Options;
using Newsbot.Collector.Database.Repositories;
using Newsbot.Collector.Domain.Consts;
using Newsbot.Collector.Domain.Dto;
using Newsbot.Collector.Domain.Interfaces;
using Newsbot.Collector.Domain.Models;
namespace Newsbot.Collector.Api.Controllers;
/// <summary>
/// HTTP endpoints under <c>api/sources</c> for listing, creating and
/// enabling/disabling sources.
/// </summary>
[ApiController]
[Route("api/sources")]
public class SourcesController : ControllerBase
{
    private readonly ILogger<SourcesController> _logger;
    private readonly ConnectionStrings _settings;
    private readonly ISourcesRepository _sources;

    /// <summary>
    /// Keeps the injected logger and connection strings, then creates the source
    /// repository from the configured database connection string.
    /// </summary>
    /// <param name="logger">Logger for this controller.</param>
    /// <param name="settings">Options wrapper holding the connection strings.</param>
    public SourcesController(ILogger<SourcesController> logger, IOptions<ConnectionStrings> settings)
    {
        _logger = logger;
        _settings = settings.Value;
        _sources = new SourcesTable(_settings.Database);
    }

    /// <summary>
    /// Returns one page of sources using a fixed page size of 25.
    /// </summary>
    /// <param name="page">Page index passed to the repository.</param>
    [HttpGet(Name = "GetSources")]
    public IEnumerable<SourceDto> Get(int page)
    {
        return _sources
            .List(page, 25)
            .Select(source => SourceDto.Convert(source))
            .ToList();
    }

    /// <summary>
    /// Returns the sources of the given type.
    /// </summary>
    /// <param name="type">Source type value passed to the repository.</param>
    [HttpGet("by/type")]
    public IEnumerable<SourceDto> GetByType(string type)
    {
        return _sources
            .ListByType(type)
            .Select(source => SourceDto.Convert(source))
            .ToList();
    }

    /// <summary>
    /// Creates an enabled Reddit source that is read as a feed.
    /// </summary>
    /// <param name="name">Display name; also appended to the tags.</param>
    /// <param name="url">URL of the source.</param>
    [HttpPost("new/reddit")]
    public SourceDto NewReddit(string name, string url)
    {
        var model = new SourceModel
        {
            Site = SourceTypes.Reddit,
            Type = SourceTypes.Reddit,
            Name = name,
            Url = url,
            Source = "feed",
            Enabled = true,
            Tags = $"{SourceTypes.Reddit}, {name}"
        };

        return SourceDto.Convert(_sources.New(model));
    }

    /// <summary>
    /// Creates an enabled RSS source that is read as a feed.
    /// </summary>
    /// <param name="name">Display name; also appended to the tags.</param>
    /// <param name="url">URL of the source.</param>
    [HttpPost("new/rss")]
    public SourceDto NewRss(string name, string url)
    {
        var model = new SourceModel
        {
            Site = SourceTypes.Rss,
            Type = SourceTypes.Rss,
            Name = name,
            Url = url,
            Source = "feed",
            Enabled = true,
            Tags = $"{SourceTypes.Rss}, {name}"
        };

        return SourceDto.Convert(_sources.New(model));
    }

    /// <summary>
    /// Creates an enabled YouTube source that is read as a feed.
    /// </summary>
    /// <param name="name">Display name; also appended to the tags.</param>
    /// <param name="url">URL of the source.</param>
    [HttpPost("new/youtube")]
    public SourceDto NewYoutube(string name, string url)
    {
        var model = new SourceModel
        {
            Site = SourceTypes.YouTube,
            Type = SourceTypes.YouTube,
            Name = name,
            Url = url,
            Source = "feed",
            Enabled = true,
            Tags = $"{SourceTypes.YouTube}, {name}"
        };

        return SourceDto.Convert(_sources.New(model));
    }

    /// <summary>
    /// Creates an enabled Twitch source whose URL is built from the name
    /// and whose source kind is "api".
    /// </summary>
    /// <param name="name">Name appended to the twitch.tv URL and to the tags.</param>
    [HttpPost("new/twitch")]
    public SourceDto NewTwitch(string name)
    {
        var model = new SourceModel
        {
            Site = SourceTypes.Twitch,
            Type = SourceTypes.Twitch,
            Name = name,
            Url = $"https://twitch.tv/{name}",
            Source = "api",
            Enabled = true,
            Tags = $"{SourceTypes.Twitch}, {name}"
        };

        return SourceDto.Convert(_sources.New(model));
    }

    /// <summary>
    /// Returns the source with the given ID.
    /// </summary>
    /// <param name="id">Source ID taken from the route.</param>
    [HttpGet("{id}")]
    public SourceDto GetById(Guid id)
    {
        var source = _sources.GetByID(id);
        return SourceDto.Convert(source);
    }

    /// <summary>
    /// Asks the repository to disable the source with the given ID.
    /// </summary>
    /// <param name="id">Source ID taken from the route.</param>
    [HttpPost("{id}/disable")]
    public void Disable(Guid id) => _sources.Disable(id);

    /// <summary>
    /// Asks the repository to enable the source with the given ID.
    /// </summary>
    /// <param name="id">Source ID taken from the route.</param>
    [HttpPost("{id}/enable")]
    public void Enable(Guid id) => _sources.Enable(id);
}

View File

@ -0,0 +1,98 @@
using Microsoft.AspNetCore.Mvc;
using Microsoft.Extensions.Options;
using Newsbot.Collector.Database.Repositories;
using Newsbot.Collector.Domain.Dto;
using Newsbot.Collector.Domain.Interfaces;
using Newsbot.Collector.Domain.Models;
namespace Newsbot.Collector.Api.Controllers;
/// <summary>
/// HTTP endpoints under <c>api/subscriptions</c> for listing, creating and
/// deleting subscriptions that link a source to a Discord webhook.
/// </summary>
[ApiController]
[Route("api/subscriptions")]
public class SubscriptionsController : ControllerBase
{
    // Typed to this controller so log entries carry the SubscriptionsController category.
    private readonly ILogger<SubscriptionsController> _logger;
    private readonly ConnectionStrings _settings;
    private readonly ISubscriptionRepository _subscription;
    private readonly IDiscordWebHooksRepository _discord;
    private readonly ISourcesRepository _sources;

    /// <summary>
    /// Keeps the injected logger and connection strings, then creates the subscription,
    /// webhook and source repositories from the configured database connection string.
    /// </summary>
    /// <param name="logger">Logger for this controller.</param>
    /// <param name="settings">Options wrapper holding the connection strings.</param>
    public SubscriptionsController(ILogger<SubscriptionsController> logger, IOptions<ConnectionStrings> settings)
    {
        _logger = logger;
        _settings = settings.Value;
        _subscription = new SubscriptionsTable(_settings.Database);
        _discord = new DiscordWebhooksTable(_settings.Database);
        _sources = new SourcesTable(_settings.Database);
    }

    /// <summary>
    /// Returns one page of subscriptions.
    /// </summary>
    /// <param name="page">Page index passed to the repository.</param>
    [HttpGet(Name = "ListSubscriptions")]
    public IEnumerable<SubscriptionDto> List(int page)
    {
        return _subscription
            .List(page)
            .Select(item => SubscriptionDto.Convert(item))
            .ToList();
    }

    /// <summary>
    /// Returns the subscription with the given ID.
    /// </summary>
    /// <param name="id">Subscription ID taken from the route.</param>
    [HttpGet("{id}")]
    public SubscriptionDto GetById(Guid id)
    {
        return SubscriptionDto.Convert(_subscription.GetById(id));
    }

    /// <summary>
    /// Returns the subscription with the given ID together with the webhook and
    /// source records it references.
    /// </summary>
    /// <param name="id">Subscription ID taken from the route.</param>
    [HttpGet("{id}/details")]
    public SubscriptionDetailsDto GetDetailsById(Guid id)
    {
        var sub = _subscription.GetById(id);

        // Both related records are resolved through the IDs stored on the subscription.
        var webhook = _discord.GetByID(sub.DiscordWebHookID);
        var source = _sources.GetByID(sub.SourceID);

        return SubscriptionDetailsDto.Convert(sub, source, webhook);
    }

    /// <summary>
    /// Deletes the subscription with the given ID.
    /// </summary>
    /// <param name="id">Subscription ID taken from the route.</param>
    [HttpPost("{id}/delete")]
    public void DeleteById(Guid id)
    {
        _subscription.Delete(id);
    }

    /// <summary>
    /// Returns the subscriptions attached to a Discord webhook.
    /// </summary>
    /// <param name="id">Discord webhook ID.</param>
    [HttpGet("by/discordid")]
    public IEnumerable<SubscriptionDto> GetByDiscordId(Guid id)
    {
        return _subscription
            .ListByWebhook(id)
            .Select(item => SubscriptionDto.Convert(item))
            .ToList();
    }

    /// <summary>
    /// Returns the subscriptions attached to a source.
    /// </summary>
    /// <param name="id">Source ID.</param>
    [HttpGet("by/sourceid")]
    public IEnumerable<SubscriptionDto> GetBySourceId(Guid id)
    {
        return _subscription
            .ListBySourceID(id)
            .Select(item => SubscriptionDto.Convert(item))
            .ToList();
    }

    /// <summary>
    /// Creates a subscription that links a source to a Discord webhook and returns
    /// the stored record.
    /// </summary>
    /// <param name="sourceId">ID of the source to subscribe to.</param>
    /// <param name="discordId">ID of the Discord webhook that receives the source.</param>
    [HttpPost("new")]
    public SubscriptionDto New(Guid sourceId, Guid discordId)
    {
        var item = _subscription.New(new SubscriptionModel
        {
            ID = Guid.NewGuid(),
            SourceID = sourceId,
            DiscordWebHookID = discordId
        });

        return SubscriptionDto.Convert(item);
    }
}

View File

@ -1,32 +0,0 @@
using Microsoft.AspNetCore.Mvc;
namespace Newsbot.Collector.Api.Controllers;
/// <summary>
/// Sample controller that serves randomly generated weather forecasts.
/// </summary>
[ApiController]
[Route("[controller]")]
public class WeatherForecastController : ControllerBase
{
    // Pool of summary labels; one is picked at random for each forecast.
    private static readonly string[] Summaries = new[]
    {
        "Freezing", "Bracing", "Chilly", "Cool", "Mild", "Warm", "Balmy", "Hot", "Sweltering", "Scorching"
    };

    private readonly ILogger<WeatherForecastController> _logger;

    /// <summary>
    /// Stores the injected logger.
    /// </summary>
    /// <param name="logger">Logger for this controller.</param>
    public WeatherForecastController(ILogger<WeatherForecastController> logger)
    {
        _logger = logger;
    }

    /// <summary>
    /// Builds five forecasts, one for each of the next five days, with a random
    /// temperature in the range [-20, 55) and a random summary.
    /// </summary>
    /// <returns>An array holding the five forecasts in day order.</returns>
    [HttpGet(Name = "GetWeatherForecast")]
    public IEnumerable<WeatherForecast> Get()
    {
        var forecasts = new WeatherForecast[5];

        for (var dayOffset = 1; dayOffset <= forecasts.Length; dayOffset++)
        {
            forecasts[dayOffset - 1] = new WeatherForecast
            {
                Date = DateOnly.FromDateTime(DateTime.Now.AddDays(dayOffset)),
                TemperatureC = Random.Shared.Next(-20, 55),
                Summary = Summaries[Random.Shared.Next(Summaries.Length)]
            };
        }

        return forecasts;
    }
}

View File

@ -11,6 +11,8 @@
<PackageReference Include="Hangfire.MemoryStorage" Version="1.7.0" /> <PackageReference Include="Hangfire.MemoryStorage" Version="1.7.0" />
<PackageReference Include="Microsoft.AspNetCore.OpenApi" Version="7.0.0" /> <PackageReference Include="Microsoft.AspNetCore.OpenApi" Version="7.0.0" />
<PackageReference Include="Microsoft.Extensions.Configuration" Version="7.0.0" /> <PackageReference Include="Microsoft.Extensions.Configuration" Version="7.0.0" />
<PackageReference Include="Serilog" Version="2.12.0" />
<PackageReference Include="Serilog.AspNetCore" Version="6.1.0" />
<PackageReference Include="Swashbuckle.AspNetCore" Version="6.4.0" /> <PackageReference Include="Swashbuckle.AspNetCore" Version="6.4.0" />
</ItemGroup> </ItemGroup>

View File

@ -1,24 +1,37 @@
using Hangfire; using Hangfire;
using Hangfire.MemoryStorage; using Hangfire.MemoryStorage;
using Serilog;
using Newsbot.Collector.Services.Jobs; using Newsbot.Collector.Services.Jobs;
using Newsbot.Collector.Domain.Models; using Newsbot.Collector.Domain.Models;
using Newsbot.Collector.Domain.Consts;
Log.Logger = new LoggerConfiguration()
.WriteTo.Console()
.CreateLogger();
Log.Information("Starting up");
var builder = WebApplication.CreateBuilder(args); var builder = WebApplication.CreateBuilder(args);
// Add services to the container. // Define Logger
builder.Host.UseSerilog(); // <-- Add this line
// Build the conifg // Build the conifg
var config = GetConfiguration(); var config = GetConfiguration();
builder.Configuration.AddConfiguration(config); builder.Configuration.AddConfiguration(config);
// Configure Hangfire
builder.Services.AddHangfire(f => f.UseMemoryStorage()); builder.Services.AddHangfire(f => f.UseMemoryStorage());
builder.Services.AddHangfireServer(); builder.Services.AddHangfireServer();
GlobalConfiguration.Configuration.UseSerilogLogProvider();
builder.Services.AddControllers(); builder.Services.AddControllers();
// Learn more about configuring Swagger/OpenAPI at https://aka.ms/aspnetcore/swashbuckle // Learn more about configuring Swagger/OpenAPI at https://aka.ms/aspnetcore/swashbuckle
builder.Services.AddEndpointsApiExplorer(); builder.Services.AddEndpointsApiExplorer();
builder.Services.AddSwaggerGen(); builder.Services.AddSwaggerGen();
builder.Services.Configure<ConnectionStrings>(config.GetSection("ConnectionStrings"));
var app = builder.Build(); var app = builder.Build();
// Configure the HTTP request pipeline. // Configure the HTTP request pipeline.
@ -31,7 +44,7 @@ if (app.Environment.IsDevelopment())
app.UseHttpsRedirection(); app.UseHttpsRedirection();
app.UseHangfireDashboard(); app.UseHangfireDashboard();
SetupRecurringJobs(config); SetupRecurringJobs(config, Log.Logger);
app.UseAuthorization(); app.UseAuthorization();
@ -39,6 +52,7 @@ app.MapControllers();
app.Run(); app.Run();
static IConfiguration GetConfiguration() static IConfiguration GetConfiguration()
{ {
return new ConfigurationBuilder() return new ConfigurationBuilder()
@ -47,24 +61,14 @@ static IConfiguration GetConfiguration()
.Build(); .Build();
} }
static void SetupRecurringJobs(IConfiguration configuration) static void SetupRecurringJobs(IConfiguration configuration, Serilog.ILogger logger)
{ {
var databaseConnectionString = configuration.GetConnectionString("database");
if (databaseConnectionString is null)
{
databaseConnectionString = "";
}
RecurringJob.AddOrUpdate<HelloWorldJob>("Example", x => x.InitAndExecute(new HelloWorldJobOptions RecurringJob.AddOrUpdate<HelloWorldJob>("Example", x => x.InitAndExecute(new HelloWorldJobOptions
{ {
Message = "Hello from the background!" Message = "Hello from the background!"
}), "0/2 * * * *"); }), "0/1 * * * *");
//RecurringJob.AddOrUpdate<RssWatcherJob>("RSS", x => x.InitAndExecute(config), "15 0-23 * * *"); RecurringJob.AddOrUpdate<RssWatcherJob>("RSS", x => x.InitAndExecute(new RssWatcherJobOptions
var c = new RssWatcherJob();
BackgroundJob.Enqueue(() => c.InitAndExecute(new RssWatcherJobOptions
{ {
ConnectionString = databaseConnectionString ConnectionString = configuration.GetSection(ConfigConnectionStringConst.Database).Value ?? ""
})); }), "15 0-23 * * *");
} }

View File

@ -1,12 +0,0 @@
namespace Newsbot.Collector.Api;
/// <summary>
/// A single forecast entry: a date, a temperature and an optional summary label.
/// </summary>
public class WeatherForecast
{
    /// <summary>Day the forecast applies to.</summary>
    public DateOnly Date { get; set; }

    /// <summary>Temperature in degrees Celsius.</summary>
    public int TemperatureC { get; set; }

    /// <summary>
    /// Temperature in degrees Fahrenheit, derived from <see cref="TemperatureC"/>.
    /// The scaled value is truncated toward zero before 32 is added.
    /// </summary>
    public int TemperatureF
    {
        get
        {
            var scaled = (int)(TemperatureC / 0.5556);
            return 32 + scaled;
        }
    }

    /// <summary>Optional short description of the conditions.</summary>
    public string? Summary { get; set; }
}

View File

@ -41,7 +41,12 @@ public class ArticlesTable : IArticlesRepository
var res = conn.Query<ArticlesModel>(@"select * from articles var res = conn.Query<ArticlesModel>(@"select * from articles
Order By PubDate Desc Order By PubDate Desc
Offset @Page Offset @Page
Fetch Next @Count Rows Only", new { Page = page * count, Count = count }).ToList(); Fetch Next @Count Rows Only", new
{
Page = page * count,
Count = count
})
.ToList();
return res; return res;
} }
@ -67,6 +72,21 @@ public class ArticlesTable : IArticlesRepository
return res.First(); return res.First();
} }
/// <summary>
/// Returns one page of the articles that belong to a source, newest first.
/// </summary>
/// <param name="id">ID of the source whose articles are listed.</param>
/// <param name="page">Zero-based page index; the row offset is <c>page * count</c>.</param>
/// <param name="count">Maximum number of rows to return.</param>
/// <returns>The matching articles for the requested page.</returns>
public List<ArticlesModel> ListBySourceId(Guid id, int page, int count)
{
    using var conn = OpenConnection(_connectionString);

    // Offset/Fetch without an Order By gives no guarantee about which rows land on
    // which page, so the rows are ordered the same way as the unfiltered List query.
    var query = @"Select * from articles
        where sourceid = @sourceid
        Order By PubDate Desc
        Offset @page
        Fetch next @count rows only";

    return conn.Query<ArticlesModel>(query, new
    {
        sourceid = id,
        page = page * count,
        count = count
    }).ToList();
}
public ArticlesModel New(ArticlesModel model) public ArticlesModel New(ArticlesModel model)
{ {
model.ID = Guid.NewGuid(); model.ID = Guid.NewGuid();

View File

@ -1,21 +1,22 @@
using System.Data; using System.Data;
using Dapper; using Dapper;
using Microsoft.Extensions.Configuration; using Microsoft.Extensions.Configuration;
using Newsbot.Collector.Domain.Interfaces;
using Newsbot.Collector.Domain.Models; using Newsbot.Collector.Domain.Models;
using Npgsql; using Npgsql;
namespace Newsbot.Collector.Database.Repositories; namespace Newsbot.Collector.Database.Repositories;
public class WebhooksTable public class DiscordWebhooksTable : IDiscordWebHooksRepository
{ {
private string _connectionString; private string _connectionString;
public WebhooksTable(string connectionString) public DiscordWebhooksTable(string connectionString)
{ {
_connectionString = connectionString; _connectionString = connectionString;
} }
public WebhooksTable(IConfiguration configuration) public DiscordWebhooksTable(IConfiguration configuration)
{ {
var connstr = configuration.GetConnectionString("database"); var connstr = configuration.GetConnectionString("database");
if (connstr is null) if (connstr is null)
@ -32,66 +33,71 @@ public class WebhooksTable
return conn; return conn;
} }
public void New(DiscordWebHook model) public DiscordWebHookModel New(DiscordWebHookModel model)
{ {
var uid = Guid.NewGuid();
using var conn = OpenConnection(_connectionString); using var conn = OpenConnection(_connectionString);
var query = "Insert Into DiscordWebHooks (ID, Url, Server, Channel, Enabled) Values (@id, @url, @server, @channel, @enabled);"; var query = "Insert Into DiscordWebHooks (ID, Url, Server, Channel, Enabled) Values (@id, @url, @server, @channel, @enabled);";
conn.Execute(query, new conn.Execute(query, new
{ {
id = model.ID, id = uid,
url = model.Url, url = model.Url,
server = model.Server, server = model.Server,
channel = model.Channel, channel = model.Channel,
enabled = model.Enabled enabled = model.Enabled
}); });
model.ID = uid;
return model;
} }
public DiscordWebHook GetByID(Guid ID) public DiscordWebHookModel GetByID(Guid id)
{ {
using var conn = OpenConnection(_connectionString); using var conn = OpenConnection(_connectionString);
var query = "Select * from DiscordWebHooks Where ID = @id LIMIT 1;"; var query = "Select * from DiscordWebHooks Where ID = @id LIMIT 1;";
return conn.Query<DiscordWebHook>(query, new return conn.Query<DiscordWebHookModel>(query, new
{ {
id = ID id = id
}).First(); }).First();
} }
public DiscordWebHook GetByUrl(string url) public DiscordWebHookModel GetByUrl(string url)
{ {
using var conn = OpenConnection(_connectionString); using var conn = OpenConnection(_connectionString);
var query = "Select * From DiscordWebHooks Where url = @url;"; var query = "Select * From DiscordWebHooks Where url = @url;";
return conn.QueryFirst<DiscordWebHook>(query, new return conn.QueryFirst<DiscordWebHookModel>(query, new
{ {
url = url url = url
}); });
} }
public List<DiscordWebHook> List(int limit = 25) public List<DiscordWebHookModel> List(int page, int count = 25)
{ {
using var conn = OpenConnection(_connectionString); using var conn = OpenConnection(_connectionString);
var query = "Select * From DiscordWebHooks @limit;"; var query = @"Select * From DiscordWebHooks
return conn.Query<DiscordWebHook>(query, new Offset @offset Fetch Next @count Rows Only;";
return conn.Query<DiscordWebHookModel>(query, new
{ {
limit = limit offset = page * count,
count = count
}).ToList(); }).ToList();
} }
public List<DiscordWebHook> ListByServer(string server, int limit = 25) public List<DiscordWebHookModel> ListByServer(string server, int limit = 25)
{ {
using var conn = OpenConnection(_connectionString); using var conn = OpenConnection(_connectionString);
var query = "Select * From DiscordWebHooks Where Server = @id Limit @limit;"; var query = "Select * From DiscordWebHooks Where Server = @id Limit @limit;";
return conn.Query<DiscordWebHook>(query, new return conn.Query<DiscordWebHookModel>(query, new
{ {
server = server, server = server,
limit = limit limit = limit
}).ToList(); }).ToList();
} }
public List<DiscordWebHook> ListByServerAndChannel(string server, string channel, int limit = 25) public List<DiscordWebHookModel> ListByServerAndChannel(string server, string channel, int limit = 25)
{ {
using var conn = OpenConnection(_connectionString); using var conn = OpenConnection(_connectionString);
var query = "SELECT * FROM DiscordWebHooks WHERE Server = @server and Channel = @channel Limit @limit;"; var query = "SELECT * FROM DiscordWebHooks WHERE Server = @server and Channel = @channel Limit @limit;";
return conn.Query<DiscordWebHook>(query, new return conn.Query<DiscordWebHookModel>(query, new
{ {
server = server, server = server,
channel = channel, channel = channel,
@ -99,23 +105,23 @@ public class WebhooksTable
}).ToList(); }).ToList();
} }
public int Disable(Guid ID) public int Disable(Guid id)
{ {
using var conn = OpenConnection(_connectionString); using var conn = OpenConnection(_connectionString);
var query = "Update discordwebhooks Set Enabled = FALSE where ID = @id;"; var query = "Update discordwebhooks Set Enabled = FALSE where ID = @id;";
return conn.Execute(query, new return conn.Execute(query, new
{ {
id = ID id = id
}); });
} }
public int Enable(Guid ID) public int Enable(Guid id)
{ {
using var conn = OpenConnection(_connectionString); using var conn = OpenConnection(_connectionString);
var query = "Update discordwebhooks Set Enabled = TRUE where ID = @id;"; var query = "Update discordwebhooks Set Enabled = TRUE where ID = @id;";
return conn.Execute(query, new return conn.Execute(query, new
{ {
id = ID id = id
}); });
} }
} }

View File

@ -107,13 +107,16 @@ public class SourcesTable : ISourcesRepository
return res.First(); return res.First();
} }
public List<SourceModel> List(int limit = 25) public List<SourceModel> List(int page = 0, int count = 25)
{ {
using var conn = OpenConnection(_connectionString); using var conn = OpenConnection(_connectionString);
var query = "Select * From Sources Limit @limit;"; var query = @"Select * From Sources
Offset @page
Fetch Next @count Rows Only;";
return conn.Query<SourceModel>(query, new return conn.Query<SourceModel>(query, new
{ {
limit = 25 page = page * count,
count = count
}).ToList(); }).ToList();
} }

View File

@ -1,12 +1,13 @@
using System.Data; using System.Data;
using Dapper; using Dapper;
using Microsoft.Extensions.Configuration; using Microsoft.Extensions.Configuration;
using Newsbot.Collector.Domain.Interfaces;
using Newsbot.Collector.Domain.Models; using Newsbot.Collector.Domain.Models;
using Npgsql; using Npgsql;
namespace Newsbot.Collector.Database.Repositories; namespace Newsbot.Collector.Database.Repositories;
public class SubscriptionsTable public class SubscriptionsTable : ISubscriptionRepository
{ {
private string _connectionString; private string _connectionString;
@ -32,65 +33,100 @@ public class SubscriptionsTable
return conn; return conn;
} }
public void New(SubscriptionModel model) public SubscriptionModel New(SubscriptionModel model)
{ {
model.ID = Guid.NewGuid();
using var conn = OpenConnection(_connectionString); using var conn = OpenConnection(_connectionString);
var query = "Insert Into subscriptions (ID, DiscordWebHookId, SourceId) Values (@id, @webhookid, @sourceid);"; var query = "Insert Into subscriptions (ID, DiscordWebHookId, SourceId) Values (@id, @webhookid, @sourceid);";
conn.Execute(query, new conn.Execute(query, new
{ {
id = Guid.NewGuid(), id = model.ID,
webhookid = model.DiscordWebHookID, webhookid = model.DiscordWebHookID,
sourceid = model.SourceID sourceid = model.SourceID
}); });
return model;
} }
public List<SubscriptionModel> List(int limit = 25) public List<SubscriptionModel> List(int page = 0, int count = 25)
{ {
using var conn = OpenConnection(_connectionString); using var conn = OpenConnection(_connectionString);
var query = "Select * From subscriptions Limit @limit;"; var query = @"Select * From subscriptions
Offset @page Fetch Next @count Rows Only;";
return conn.Query<SubscriptionModel>(query, new return conn.Query<SubscriptionModel>(query, new
{ {
limit = limit, page = page * count,
count = count
}).ToList(); }).ToList();
} }
// todo add paging // todo add paging
public List<SubscriptionModel> ListBySourceID(Guid sourceID) public List<SubscriptionModel> ListBySourceID(Guid id, int page = 0, int count = 25)
{ {
using var conn = OpenConnection(_connectionString); using var conn = OpenConnection(_connectionString);
var query = "Select * From subscriptions where sourceid = @sourceid"; var query = @"Select * From subscriptions
Offset @page Fetch Next @count Rows Only
Where sourceid = @sourceid";
return conn.Query<SubscriptionModel>(query, new return conn.Query<SubscriptionModel>(query, new
{ {
sourceid = sourceID page = page * count,
count = count,
sourceid = id
}).ToList(); }).ToList();
} }
public List<SubscriptionModel> GetByWebhookAndSource(Guid webhookId, Guid sourceId) public List<SubscriptionModel> ListByWebhook(Guid id, int page = 0, int count = 25)
{ {
using var conn = OpenConnection(_connectionString); using var conn = OpenConnection(_connectionString);
var query = "Select * From subscriptions Where discordwebhookid = @webhookid and sourceid = @sourceid;"; var query = @"Select * From subscriptions
Offset @page Fetch Next @count Rows Only
Where discordwebhookid = @webhookid";
return conn.Query<SubscriptionModel>(query, new return conn.Query<SubscriptionModel>(query, new
{
page = page * count,
count = count,
webhookid = id,
}).ToList();
}
/// <summary>
/// Looks up a subscription by its ID.
/// </summary>
/// <param name="id">ID of the subscription to load.</param>
/// <returns>
/// The first matching row, or a new empty <c>SubscriptionModel</c> when no row matches.
/// </returns>
public SubscriptionModel GetById(Guid id)
{
    using var conn = OpenConnection(_connectionString);

    var matches = conn.Query<SubscriptionModel>(
        @"Select * From subscriptions Where id = @id;",
        new { id = id });

    // An empty model stands in for "not found" instead of throwing.
    return matches.Count() == 0
        ? new SubscriptionModel()
        : matches.First();
}
public SubscriptionModel GetByWebhookAndSource(Guid webhookId, Guid sourceId)
{
using var conn = OpenConnection(_connectionString);
var query = @"Select * From subscriptions
Where discordwebhookid = @webhookid
and sourceid = @sourceid;";
var res = conn.Query<SubscriptionModel>(query, new
{ {
webhookid = webhookId, webhookid = webhookId,
sourceid = sourceId, sourceid = sourceId,
}).ToList(); });
if (res.Count() == 0)
{
return new SubscriptionModel();
} }
return res.First();
public List<SubscriptionModel> ListByWebhook(Guid webhookId)
{
using var conn = OpenConnection(_connectionString);
var query = "Select * From subscriptions Where discordwebhookid = @webhookid";
return conn.Query<SubscriptionModel>(query, new
{
webhookid = webhookId,
}).ToList();
} }
public void Delete(Guid id) public void Delete(Guid id)
{ {
using var conn = OpenConnection(_connectionString); using var conn = OpenConnection(_connectionString);
var query = "Delete From subscriptions Where id = @id;"; var query = "Delete From subscriptions Where id = @id;";
conn.Execute(query, new { conn.Execute(query, new
{
id = id id = id
}); });
} }

View File

@ -0,0 +1,9 @@
namespace Newsbot.Collector.Domain.Consts;
/// <summary>
/// Key names used to read connection string values from IConfiguration.
/// </summary>
public class ConfigConnectionStringConst
{
    /// <summary>
    /// Path of the database connection string: the <c>Database</c> entry of the
    /// <c>ConnectionStrings</c> section.
    /// </summary>
    public const string Database = "ConnectionStrings:Database";
}

View File

@ -0,0 +1,9 @@
namespace Newsbot.Collector.Domain.Consts;
/// <summary>
/// Key names for the <c>Reddit</c> settings section, written as colon-separated
/// <c>section:key</c> paths.
/// </summary>
public class ConfigRedditConst
{
    /// <summary>Path of the <c>IsEnabled</c> entry in the Reddit section.</summary>
    public const string IsEnabled = "Reddit:IsEnabled";

    /// <summary>Path of the <c>PullHot</c> entry in the Reddit section.</summary>
    public const string PullHot = "Reddit:PullHot";

    /// <summary>Path of the <c>PullNsfw</c> entry in the Reddit section.</summary>
    public const string PullNsfw = "Reddit:PullNsfw";

    /// <summary>Path of the <c>PullTop</c> entry in the Reddit section.</summary>
    public const string PullTop = "Reddit:PullTop";
}

View File

@ -0,0 +1,11 @@
namespace Newsbot.Collector.Domain.Consts;
/// <summary>
/// Key names used to read the <c>Twitch</c> settings from IConfiguration.
/// </summary>
public class ConfigTwitchConst
{
    /// <summary>Path of the <c>IsEnabled</c> entry in the Twitch section.</summary>
    public const string IsEnabled = "Twitch:IsEnabled";

    /// <summary>Path of the <c>ClientID</c> entry in the Twitch section.</summary>
    public const string ClientID = "Twitch:ClientID";

    /// <summary>Path of the <c>ClientSecret</c> entry in the Twitch section.</summary>
    public const string ClientSecret = "Twitch:ClientSecret";
}

View File

@ -0,0 +1,10 @@
namespace Newsbot.Collector.Domain.Consts;
/// <summary>
/// Key names used to read the <c>Youtube</c> settings from IConfiguration.
/// </summary>
public class ConfigYoutubeConst
{
    /// <summary>
    /// Path of the <c>IsEnabled</c> entry in the Youtube section. Named to match the
    /// <c>IsEnabled</c> member of the other config key classes.
    /// </summary>
    public const string IsEnabled = "Youtube:IsEnabled";

    /// <summary>
    /// Original spelling of <see cref="IsEnabled"/>; kept so existing references
    /// keep compiling. Both names hold the same key.
    /// </summary>
    public const string IsEnable = IsEnabled;

    /// <summary>Path of the <c>Debug</c> entry in the Youtube section.</summary>
    public const string Debug = "Youtube:Debug";
}

View File

@ -0,0 +1,41 @@
using Newsbot.Collector.Domain.Models;
namespace Newsbot.Collector.Domain.Dto;
/// <summary>
/// Article shape returned by the API when the owning source is embedded
/// alongside the article fields.
/// </summary>
public class ArticleDetailsDto
{
    public Guid ID { get; set; }
    public string[]? Tags { get; set; }
    public string? Title { get; set; }
    public string? Url { get; set; }
    public DateTime PubDate { get; set; }
    public string? Video { get; set; }
    public int VideoHeight { get; set; }
    public int VideoWidth { get; set; }
    public string? Thumbnail { get; set; }
    public string? Description { get; set; }
    public string? AuthorName { get; set; }
    public string? AuthorImage { get; set; }

    /// <summary>The source the article belongs to, converted to its DTO form.</summary>
    public SourceDto? Source { get; set; }

    /// <summary>
    /// Builds the DTO from an article row and its source row.
    /// </summary>
    /// <param name="article">Article to copy the fields from; its tags are split on commas.</param>
    /// <param name="source">Source record to embed.</param>
    /// <returns>A new DTO holding the copied values.</returns>
    public static ArticleDetailsDto Convert(ArticlesModel article, SourceModel source) => new()
    {
        ID = article.ID,
        Tags = article.Tags.Split(','),
        Title = article.Title,
        Url = article.URL,
        PubDate = article.PubDate,
        Video = article.Video,
        VideoHeight = article.VideoHeight,
        VideoWidth = article.VideoWidth,
        Thumbnail = article.Thumbnail,
        Description = article.Description,
        AuthorName = article.AuthorName,
        AuthorImage = article.AuthorImage,
        Source = SourceDto.Convert(source)
    };
}

View File

@ -0,0 +1,39 @@
using Newsbot.Collector.Domain.Models;
namespace Newsbot.Collector.Domain.Dto;
/// <summary>
/// Article shape returned by the API; the source is referenced by ID only.
/// </summary>
public class ArticleDto
{
    public Guid ID { get; set; }

    /// <summary>ID of the source the article belongs to.</summary>
    public Guid SourceID { get; set; }

    public string[]? Tags { get; set; }
    public string? Title { get; set; }
    public string? Url { get; set; }
    public DateTime PubDate { get; set; }
    public string? Video { get; set; }
    public int VideoHeight { get; set; }
    public int VideoWidth { get; set; }
    public string? Thumbnail { get; set; }
    public string? Description { get; set; }
    public string? AuthorName { get; set; }
    public string? AuthorImage { get; set; }

    /// <summary>
    /// Builds the DTO from an article row.
    /// </summary>
    /// <param name="article">Article to copy the fields from; its tags are split on commas.</param>
    /// <returns>A new DTO holding the copied values.</returns>
    public static ArticleDto Convert(ArticlesModel article) => new()
    {
        ID = article.ID,
        SourceID = article.SourceID,
        Tags = article.Tags.Split(','),
        Title = article.Title,
        Url = article.URL,
        PubDate = article.PubDate,
        Video = article.Video,
        VideoHeight = article.VideoHeight,
        VideoWidth = article.VideoWidth,
        Thumbnail = article.Thumbnail,
        Description = article.Description,
        AuthorName = article.AuthorName,
        AuthorImage = article.AuthorImage,
    };
}

View File

@ -0,0 +1,24 @@
using Newsbot.Collector.Domain.Models;
namespace Newsbot.Collector.Domain.Dto;
/// <summary>
/// Discord webhook shape returned by the API.
/// </summary>
public class DiscordWebHookDto
{
    public Guid ID { get; set; }
    public string? Url { get; set; }
    public string? Server { get; set; }
    public string? Channel { get; set; }
    public bool Enabled { get; set; }

    /// <summary>
    /// Builds the DTO by copying every field from a webhook row.
    /// </summary>
    /// <param name="model">Webhook record to copy from.</param>
    /// <returns>A new DTO holding the copied values.</returns>
    public static DiscordWebHookDto Convert(DiscordWebHookModel model) => new()
    {
        ID = model.ID,
        Url = model.Url,
        Server = model.Server,
        Channel = model.Channel,
        Enabled = model.Enabled,
    };
}

View File

@ -0,0 +1,34 @@
using System.Net.NetworkInformation;
using Newsbot.Collector.Domain.Models;
namespace Newsbot.Collector.Domain.Dto;
/// <summary>
/// Transfer object that mirrors the fields of a <see cref="SourceModel"/>,
/// exposing the tags as an array instead of a single comma separated value.
/// </summary>
public class SourceDto
{
    public Guid ID { get; set; }
    public string? Site { get; set; }
    public string? Name { get; set; }
    public string? Source { get; set; }
    public string? Type { get; set; }
    public string? Value { get; set; }
    public bool Enabled { get; set; }
    public string? Url { get; set; }
    public string[]? Tags { get; set; }
    public bool Deleted { get; set; }

    /// <summary>
    /// Copies the values of <paramref name="model"/> onto a new <see cref="SourceDto"/>.
    /// </summary>
    /// <param name="model">Source record to copy from. Its <c>Tags</c> value is split on commas.</param>
    /// <returns>A newly created DTO carrying the copied values.</returns>
    public static SourceDto Convert(SourceModel model)
    {
        var dto = new SourceDto();
        dto.ID = model.ID;
        dto.Site = model.Site;
        dto.Name = model.Name;
        dto.Source = model.Source;
        dto.Type = model.Type;
        dto.Value = model.Value;
        dto.Enabled = model.Enabled;
        dto.Url = model.Url;
        dto.Tags = model.Tags.Split(',');
        dto.Deleted = model.Deleted;
        return dto;
    }
}

View File

@ -0,0 +1,20 @@
using Newsbot.Collector.Domain.Models;
namespace Newsbot.Collector.Domain.Dto;
/// <summary>
/// Transfer object describing a subscription together with the converted
/// source and Discord webhook records handed to <see cref="Convert"/>.
/// </summary>
public class SubscriptionDetailsDto
{
    public Guid ID { get; set; }
    public SourceDto? Source { get; set; }
    public DiscordWebHookDto? DiscordWebHook { get; set; }

    /// <summary>
    /// Combines a subscription with its related records into one DTO.
    /// </summary>
    /// <param name="subscription">Subscription record; only its <c>ID</c> is read.</param>
    /// <param name="source">Source record, converted through <see cref="SourceDto.Convert"/>.</param>
    /// <param name="discord">Webhook record, converted through <see cref="DiscordWebHookDto.Convert"/>.</param>
    /// <returns>A newly created DTO.</returns>
    public static SubscriptionDetailsDto Convert(SubscriptionModel subscription, SourceModel source, DiscordWebHookModel discord)
    {
        var details = new SubscriptionDetailsDto();
        details.ID = subscription.ID;
        details.Source = SourceDto.Convert(source);
        details.DiscordWebHook = DiscordWebHookDto.Convert(discord);
        return details;
    }
}

View File

@ -0,0 +1,20 @@
using Newsbot.Collector.Domain.Models;
namespace Newsbot.Collector.Domain.Dto;
/// <summary>
/// Transfer object that mirrors the identifiers stored on a <see cref="SubscriptionModel"/>.
/// </summary>
public class SubscriptionDto
{
    public Guid ID { get; set; }
    public Guid SourceID { get; set; }
    public Guid DiscordWebHookID { get; set; }

    /// <summary>
    /// Copies the identifiers of <paramref name="model"/> onto a new <see cref="SubscriptionDto"/>.
    /// </summary>
    /// <param name="model">Subscription record to copy from.</param>
    /// <returns>A newly created DTO carrying the copied values.</returns>
    public static SubscriptionDto Convert(SubscriptionModel model)
    {
        var dto = new SubscriptionDto();
        dto.ID = model.ID;
        dto.SourceID = model.SourceID;
        dto.DiscordWebHookID = model.DiscordWebHookID;
        return dto;
    }
}

View File

@ -0,0 +1,18 @@
namespace Newsbot.Collector.Domain.Exceptions;
/// <summary>
/// Exception type with the three conventional constructors, each of which
/// simply forwards to <see cref="Exception"/>.
/// </summary>
public class MissingHeaderValueException : Exception
{
    /// <summary>Creates the exception without a message.</summary>
    public MissingHeaderValueException() : base() { }

    /// <summary>Creates the exception with a message.</summary>
    /// <param name="message">Text describing the failure.</param>
    public MissingHeaderValueException(string message) : base(message) { }

    /// <summary>Creates the exception with a message and the exception that caused it.</summary>
    /// <param name="message">Text describing the failure.</param>
    /// <param name="inner">Underlying exception.</param>
    public MissingHeaderValueException(string message, Exception inner) : base(message, inner) { }
}

View File

@ -4,7 +4,8 @@ namespace Newsbot.Collector.Domain.Interfaces;
public interface IArticlesRepository : ITableRepository public interface IArticlesRepository : ITableRepository
{ {
List<ArticlesModel>List(int age, int count); List<ArticlesModel>List(int page, int count);
List<ArticlesModel>ListBySourceId(Guid id, int page = 0, int count = 25);
ArticlesModel GetById(Guid ID); ArticlesModel GetById(Guid ID);
ArticlesModel GetByUrl(string url); ArticlesModel GetByUrl(string url);
ArticlesModel New(ArticlesModel model); ArticlesModel New(ArticlesModel model);

View File

@ -0,0 +1,19 @@
using Microsoft.VisualBasic;
using Newsbot.Collector.Domain.Models;
namespace Newsbot.Collector.Domain.Interfaces;
/// <summary>
/// Operations for creating, looking up, listing and toggling
/// <see cref="DiscordWebHookModel"/> records.
/// </summary>
public interface IDiscordWebHooksRepository
{
    // --- create ---

    /// <summary>Adds a webhook record and returns a <see cref="DiscordWebHookModel"/>.</summary>
    DiscordWebHookModel New(DiscordWebHookModel model);

    // --- single record lookups ---

    /// <summary>Looks up one webhook by its identifier.</summary>
    DiscordWebHookModel GetByID(Guid id);

    /// <summary>Looks up one webhook by its url.</summary>
    DiscordWebHookModel GetByUrl(string url);

    // --- listings ---

    /// <summary>Lists webhooks. NOTE(review): <paramref name="page"/>/<paramref name="count"/> presumably page the results - confirm against the implementation.</summary>
    List<DiscordWebHookModel> List(int page, int count = 25);

    /// <summary>Lists webhooks for a server, taking a <paramref name="limit"/> value.</summary>
    List<DiscordWebHookModel> ListByServer(string server, int limit);

    /// <summary>Lists webhooks for a server and channel, taking a <paramref name="limit"/> value.</summary>
    List<DiscordWebHookModel> ListByServerAndChannel(string server, string channel, int limit);

    // --- state changes ---
    // NOTE(review): the meaning of the int result is not visible here (possibly affected rows) - confirm.

    /// <summary>Disables the webhook with the given identifier.</summary>
    int Disable(Guid id);

    /// <summary>Enables the webhook with the given identifier.</summary>
    int Enable(Guid id);
}

View File

@ -10,7 +10,7 @@ public interface ISourcesRepository
public SourceModel GetByID(string ID); public SourceModel GetByID(string ID);
public SourceModel GetByName(string name); public SourceModel GetByName(string name);
public SourceModel GetByNameAndSource(string name, string source); public SourceModel GetByNameAndSource(string name, string source);
public List<SourceModel> List(int limit); public List<SourceModel> List(int page, int count);
public List<SourceModel> ListBySource(string source, int limit); public List<SourceModel> ListBySource(string source, int limit);
public List<SourceModel> ListByType(string type, int limit = 25); public List<SourceModel> ListByType(string type, int limit = 25);
public int Disable(Guid ID); public int Disable(Guid ID);

View File

@ -0,0 +1,17 @@
using Newsbot.Collector.Domain.Models;
namespace Newsbot.Collector.Domain.Interfaces;
/// <summary>
/// Operations for creating, looking up, listing and deleting
/// <see cref="SubscriptionModel"/> records.
/// </summary>
public interface ISubscriptionRepository
{
    // --- create ---

    /// <summary>Adds a subscription record and returns a <see cref="SubscriptionModel"/>.</summary>
    SubscriptionModel New(SubscriptionModel model);

    // --- listings (page defaults to 0, count defaults to 25) ---

    /// <summary>Lists subscriptions.</summary>
    List<SubscriptionModel> List(int page = 0, int count = 25);

    /// <summary>Lists subscriptions for the given source identifier.</summary>
    List<SubscriptionModel> ListBySourceID(Guid id, int page = 0, int count = 25);

    /// <summary>Lists subscriptions for the given webhook identifier.</summary>
    List<SubscriptionModel> ListByWebhook(Guid id, int page = 0, int count = 25);

    // --- single record lookups ---

    /// <summary>Looks up one subscription by its identifier.</summary>
    SubscriptionModel GetById(Guid id);

    /// <summary>Looks up one subscription by its webhook and source identifiers.</summary>
    SubscriptionModel GetByWebhookAndSource(Guid webhookId, Guid sourceId);

    // --- delete ---

    /// <summary>Deletes the subscription with the given identifier.</summary>
    void Delete(Guid id);
}

View File

@ -14,3 +14,8 @@ public class RedditConfigModel
public bool PullNsfw { get; set; } public bool PullNsfw { get; set; }
public bool PullTop { get; set; } public bool PullTop { get; set; }
} }
/// <summary>
/// Holder for connection string values; <see cref="Database"/> starts out as an empty string.
/// </summary>
public class ConnectionStrings
{
    /// <summary>Database connection string.</summary>
    public string Database { get; set; } = string.Empty;
}

View File

@ -31,7 +31,7 @@ public class DiscordQueueModel
public Guid ArticleID { get; set; } public Guid ArticleID { get; set; }
} }
public class DiscordWebHook public class DiscordWebHookModel
{ {
public Guid ID { get; set; } public Guid ID { get; set; }
public string Url { get; set; } = ""; public string Url { get; set; } = "";

View File

@ -1,129 +0,0 @@
using System.Data;
using System.Runtime.Serialization;
using System.Xml;
using HtmlAgilityPack;
namespace Newsbot.Collector.Services;
/// <summary>
/// Root container for the values read from a page; starts with an empty header.
/// </summary>
public class HtmlData
{
    /// <summary>Values taken from the page header.</summary>
    public HtmlHeaderData Header { get; set; } = new();
}
/// <summary>
/// Container for header level values; starts with an empty meta section.
/// </summary>
public class HtmlHeaderData
{
    /// <summary>Values taken from the meta tags.</summary>
    public HtmlMetaData Meta { get; set; } = new();
}
/// <summary>
/// Plain holder for meta tag values. Every property starts as an empty string.
/// </summary>
public class HtmlMetaData
{
    public string Title { get; set; } = string.Empty;
    public string Description { get; set; } = string.Empty;
    public string Image { get; set; } = string.Empty;
    public string Url { get; set; } = string.Empty;
    public string PageType { get; set; } = string.Empty;
}
/// <summary>
/// Downloads a page and fills <see cref="Data"/> with the values found in the
/// meta tags of its <c>&lt;head&gt;</c> element.
/// </summary>
public class HtmlPageReader
{
    public HtmlData Data { get; set; }

    private const string XPathMetaTag = "//head/meta";
    private string _siteContent;

    // Parsed once on first use; the page content never changes after construction.
    private List<HtmlNode>? _metaTags;

    /// <summary>
    /// Downloads <paramref name="pageUrl"/> and immediately extracts the meta values.
    /// </summary>
    /// <param name="pageUrl">Address of the page to read.</param>
    public HtmlPageReader(string pageUrl)
    {
        _siteContent = ReadSiteContent(pageUrl);

        Data = new HtmlData();
        Data.Header.Meta.Title = GetMetaTitle();
        Data.Header.Meta.Description = GetDescription();
        Data.Header.Meta.Image = GetImage();
        Data.Header.Meta.Url = GetUrl();
        Data.Header.Meta.PageType = GetPageType();
    }

    /// <summary>
    /// Fetches the page body as text. This blocks the calling thread until the download finishes;
    /// failures surface wrapped in an <see cref="AggregateException"/>, as before.
    /// </summary>
    private string ReadSiteContent(string url)
    {
        using var client = new HttpClient();
        var html = client.GetStringAsync(url);
        html.Wait();
        return html.Result;
    }

    /// <summary>
    /// Returns the meta nodes of the page head, or an empty list when the page has none.
    /// </summary>
    private List<HtmlNode> CollectMetaTags()
    {
        if (_metaTags is not null)
        {
            return _metaTags;
        }

        var htmlDoc = new HtmlDocument();
        htmlDoc.LoadHtml(_siteContent);

        // SelectNodes yields null (not an empty collection) when nothing matches.
        var nodes = htmlDoc.DocumentNode.SelectNodes(XPathMetaTag);
        _metaTags = nodes is null ? new List<HtmlNode>() : nodes.ToList();
        return _metaTags;
    }

    /// <summary>
    /// Finds the first meta node whose first attribute value contains <paramref name="Tag"/>
    /// and returns the value of its second attribute.
    /// </summary>
    /// <param name="Tag">Text to look for, for example <c>og:title</c>.</param>
    /// <returns>The matching value, or an empty string when nothing matches.</returns>
    public string GetTagValue(string Tag)
    {
        foreach (var meta in CollectMetaTags())
        {
            var attributes = meta.Attributes;

            // A node needs a key attribute and a value attribute to be usable.
            if (attributes.Count < 2)
            {
                continue;
            }

            if (attributes[0].Value.Contains(Tag) == false)
            {
                continue;
            }

            return attributes[1].Value;
        }

        return "";
    }

    /// <summary>
    /// Tries each tag in order and returns the first non-empty value, or an empty string.
    /// </summary>
    private string FindFirstResult(string[] tags)
    {
        foreach (var tag in tags)
        {
            var res = GetTagValue(tag);
            if (res != "")
            {
                return res;
            }
        }

        return "";
    }

    /// <summary>Returns the page title taken from the meta tags, or an empty string.</summary>
    public string GetMetaTitle()
    {
        return FindFirstResult(new string[] { "og:title", "twitter:title", "title" });
    }

    /// <summary>Returns the page description taken from the meta tags, or an empty string.</summary>
    public string GetDescription()
    {
        return FindFirstResult(new string[] { "description", "og:description" });
    }

    /// <summary>Returns the page image taken from the meta tags, or an empty string.</summary>
    public string GetImage()
    {
        return FindFirstResult(new string[] { "image", "og:image", "twitter:image" });
    }

    /// <summary>Returns the page url taken from the meta tags, or an empty string.</summary>
    public string GetUrl()
    {
        return FindFirstResult(new string[] { "url", "og:url", "twitter:url" });
    }

    /// <summary>Returns the page type taken from the meta tags, or an empty string.</summary>
    public string GetPageType()
    {
        return FindFirstResult(new string[] { "og:type", "type" });
    }
}

View File

@ -0,0 +1,186 @@
using HtmlAgilityPack;
using Newsbot.Collector.Domain.Exceptions;
namespace Newsbot.Collector.Services.HtmlParser;
/// <summary>
/// Reads the <c>&lt;head&gt;</c> of an HTML document and extracts well known values
/// (title, description, image, url, page type, theme color and a feed link) from its
/// <c>meta</c> and <c>link</c> tags.
/// </summary>
public class HeadParserClient
{
    private const string XPathMetaTag = "//head/meta";
    private const string XPathLinkTag = "//head/link";

    public HeadParserModel Data { get; set; }

    private string _htmlContent;

    // The document is parsed lazily and only once per tag kind; the content is fixed at construction.
    private List<HtmlNode>? _metaTags;
    private List<HtmlNode>? _linkTags;

    /// <summary>Creates a parser over the given HTML text. Nothing is parsed until requested.</summary>
    /// <param name="htmlContent">Raw HTML of the page.</param>
    public HeadParserClient(string htmlContent)
    {
        _htmlContent = htmlContent;
        Data = new HeadParserModel();
    }

    /// <summary>Runs every extractor and stores the results on <see cref="Data"/>.</summary>
    public void Parse()
    {
        Data.Title = GetMetaTitle();
        Data.Description = GetMetaDescription();
        Data.Image = GetMetaImage();
        Data.Url = GetMetaUrl();
        Data.PageType = GetMetaPageType();
        Data.ColorTheme = GetMetaColorTheme();
        Data.FeedUri = GetSiteFeed();
    }

    /// <summary>Returns the head meta nodes, or an empty list when there are none.</summary>
    private List<HtmlNode> CollectMetaTags()
    {
        return _metaTags ??= SelectHeadNodes(XPathMetaTag);
    }

    /// <summary>Returns the head link nodes, or an empty list when there are none.</summary>
    private List<HtmlNode> CollectLinkTags()
    {
        return _linkTags ??= SelectHeadNodes(XPathLinkTag);
    }

    /// <summary>Loads the document and selects the nodes matching <paramref name="xpath"/>.</summary>
    private List<HtmlNode> SelectHeadNodes(string xpath)
    {
        var htmlDoc = new HtmlDocument();
        htmlDoc.LoadHtml(_htmlContent);

        // SelectNodes yields null (not an empty collection) when nothing matches.
        var nodes = htmlDoc.DocumentNode.SelectNodes(xpath);
        return nodes is null ? new List<HtmlNode>() : nodes.ToList();
    }

    /// <summary>
    /// Finds the first node whose first attribute value contains <paramref name="Tag"/>
    /// and returns the value of its second attribute, or an empty string.
    /// </summary>
    private string GetTagValue(string Tag, List<HtmlNode> html)
    {
        foreach (var meta in html)
        {
            var attributes = meta.Attributes;

            // A node needs a key attribute and a value attribute to be usable.
            if (attributes.Count < 2)
            {
                continue;
            }

            if (attributes[0].Value.Contains(Tag) == false)
            {
                continue;
            }

            return attributes[1].Value;
        }

        return "";
    }

    /// <summary>Tries each tag in order and returns the first non-empty value, or an empty string.</summary>
    private string FindFirstResult(string[] tags, List<HtmlNode> htmlTags)
    {
        foreach (var tag in tags)
        {
            var res = GetTagValue(tag, htmlTags);
            if (res != "")
            {
                return res;
            }
        }

        return "";
    }

    /// <summary>Returns the page title from the meta tags, or an empty string.</summary>
    public string GetMetaTitle()
    {
        string[] tags = new string[] { "twitter:title", "og:title", "title" };
        return FindFirstResult(tags, CollectMetaTags());
    }

    /// <summary>Returns the page description from the meta tags, or an empty string.</summary>
    public string GetMetaDescription()
    {
        string[] tags = new string[] { "twitter:description", "og:description", "description" };
        return FindFirstResult(tags, CollectMetaTags());
    }

    /// <summary>Returns the page image from the meta tags, or an empty string.</summary>
    public string GetMetaImage()
    {
        string[] tags = new string[] { "twitter:image", "og:image", "image" };
        return FindFirstResult(tags, CollectMetaTags());
    }

    /// <summary>Returns the page url from the meta tags, or an empty string.</summary>
    public string GetMetaUrl()
    {
        string[] tags = new string[] { "twitter:url", "og:url", "url" };
        return FindFirstResult(tags, CollectMetaTags());
    }

    /// <summary>Returns the page type from the meta tags, or an empty string.</summary>
    public string GetMetaPageType()
    {
        string[] tags = new string[] { "og:type", "type" };
        return FindFirstResult(tags, CollectMetaTags());
    }

    /// <summary>Returns the theme color from the meta tags, or an empty string.</summary>
    public string GetMetaColorTheme()
    {
        string[] tags = new string[] { "theme-color" };
        return FindFirstResult(tags, CollectMetaTags());
    }

    /// <summary>
    /// Looks through the head <c>link</c> tags for the first one whose first attribute value
    /// contains <c>alternate</c> and returns its <c>href</c>.
    /// A protocol relative address (<c>//host/path</c>) is returned with <c>https:</c> in front.
    /// </summary>
    /// <returns>The feed address, or an empty string when no such link is found. This method does not throw for a missing feed.</returns>
    public string GetSiteFeed()
    {
        // NOTE(review): any link matching "alternate" is accepted; the link's type attribute is not checked.
        var attributes = FindFirstAttribute(new string[] { "alternate" }, CollectLinkTags());
        if (attributes is null)
        {
            return "";
        }

        foreach (var item in attributes)
        {
            if (item.Name != "href")
            {
                continue;
            }

            var uri = item.Value;
            if (string.IsNullOrEmpty(uri))
            {
                return "";
            }

            // Only the leading "//" is a missing scheme; later "//" occurrences belong to the path.
            if (uri.StartsWith("//", StringComparison.Ordinal))
            {
                uri = "https:" + uri;
            }

            return uri;
        }

        return "";
    }

    /// <summary>Returns the attributes of the first node matching any of the tags, or null.</summary>
    private HtmlAttributeCollection? FindFirstAttribute(string[] tags, List<HtmlNode> htmlTags)
    {
        foreach (var tag in tags)
        {
            var match = GetValidAttribute(tag, htmlTags);
            if (match is not null)
            {
                return match;
            }
        }

        return null;
    }

    /// <summary>Returns the attributes of the first node whose first attribute value contains the tag, or null.</summary>
    private HtmlAttributeCollection? GetValidAttribute(string Tag, List<HtmlNode> html)
    {
        foreach (var node in html)
        {
            var attributes = node.Attributes;
            if (attributes.Count > 0 && attributes[0].Value.Contains(Tag))
            {
                return attributes;
            }
        }

        return null;
    }
}

View File

@ -0,0 +1,71 @@
using HtmlAgilityPack;
using Newsbot.Collector.Domain.Exceptions;
namespace Newsbot.Collector.Services.HtmlParser;
/// <summary>
/// Downloads a page and exposes both its raw content and the values parsed from its head.
/// </summary>
public class HtmlPageReader
{
    public HtmlData Data { get; set; }

    private HeadParserClient _headClient;
    private string _siteContent;

    /// <summary>
    /// Downloads <paramref name="pageUrl"/>. Call <see cref="Parse"/> afterwards to fill <see cref="Data"/>.
    /// </summary>
    /// <param name="pageUrl">Address of the page to read.</param>
    public HtmlPageReader(string pageUrl)
    {
        _siteContent = ReadSiteContent(pageUrl);
        _headClient = new HeadParserClient(_siteContent);
        Data = new HtmlData();
    }

    /// <summary>Parses the head of the downloaded page and stores the result in <see cref="Data"/>.</summary>
    public void Parse()
    {
        _headClient.Parse();
        Data.Header = _headClient.Data;
    }

    /// <summary>
    /// Fetches the page body as text. This blocks the calling thread until the download finishes;
    /// failures surface wrapped in an <see cref="AggregateException"/>, as before.
    /// </summary>
    private string ReadSiteContent(string url)
    {
        using var client = new HttpClient();
        var html = client.GetStringAsync(url);
        html.Wait();
        return html.Result;
    }

    /// <summary>Returns the raw text that was downloaded in the constructor.</summary>
    public string GetSiteContent()
    {
        return _siteContent;
    }

    /// <summary>
    /// Selects the <c>div</c> whose class contains <c>article-text</c>.
    /// </summary>
    /// <returns>A list holding the single matching node.</returns>
    /// <exception cref="Exception">Thrown when no node or more than one node matches.</exception>
    public List<HtmlNode> CollectPostContent()
    {
        var htmlDoc = new HtmlDocument();
        htmlDoc.LoadHtml(_siteContent);

        // SelectNodes yields null (not an empty collection) when nothing matches,
        // so the null case has to be treated as "no results" as well.
        var matches = htmlDoc.DocumentNode.SelectNodes("//div[contains(@class, 'article-text')]");
        if (matches is null || matches.Count == 0)
        {
            throw new Exception("Unable to parse body. Tag is unkown.");
        }

        if (matches.Count >= 2)
        {
            throw new Exception("Too many results back for the body");
        }

        return matches.ToList();
    }
}

View File

@ -0,0 +1,18 @@
namespace Newsbot.Collector.Services.HtmlParser;
/// <summary>
/// Plain holder for the values read from a page head. The text properties start
/// as empty strings; <see cref="FeedUri"/> starts as null.
/// </summary>
public class HeadParserModel
{
    public string Title { get; set; } = string.Empty;
    public string Description { get; set; } = string.Empty;
    public string Image { get; set; } = string.Empty;
    public string Url { get; set; } = string.Empty;
    public string PageType { get; set; } = string.Empty;
    public string ColorTheme { get; set; } = string.Empty;

    /// <summary>Feed address; has no initializer, so it is null until assigned.</summary>
    public string? FeedUri { get; set; }
}
/// <summary>
/// Root container for the values read from a page; starts with an empty header model.
/// </summary>
public class HtmlData
{
    /// <summary>Values taken from the page head.</summary>
    public HeadParserModel Header { get; set; } = new();
}

View File

@ -0,0 +1,42 @@
using Newsbot.Collector.Database.Repositories;
using Newsbot.Collector.Domain.Interfaces;
namespace Newsbot.Collector.Services.Jobs;
/// <summary>
/// Settings handed to <c>GithubWatcherJob.InitAndExecute</c>. The connection string
/// starts empty and both feature switches start off.
/// </summary>
public class GithubWatcherJobOptions
{
    public string ConnectionString { get; set; } = string.Empty;

    // bool properties are false unless assigned.
    public bool FeaturePullReleases { get; set; }
    public bool FeaturePullCommits { get; set; }
}
/// <summary>
/// Job skeleton for watching GitHub. It currently only wires up its table clients;
/// the collection steps are still listed as comments.
/// </summary>
public class GithubWatcherJob
{
    private IArticlesRepository _articles;
    private IDiscordQueueRepository _queue;
    private ISourcesRepository _source;

    /// <summary>
    /// Parameterless constructor. The table clients are created with an empty connection
    /// string and are replaced once <see cref="InitAndExecute"/> receives the real options.
    /// </summary>
    public GithubWatcherJob()
    {
        const string noConnection = "";
        _articles = new ArticlesTable(noConnection);
        _queue = new DiscordQueueTable(noConnection);
        _source = new SourcesTable(noConnection);
    }

    /// <summary>Recreates the table clients using the connection string from the options.</summary>
    private void Init(GithubWatcherJobOptions options)
    {
        var connectionString = options.ConnectionString;
        _articles = new ArticlesTable(connectionString);
        _queue = new DiscordQueueTable(connectionString);
        _source = new SourcesTable(connectionString);
    }

    /// <summary>Entry point of the job: applies the options; the remaining steps are not implemented yet.</summary>
    /// <param name="options">Connection string and feature switches for this run.</param>
    public void InitAndExecute(GithubWatcherJobOptions options)
    {
        Init(options);

        // Planned steps:
        // query sources for things to pull
        // query */release.atom
        // query */commits.atom
    }
}

View File

@ -1,5 +1,4 @@
using Serilog;
using Microsoft.Extensions.Configuration;
namespace Newsbot.Collector.Services.Jobs; namespace Newsbot.Collector.Services.Jobs;
@ -10,21 +9,22 @@ public class HelloWorldJobOptions
public class HelloWorldJob public class HelloWorldJob
{ {
private HelloWorldJobOptions _options; private string _message;
public HelloWorldJob(HelloWorldJobOptions options) public HelloWorldJob()
{ {
_options = options; _message = "";
} }
public void InitAndExecute(HelloWorldJobOptions options) public void InitAndExecute(HelloWorldJobOptions options)
{ {
_options = options; _message = options.Message;
Execute(); Execute();
} }
private void Execute() private void Execute()
{ {
Console.WriteLine(_options.Message); Log.Information("Starting 'HelloWorldJob'");
Log.Information(_message);
} }
} }

View File

@ -1,4 +1,3 @@
using System.Runtime.InteropServices;
using System.ServiceModel.Syndication; using System.ServiceModel.Syndication;
using System.Xml; using System.Xml;
using Microsoft.Extensions.Configuration; using Microsoft.Extensions.Configuration;
@ -6,6 +5,8 @@ using Newsbot.Collector.Database.Repositories;
using Newsbot.Collector.Domain.Consts; using Newsbot.Collector.Domain.Consts;
using Newsbot.Collector.Domain.Interfaces; using Newsbot.Collector.Domain.Interfaces;
using Newsbot.Collector.Domain.Models; using Newsbot.Collector.Domain.Models;
using Newsbot.Collector.Services.HtmlParser;
using Serilog;
namespace Newsbot.Collector.Services.Jobs; namespace Newsbot.Collector.Services.Jobs;
@ -31,24 +32,29 @@ public class RssWatcherJob : IHangfireJob
public void InitAndExecute(RssWatcherJobOptions options) public void InitAndExecute(RssWatcherJobOptions options)
{ {
Console.WriteLine("Job was triggered"); Log.Information("RssWatcherJob - Job was triggered");
Console.WriteLine("Setting up the job"); Log.Information("RssWatcherJob - Setting up the job");
Init(options.ConnectionString); Init(options.ConnectionString);
var articles = new List<ArticlesModel>(); var articles = new List<ArticlesModel>();
Console.WriteLine("Requesting sources"); Log.Information("RssWatcherJob - Requesting sources");
var sources = _source.ListByType(SourceTypes.Rss); var sources = _source.ListByType(SourceTypes.Rss);
Console.WriteLine($"Got {sources.Count()} back"); Log.Information($"RssWatcherJob - Got {sources.Count()} back");
foreach (var source in sources) foreach (var source in sources)
{ {
Console.WriteLine("Starting to request feed to be processed"); Log.Information($"RssWatcherJob - Starting to proces '{source.Name}'");
var results = Collect(source.Url); Log.Information("RssWatcherJob - Starting to request feed to be processed");
var results = Collect(source.Url, source.ID);
Log.Information($"RssWatcherJob - Collected {results.Count()} posts");
articles.AddRange(results); articles.AddRange(results);
} }
Log.Information("RssWatcherJob - Sending posts over to the database");
UpdateDatabase(articles); UpdateDatabase(articles);
Log.Information("RssWatcherJob - Done!");
} }
public void InitAndExecute(IConfiguration config) public void InitAndExecute(IConfiguration config)
@ -66,7 +72,7 @@ public class RssWatcherJob : IHangfireJob
var sources = _source.ListByType(SourceTypes.Rss); var sources = _source.ListByType(SourceTypes.Rss);
foreach (var source in sources) foreach (var source in sources)
{ {
var results = Collect(source.Url); var results = Collect(source.Url, source.ID);
articles.AddRange(results); articles.AddRange(results);
} }
@ -81,7 +87,7 @@ public class RssWatcherJob : IHangfireJob
_source = new SourcesTable(connectionString); _source = new SourcesTable(connectionString);
} }
public List<ArticlesModel> Collect(string url, int sleep = 3000) public List<ArticlesModel> Collect(string url, Guid SourceID, int sleep = 3000)
{ {
var CollectedPosts = new List<ArticlesModel>(); var CollectedPosts = new List<ArticlesModel>();
@ -100,6 +106,7 @@ public class RssWatcherJob : IHangfireJob
} }
var meta = new HtmlPageReader(articleUrl); var meta = new HtmlPageReader(articleUrl);
meta.Parse();
var article = new ArticlesModel var article = new ArticlesModel
{ {
@ -107,8 +114,9 @@ public class RssWatcherJob : IHangfireJob
Tags = FetchTags(post), Tags = FetchTags(post),
URL = articleUrl, URL = articleUrl,
PubDate = post.PublishDate.DateTime, PubDate = post.PublishDate.DateTime,
Thumbnail = meta.Data.Header.Meta.Image, Thumbnail = meta.Data.Header.Image,
Description = meta.Data.Header.Meta.Description, Description = meta.Data.Header.Description,
SourceID = SourceID
}; };
CollectedPosts.Add(article); CollectedPosts.Add(article);
@ -123,7 +131,7 @@ public class RssWatcherJob : IHangfireJob
{ {
foreach (var item in items) foreach (var item in items)
{ {
if (IsThisUrlKnown(item.URL) == false) if (IsThisUrlKnown(item.URL) == true)
{ {
continue; continue;
} }

View File

@ -7,6 +7,7 @@
<ItemGroup> <ItemGroup>
<PackageReference Include="HtmlAgilityPack" Version="1.11.46" /> <PackageReference Include="HtmlAgilityPack" Version="1.11.46" />
<PackageReference Include="Serilog" Version="2.12.0" />
<PackageReference Include="System.ServiceModel.Syndication" Version="7.0.0" /> <PackageReference Include="System.ServiceModel.Syndication" Version="7.0.0" />
</ItemGroup> </ItemGroup>

View File

@ -27,7 +27,7 @@ public class RssWatcherJobTest
{ {
var url = "https://www.engadget.com/rss.xml"; var url = "https://www.engadget.com/rss.xml";
var client = new RssWatcherJob(); var client = new RssWatcherJob();
var items = client.Collect(url); var items = client.Collect(url, Guid.NewGuid(), 0);
} }
[Fact] [Fact]
@ -36,6 +36,28 @@ public class RssWatcherJobTest
var url = "https://www.engadget.com/rss.xml"; var url = "https://www.engadget.com/rss.xml";
var client = new RssWatcherJob(); var client = new RssWatcherJob();
client.Init(ConnectionString()); client.Init(ConnectionString());
client.Collect(url, 0); var items = client.Collect(url, Guid.NewGuid(), 0);
client.UpdateDatabase(items);
}
[Fact]
public void CanReadHtmlDrivenFeedPage()
{
var url = "https://www.howtogeek.com/feed/";
var client = new RssWatcherJob();
client.Init(ConnectionString());
var items = client.Collect(url, Guid.NewGuid(), 0);
Console.WriteLine('k');
}
[Fact]
public void InitAndExecuteTest()
{
var client = new RssWatcherJob();
client.InitAndExecute(new RssWatcherJobOptions
{
ConnectionString = ConnectionString()
});
} }
} }

View File

@ -0,0 +1,38 @@
using Newsbot.Collector.Services.HtmlParser;
namespace Newsbot.Collector.Tests.Services;
/// <summary>
/// Tests for the HTML reader and head parser. Every test downloads a live page,
/// so they need network access.
/// </summary>
public class HtmlPageReaderTests
{
    /// <summary>Fails when no feed link is found on the page.</summary>
    [Fact]
    public void BaseSiteContainsRssFeed()
    {
        var siteContent = new HtmlPageReader("https://dotnettutorials.net/").GetSiteContent();
        var feedUri = new HeadParserClient(siteContent).GetSiteFeed();

        if (feedUri is "")
        {
            Assert.Fail("Failed to find the RSS feed");
        }
    }

    /// <summary>Fails when no feed link is found on the page.</summary>
    [Fact]
    public void SiteDoesNotReturnRssFeed()
    {
        // NOTE(review): the test name says the site does NOT return a feed, yet the test
        // fails exactly when the feed is empty - confirm which of the two is intended.
        var siteContent = new HtmlPageReader("https://www.engadget.com/").GetSiteContent();
        var feedUri = new HeadParserClient(siteContent).GetSiteFeed();

        if (feedUri is "")
        {
            Assert.Fail("");
        }
    }

    /// <summary>Passes as long as collecting the article body does not throw; nothing is asserted.</summary>
    [Fact]
    public void CanFindBodyOfTheArticle()
    {
        var reader = new HtmlPageReader("https://www.engadget.com/apple-is-convinced-my-dog-is-stalking-me-143100932.html");
        var body = reader.CollectPostContent();

        Console.WriteLine(body);
    }
}

25
makefile Normal file
View File

@ -0,0 +1,25 @@
# Developer shortcuts for building, publishing, containerising and migrating the project.
# The text after "##" on a target line is what the help target prints for that target.
.PHONY: help
# Prints the lines of the loaded makefile(s) that contain " ## ", sorted, as "target  description".
help: ## Shows this help command
@egrep -h '\s##\s' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}'
# Restores packages and compiles with the dotnet CLI.
# NOTE(review): the help text below mentions the "go runtime" although the recipe runs dotnet.
build: ## builds the application with the current go runtime
dotnet restore
dotnet build
# Publishes into ./out and then deletes the published appsettings.json
# (presumably so local settings are not shipped - confirm).
pub: ## Generate artifacts
dotnet restore
dotnet publish -o out
rm ./out/appsettings.json
# Builds the image tagged "newsbot.collector" and lists it to confirm it exists.
docker-build: ## Generates the docker image
docker build -t "newsbot.collector" .
docker image ls | grep newsbot.collector
# Starts the services defined for docker compose in the foreground.
docker-run: ## Runs the docker compose
docker compose up
# Runs goose "up" with the migrations folder against the postgres database on localhost.
migrate-dev: ## Apply sql migrations to dev db
goose -dir "./Newsbot.Collector.Database/Migrations" postgres "host=localhost user=postgres password=postgres dbname=postgres sslmode=disable" up
# Runs goose "down" with the migrations folder against the postgres database on localhost.
migrate-dev-down: ## revert sql migrations to dev db
goose -dir "./Newsbot.Collector.Database/Migrations" postgres "host=localhost user=postgres password=postgres dbname=postgres sslmode=disable" down