Features/adding youtube (#13)

* Found the meta tags on youtube... in the body and updated the client to pull them out.

* Updated namespace on test

* I think formatting cleaned this up

* Seed migrations have been cleaned up: my personal configs were removed from them and moved to a script.

* Updates to the ISourcesRepository.cs to allow for new calls to the db.

* formatter

* Db models updated. Icon now can track sourceID and source can have a youtube id.

* Updated api logger to ignore otel if no connection string given.

* updated docker init so I can run migrations from the image

* seed was updated to reflect the new api changes

* Updated the SourcesController.cs to grab icon data.

* Added reddit const values

* Minor changes to HtmlPageReader.cs

* Jobs are now pulling in the config section to bundle values.

* Removed youtube api, not needed anymore.

* test updates
This commit is contained in:
James Tombleson 2023-03-31 22:49:39 -07:00 committed by GitHub
parent ac6bdaa184
commit 9be985da0a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
24 changed files with 471 additions and 351 deletions

View File

@ -31,4 +31,4 @@ COPY --from=publish /app/build /app
COPY --from=build ./app/Newsbot.Collector.Database/Migrations/ /app/migrations COPY --from=build ./app/Newsbot.Collector.Database/Migrations/ /app/migrations
COPY --from=goose /go/bin/goose /app COPY --from=goose /go/bin/goose /app
ENTRYPOINT [ "dotnet", "Newsbot.Collector.Api.dll" ] CMD [ "dotnet", "Newsbot.Collector.Api.dll" ]

View File

@ -5,6 +5,7 @@ using Newsbot.Collector.Domain.Consts;
using Newsbot.Collector.Domain.Dto; using Newsbot.Collector.Domain.Dto;
using Newsbot.Collector.Domain.Interfaces; using Newsbot.Collector.Domain.Interfaces;
using Newsbot.Collector.Domain.Models; using Newsbot.Collector.Domain.Models;
using Newsbot.Collector.Services.HtmlParser;
namespace Newsbot.Collector.Api.Controllers; namespace Newsbot.Collector.Api.Controllers;
@ -12,6 +13,7 @@ namespace Newsbot.Collector.Api.Controllers;
[Route("api/sources")] [Route("api/sources")]
public class SourcesController : ControllerBase public class SourcesController : ControllerBase
{ {
private readonly IIconsRepository _icons;
private readonly ILogger<SourcesController> _logger; private readonly ILogger<SourcesController> _logger;
//private readonly ConnectionStrings _settings; //private readonly ConnectionStrings _settings;
@ -22,6 +24,7 @@ public class SourcesController : ControllerBase
_logger = logger; _logger = logger;
//_settings = settings.Value; //_settings = settings.Value;
_sources = new SourcesTable(settings.Value.Database); _sources = new SourcesTable(settings.Value.Database);
_icons = new IconsTable(settings.Value.Database);
} }
[HttpGet(Name = "GetSources")] [HttpGet(Name = "GetSources")]
@ -43,11 +46,19 @@ public class SourcesController : ControllerBase
} }
[HttpPost("new/reddit")] [HttpPost("new/reddit")]
public SourceDto NewReddit(string name, string url) public SourceDto NewReddit(string name)
{ {
var res = _sources.GetByNameAndType(name, SourceTypes.Reddit); var res = _sources.GetByNameAndType(name, SourceTypes.Reddit);
if (res.ID != Guid.Empty) return SourceDto.Convert(res); if (res.ID != Guid.Empty) return SourceDto.Convert(res);
var uri = new Uri($"https://reddit.com/r/{name}");
var pageReader = new HtmlPageReader(new HtmlPageReaderOptions
{
Url = uri.ToString()
});
pageReader.Parse();
var item = _sources.New(new SourceModel var item = _sources.New(new SourceModel
{ {
Site = SourceTypes.Reddit, Site = SourceTypes.Reddit,
@ -55,9 +66,18 @@ public class SourcesController : ControllerBase
Type = SourceTypes.Reddit, Type = SourceTypes.Reddit,
Source = "feed", Source = "feed",
Enabled = true, Enabled = true,
Url = url, Url = uri.ToString(),
Tags = $"{SourceTypes.Reddit}, {name}" Tags = $"{SourceTypes.Reddit},{name}"
}); });
// Not all subreddits have an Icon, so we only want to add a record when it has one.
if (pageReader.Data.Header.Image != "")
_icons.New(new IconModel
{
Id = Guid.NewGuid(),
FileName = pageReader.Data.Header.Image,
SourceId = item.ID
});
return SourceDto.Convert(item); return SourceDto.Convert(item);
} }
@ -75,27 +95,41 @@ public class SourcesController : ControllerBase
Source = "feed", Source = "feed",
Enabled = true, Enabled = true,
Url = url, Url = url,
Tags = $"{SourceTypes.Rss}, {name}" Tags = $"{SourceTypes.Rss},{name}"
}; };
var item = _sources.New(m); var item = _sources.New(m);
return SourceDto.Convert(item); return SourceDto.Convert(item);
} }
[HttpPost("new/youtube")] [HttpPost("new/youtube")]
public SourceDto NewYoutube(string name, string url) public SourceDto NewYoutube(string url)
{ {
var res = _sources.GetByNameAndType(name, SourceTypes.YouTube); var res = _sources.GetByUrl(url);
if (res.ID != Guid.Empty) return SourceDto.Convert(res); if (res.ID != Guid.Empty) return SourceDto.Convert(res);
var htmlClient = new HtmlPageReader(new HtmlPageReaderOptions
{
Url = url
});
htmlClient.Parse();
var item = _sources.New(new SourceModel var item = _sources.New(new SourceModel
{ {
Site = SourceTypes.YouTube, Site = SourceTypes.YouTube,
Type = SourceTypes.YouTube, Type = SourceTypes.YouTube,
Name = name, Name = htmlClient.Data.Header.Title,
Source = "feed", Source = "feed",
Url = url, Url = "feed",
Enabled = true, Enabled = true,
Tags = $"{SourceTypes.YouTube}, {name}" Tags = $"{SourceTypes.YouTube},{htmlClient.Data.Header.Title}",
YoutubeId = htmlClient.Data.Header.YoutubeChannelID ?? ""
});
_icons.New(new IconModel
{
Id = Guid.NewGuid(),
FileName = htmlClient.Data.Header.Image,
SourceId = item.ID
}); });
return SourceDto.Convert(item); return SourceDto.Convert(item);
@ -115,11 +149,48 @@ public class SourcesController : ControllerBase
Url = $"https://twitch.tv/{name}", Url = $"https://twitch.tv/{name}",
Source = "api", Source = "api",
Enabled = true, Enabled = true,
Tags = $"{SourceTypes.Twitch}, {name}" Tags = $"{SourceTypes.Twitch},{name}"
}); });
return SourceDto.Convert(item); return SourceDto.Convert(item);
} }
/// <summary>
/// Registers a GitHub repository as a new source, or returns the existing
/// source when one is already stored for the same URL.
/// </summary>
/// <param name="url">
/// Repository address in the form <c>https://github.com/{owner}/{repo}</c>.
/// </param>
/// <returns>
/// The stored source as a DTO. An empty <see cref="SourceDto"/> is returned when
/// the URL does not point at github.com or does not contain an owner and a repository.
/// </returns>
[HttpPost("new/github")]
public SourceDto NewGithub(string url)
{
    if (string.IsNullOrWhiteSpace(url) || !url.Contains("github.com")) return new SourceDto();

    var res = _sources.GetByUrl(url);
    if (res.ID != Guid.Empty) return SourceDto.Convert(res);

    // "https://github.com/{owner}/{repo}" splits into
    // ["https:", "", "github.com", owner, repo, ...]; anything shorter has no
    // owner/repo pair and would otherwise fail on the indexers below.
    var slice = url.Split('/');
    if (slice.Length < 5 || slice[3] == "" || slice[4] == "") return new SourceDto();

    var owner = slice[3];
    var repository = slice[4];

    var pageReader = new HtmlPageReader(new HtmlPageReaderOptions
    {
        Url = url
    });
    pageReader.Parse();

    var item = _sources.New(new SourceModel
    {
        Site = SourceTypes.GitHub,
        Type = SourceTypes.GitHub,
        Name = $"{owner}/{repository}",
        Url = url,
        Source = "feed",
        Enabled = true,
        // Comma separated without spaces, matching the tags written by the other "new/*" endpoints.
        Tags = $"{SourceTypes.GitHub},{owner},{repository}"
    });

    // Only store an icon when the page actually exposed an image, as NewReddit does.
    if (!string.IsNullOrEmpty(pageReader.Data.Header.Image))
        _icons.New(new IconModel
        {
            Id = Guid.NewGuid(),
            FileName = pageReader.Data.Header.Image,
            SourceId = item.ID
        });

    return SourceDto.Convert(item);
}
[HttpGet("{id}")] [HttpGet("{id}")]
public SourceDto GetById(Guid id) public SourceDto GetById(Guid id)
{ {

View File

@ -2,10 +2,12 @@ using Hangfire;
using Hangfire.MemoryStorage; using Hangfire.MemoryStorage;
using HealthChecks.UI.Client; using HealthChecks.UI.Client;
using Microsoft.AspNetCore.Diagnostics.HealthChecks; using Microsoft.AspNetCore.Diagnostics.HealthChecks;
using Newsbot.Collector.Api;
using Newsbot.Collector.Domain.Consts; using Newsbot.Collector.Domain.Consts;
using Newsbot.Collector.Domain.Models; using Newsbot.Collector.Domain.Models;
using Newsbot.Collector.Services.Jobs; using Newsbot.Collector.Domain.Models.Config;
using Serilog; using Serilog;
using ILogger = Serilog.ILogger; using ILogger = Serilog.ILogger;
var builder = WebApplication.CreateBuilder(args); var builder = WebApplication.CreateBuilder(args);
@ -17,15 +19,7 @@ builder.Host.UseSerilog();
var config = GetConfiguration(); var config = GetConfiguration();
builder.Configuration.AddConfiguration(config); builder.Configuration.AddConfiguration(config);
Log.Logger = new LoggerConfiguration() Log.Logger = GetLogger(config);
.WriteTo.Console()
.WriteTo.OpenTelemetry(
config.GetValue<string>(ConfigConnectionStringConst.OpenTelemetry) ?? "",
resourceAttributes: new Dictionary<string, object>
{
{ "service.name", "newsbot-collector-api" }
})
.CreateLogger();
Log.Information("Starting up"); Log.Information("Starting up");
// Configure Hangfire // Configure Hangfire
@ -43,6 +37,11 @@ builder.Services.AddSwaggerGen();
builder.Services.Configure<ConnectionStrings>(config.GetSection("ConnectionStrings")); builder.Services.Configure<ConnectionStrings>(config.GetSection("ConnectionStrings"));
builder.Services.Configure<ConfigSectionConnectionStrings>(config.GetSection(ConfigSectionsConst.ConnectionStrings));
builder.Services.Configure<ConfigSectionRssModel>(config.GetSection(ConfigSectionsConst.Rss));
builder.Services.Configure<ConfigSectionYoutubeModel>(config.GetSection(ConfigSectionsConst.Youtube));
//builder.Services.Configure<
var app = builder.Build(); var app = builder.Build();
// Configure the HTTP request pipeline. // Configure the HTTP request pipeline.
@ -55,7 +54,7 @@ if (config.GetValue<bool>("EnableSwagger"))
app.UseHttpsRedirection(); app.UseHttpsRedirection();
app.UseHangfireDashboard(); app.UseHangfireDashboard();
SetupRecurringJobs(config, Log.Logger); BackgroundJobs.SetupRecurringJobs(config);
app.UseAuthorization(); app.UseAuthorization();
@ -77,21 +76,22 @@ static IConfiguration GetConfiguration()
.Build(); .Build();
} }
static void SetupRecurringJobs(IConfiguration configuration, ILogger logger) static ILogger GetLogger(IConfiguration configuration)
{ {
//RecurringJob.AddOrUpdate<HelloWorldJob>("Example", x => x.InitAndExecute(new HelloWorldJobOptions var otel = configuration.GetValue<string>(ConfigConnectionStringConst.OpenTelemetry) ?? "";
//{
// Message = "Hello from the background!"
//}), "0/1 * * * *");
RecurringJob.AddOrUpdate<RssWatcherJob>("RSS", x => x.InitAndExecute(new RssWatcherJobOptions if (otel == "")
{ return Log.Logger = new LoggerConfiguration()
ConnectionString = configuration.GetSection(ConfigConnectionStringConst.Database).Value ?? "" .WriteTo.Console()
}), "15 0-23 * * *"); .CreateLogger();
RecurringJob.AddOrUpdate<DiscordNotificationJob>("Discord Alerts", x => return Log.Logger = new LoggerConfiguration()
x.InitAndExecute(new DiscordNotificationJobOptions .WriteTo.Console()
{ .WriteTo.OpenTelemetry(
DatabaseConnectionString = configuration.GetSection(ConfigConnectionStringConst.Database).Value ?? "" otel,
}), "5/10 * * * *"); resourceAttributes: new Dictionary<string, object>
{
{ "service.name", "newsbot-collector-api" }
})
.CreateLogger();
} }

View File

@ -6,45 +6,27 @@ SELECT 'up SQL query';
CREATE EXTENSION IF NOT EXISTS "uuid-ossp"; CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
-- Final Fantasy XIV Entries -- Final Fantasy XIV Entries
INSERT INTO sources VALUES INSERT INTO sources
(uuid_generate_v4(), 'ffxiv', 'Final Fantasy XIV - NA', 'scrape', 'ffxiv', 'a', TRUE, 'https://na.finalfantasyxiv.com/lodestone/', 'ffxiv, final, fantasy, xiv, na, lodestone'); VALUES (uuid_generate_v4(), 'ffxiv', 'Final Fantasy XIV - NA', 'scrape', 'ffxiv', 'a', TRUE,
INSERT INTO sources VALUES 'https://na.finalfantasyxiv.com/lodestone/', 'ffxiv, final, fantasy, xiv, na, lodestone');
(uuid_generate_v4(), 'ffxiv', 'Final Fantasy XIV - JP', 'scrape', 'ffxiv', 'a', FALSE, 'https://jp.finalfantasyxiv.com/lodestone/', 'ffxiv, final, fantasy, xiv, jp, lodestone'); INSERT INTO sources
INSERT INTO sources VALUES VALUES (uuid_generate_v4(), 'ffxiv', 'Final Fantasy XIV - JP', 'scrape', 'ffxiv', 'a', FALSE,
(uuid_generate_v4(), 'ffxiv', 'Final Fantasy XIV - EU', 'scrape', 'ffxiv', 'a', FALSE, 'https://eu.finalfantasyxiv.com/lodestone/', 'ffxiv, final, fantasy, xiv, eu, lodestone'); 'https://jp.finalfantasyxiv.com/lodestone/', 'ffxiv, final, fantasy, xiv, jp, lodestone');
INSERT INTO sources VALUES INSERT INTO sources
(uuid_generate_v4(), 'ffxiv', 'Final Fantasy XIV - FR', 'scrape', 'ffxiv', 'a', FALSE, 'https://fr.finalfantasyxiv.com/lodestone/', 'ffxiv, final, fantasy, xiv, fr, lodestone'); VALUES (uuid_generate_v4(), 'ffxiv', 'Final Fantasy XIV - EU', 'scrape', 'ffxiv', 'a', FALSE,
INSERT INTO sources VALUES 'https://eu.finalfantasyxiv.com/lodestone/', 'ffxiv, final, fantasy, xiv, eu, lodestone');
(uuid_generate_v4(), 'ffxiv', 'Final Fantasy XIV - DE', 'scrape', 'ffxiv', 'a', FALSE, 'https://de.finalfantasyxiv.com/lodestone/', 'ffxiv, final, fantasy, xiv, de, lodestone'); INSERT INTO sources
VALUES (uuid_generate_v4(), 'ffxiv', 'Final Fantasy XIV - FR', 'scrape', 'ffxiv', 'a', FALSE,
-- Reddit Entries 'https://fr.finalfantasyxiv.com/lodestone/', 'ffxiv, final, fantasy, xiv, fr, lodestone');
INSERT INTO sources VALUES INSERT INTO sources
(uuid_generate_v4(), 'reddit', 'dadjokes', 'feed', 'reddit', 'a', TRUE, 'https://reddit.com/r/dadjokes', 'reddit, dadjokes'); VALUES (uuid_generate_v4(), 'ffxiv', 'Final Fantasy XIV - DE', 'scrape', 'ffxiv', 'a', FALSE,
INSERT INTO sources VALUES 'https://de.finalfantasyxiv.com/lodestone/', 'ffxiv, final, fantasy, xiv, de, lodestone');
(uuid_generate_v4(), 'reddit', 'steamdeck', 'feed', 'reddit', 'a', TRUE, 'https://reddit.com/r/steamdeck', 'reddit, steam deck, steam, deck');
-- Youtube Entries
INSERT INTO sources VALUES
(uuid_generate_v4(), 'youtube', 'Game Grumps', 'feed', 'youtube', 'a', TRUE, 'https://www.youtube.com/user/GameGrumps', 'youtube, game grumps, game, grumps');
-- RSS Entries
INSERT INTO sources VALUES
(uuid_generate_v4(), 'steampowered', 'steam deck', 'feed', 'rss', 'a', TRUE, 'https://store.steampowered.com/feeds/news/app/1675200/?cc=US&l=english&snr=1_2108_9__2107', 'rss, steampowered, steam, deck, steam deck');
-- Twitch Entries
INSERT INTO sources VALUES
(uuid_generate_v4(), 'twitch', 'Nintendo', 'api', 'twitch', 'a', TRUE, 'https://twitch.tv/nintendo', 'twitch, nintendo');
-- +goose StatementEnd -- +goose StatementEnd
-- +goose Down -- +goose Down
-- +goose StatementBegin -- +goose StatementBegin
--SELECT 'down SQL query'; --SELECT 'down SQL query';
DELETE
DELETE FROM sources where source = 'reddit' and name = 'dadjokes'; FROM sources
DELETE FROM sources where source = 'reddit' and name = 'steamdeck'; where source = 'ffxiv';
DELETE FROM sources where source = 'ffxiv';
DELETE FROM sources WHERE source = 'twitch' and name = 'Nintendo';
DELETE FROM sources WHERE source = 'youtube' and name = 'Game Grumps';
DELETE FROM SOURCES WHERE source = 'rss' and name = 'steam deck';
-- +goose StatementEnd -- +goose StatementEnd

View File

@ -1,20 +0,0 @@
-- +goose Up
-- +goose StatementBegin
-- Seed the "Let's Mosley" podcast RSS feed.
-- The values are positional (no column list), so their order must match the
-- column order of the sources table.
INSERT INTO sources VALUES (
    uuid_generate_v4(),
    'rss',
    'Let''s Mosley',
    'feed',
    'rss',
    'podcast',
    TRUE,
    'https://anchor.fm/s/6c7aa4c4/podcast/rss',
    'rss,let''s mosley,fitnes,coach',
    FALSE);
-- +goose StatementEnd

-- +goose Down
-- +goose StatementBegin
-- Remove the seeded row again; it is matched on type and name.
DELETE FROM sources Where type = 'rss' And Name = 'Let''s Mosley';
-- +goose StatementEnd

View File

@ -9,7 +9,7 @@ namespace Newsbot.Collector.Database.Repositories;
public class SourcesTable : ISourcesRepository public class SourcesTable : ISourcesRepository
{ {
private string _connectionString; private readonly string _connectionString;
public SourcesTable(string connectionString) public SourcesTable(string connectionString)
{ {
@ -19,25 +19,16 @@ public class SourcesTable : ISourcesRepository
public SourcesTable(IConfiguration configuration) public SourcesTable(IConfiguration configuration)
{ {
var connstr = configuration.GetConnectionString("database"); var connstr = configuration.GetConnectionString("database");
if (connstr is null) if (connstr is null) connstr = "";
{
connstr = "";
}
_connectionString = connstr; _connectionString = connstr;
} }
private IDbConnection OpenConnection(string connectionString)
{
var conn = new NpgsqlConnection(_connectionString);
conn.Open();
return conn;
}
public SourceModel New(SourceModel model) public SourceModel New(SourceModel model)
{ {
model.ID = Guid.NewGuid(); model.ID = Guid.NewGuid();
using var conn = OpenConnection(_connectionString); using var conn = OpenConnection(_connectionString);
var query = "Insert Into Sources (ID, Site, Name, Source, Type, Value, Enabled, Url, Tags) Values (@id ,@site,@name,@source,@type,@value,@enabled,@url,@tags);"; var query =
"Insert Into Sources (ID, Site, Name, Source, Type, Value, Enabled, Url, Tags, YoutubeId) Values (@id ,@site,@name,@source,@type,@value,@enabled,@url,@tags,@youtubeid);";
conn.Execute(query, new conn.Execute(query, new
{ {
id = model.ID, id = model.ID,
@ -48,7 +39,8 @@ public class SourcesTable : ISourcesRepository
model.Value, model.Value,
model.Enabled, model.Enabled,
model.Url, model.Url,
model.Tags model.Tags,
model.YoutubeId
}); });
return model; return model;
} }
@ -61,10 +53,7 @@ public class SourcesTable : ISourcesRepository
{ {
id = ID id = ID
}); });
if (res.Count() == 0) if (res.Count() == 0) return new SourceModel();
{
return new SourceModel();
}
return res.First(); return res.First();
} }
@ -83,10 +72,7 @@ public class SourcesTable : ISourcesRepository
name = Name name = Name
}); });
if (res.Count() == 0) if (res.Count() == 0) return new SourceModel();
{
return new SourceModel();
}
return res.First(); return res.First();
} }
@ -96,14 +82,24 @@ public class SourcesTable : ISourcesRepository
var query = "Select * from Sources WHERE name = @name and type = @type;"; var query = "Select * from Sources WHERE name = @name and type = @type;";
var res = conn.Query<SourceModel>(query, new var res = conn.Query<SourceModel>(query, new
{ {
name = name, name, type
type = type
}); });
if (res.Count() == 0) if (res.Count() == 0) return new SourceModel();
return res.First();
}
public SourceModel GetByUrl(string url)
{
using var conn = OpenConnection(_connectionString);
var query = "Select * from Sources WHERE url = @url;";
var res = conn.Query<SourceModel>(query, new
{ {
return new SourceModel(); url
} });
if (res.ToList().Count == 0) return new SourceModel();
return res.First(); return res.First();
} }
@ -115,8 +111,7 @@ public class SourcesTable : ISourcesRepository
Fetch Next @count Rows Only;"; Fetch Next @count Rows Only;";
return conn.Query<SourceModel>(query, new return conn.Query<SourceModel>(query, new
{ {
page = page * count, page = page * count, count
count = count
}).ToList(); }).ToList();
} }
@ -126,8 +121,7 @@ public class SourcesTable : ISourcesRepository
var query = "Select * From Sources where Source = @source Limit @limit;"; var query = "Select * From Sources where Source = @source Limit @limit;";
return conn.Query<SourceModel>(query, new return conn.Query<SourceModel>(query, new
{ {
source = source, source, limit
limit = limit
}).ToList(); }).ToList();
} }
@ -137,28 +131,44 @@ public class SourcesTable : ISourcesRepository
var query = "Select * From Sources where Type = @type Limit @limit;"; var query = "Select * From Sources where Type = @type Limit @limit;";
return conn.Query<SourceModel>(query, new return conn.Query<SourceModel>(query, new
{ {
type = type, type, limit
limit = limit
}).ToList(); }).ToList();
} }
public int Disable(Guid ID)
public int Disable(Guid id)
{ {
using var conn = OpenConnection(_connectionString); using var conn = OpenConnection(_connectionString);
var query = "Update Sources Set Enabled = FALSE where ID = @id;"; var query = "Update Sources Set Enabled = FALSE where ID = @id;";
return conn.Execute(query, new return conn.Execute(query, new
{ {
id = ID id
}); });
} }
public int Enable(Guid ID) public int Enable(Guid id)
{ {
using var conn = OpenConnection(_connectionString); using var conn = OpenConnection(_connectionString);
var query = "Update Sources Set Enabled = TRUE where ID = @id;"; var query = "Update Sources Set Enabled = TRUE where ID = @id;";
return conn.Execute(query, new return conn.Execute(query, new
{ {
id = ID id
}); });
} }
/// <summary>
/// Stores the given YouTube id on the source row identified by <paramref name="id"/>.
/// </summary>
/// <param name="id">Primary key of the source row to update.</param>
/// <param name="youtubeId">Value written to the youtubeid column.</param>
/// <returns>The number of rows affected, as reported by the executed statement.</returns>
public int UpdateYoutubeId(Guid id, string youtubeId)
{
    const string sql = "Update Sources Set youtubeid = @youtubeId where ID = @id;";
    var parameters = new { id, youtubeId };

    using var connection = OpenConnection(_connectionString);
    return connection.Execute(sql, parameters);
}
/// <summary>
/// Creates a PostgreSQL connection for the given connection string and opens it.
/// </summary>
/// <param name="connectionString">Connection string passed to <see cref="NpgsqlConnection"/>.</param>
/// <returns>An opened connection. The caller owns it and is responsible for disposing it.</returns>
private IDbConnection OpenConnection(string connectionString)
{
    // Honour the argument instead of silently falling back to the field.
    var conn = new NpgsqlConnection(connectionString);
    try
    {
        conn.Open();
        return conn;
    }
    catch
    {
        // Opening failed, so nobody else will ever see this instance: release it here.
        conn.Dispose();
        throw;
    }
}
} }

View File

@ -1,9 +0,0 @@
namespace Newsbot.Collector.Domain.Consts;
/// <summary>
/// Configuration keys for the Reddit section, written as "Section:Key" paths.
/// The type only holds constants, so it is static and cannot be instantiated.
/// </summary>
public static class ConfigRedditConst
{
    /// <summary>Configuration path "Reddit:IsEnabled".</summary>
    public const string IsEnabled = "Reddit:IsEnabled";

    /// <summary>Configuration path "Reddit:PullHot".</summary>
    public const string PullHot = "Reddit:PullHot";

    /// <summary>Configuration path "Reddit:PullNsfw".</summary>
    public const string PullNsfw = "Reddit:PullNsfw";

    /// <summary>Configuration path "Reddit:PullTop".</summary>
    public const string PullTop = "Reddit:PullTop";
}

View File

@ -1,4 +1,3 @@
using System.Globalization;
using Newsbot.Collector.Domain.Models; using Newsbot.Collector.Domain.Models;
namespace Newsbot.Collector.Domain.Interfaces; namespace Newsbot.Collector.Domain.Interfaces;
@ -10,9 +9,11 @@ public interface ISourcesRepository
public SourceModel GetByID(string ID); public SourceModel GetByID(string ID);
public SourceModel GetByName(string name); public SourceModel GetByName(string name);
public SourceModel GetByNameAndType(string name, string type); public SourceModel GetByNameAndType(string name, string type);
SourceModel GetByUrl(string url);
public List<SourceModel> List(int page, int count); public List<SourceModel> List(int page, int count);
public List<SourceModel> ListBySource(string source, int limit); public List<SourceModel> ListBySource(string source, int limit);
public List<SourceModel> ListByType(string type, int limit = 25); public List<SourceModel> ListByType(string type, int limit = 25);
public int Disable(Guid ID); public int Disable(Guid id);
public int Enable(Guid ID); public int Enable(Guid id);
public int UpdateYoutubeId(Guid id, string youtubeId);
} }

View File

@ -42,9 +42,10 @@ public class DiscordWebHookModel
public class IconModel public class IconModel
{ {
public Guid ID { get; set; } public Guid Id { get; set; }
public string FileName { get; set; } = ""; public string FileName { get; set; } = "";
public string Site { get; set; } = ""; public string Site { get; set; } = "";
public Guid SourceId { get; set; }
} }
public class SettingModel public class SettingModel
@ -61,7 +62,7 @@ public class SourceModel
public string Site { get; set; } = ""; public string Site { get; set; } = "";
public string Name { get; set; } = ""; public string Name { get; set; } = "";
// Source use to deinfe the worker to query with but moving to Type as it was not used really. // Source use to define the worker to query with but moving to Type as it was not used really.
public string Source { get; set; } = ""; public string Source { get; set; } = "";
public string Type { get; set; } = ""; public string Type { get; set; } = "";
public string Value { get; set; } = ""; public string Value { get; set; } = "";
@ -69,6 +70,7 @@ public class SourceModel
public string Url { get; set; } = ""; public string Url { get; set; } = "";
public string Tags { get; set; } = ""; public string Tags { get; set; } = "";
public bool Deleted { get; set; } public bool Deleted { get; set; }
public string YoutubeId { get; set; } = "";
} }
public class SubscriptionModel public class SubscriptionModel

View File

@ -5,12 +5,11 @@ namespace Newsbot.Collector.Services.HtmlParser;
public class HeadParserClient public class HeadParserClient
{ {
private const string XPathMetaTag = "//head/meta"; private const string XPathHeadMetaTag = "//head/meta";
private const string XPathBodyMetaTag = "//body/meta";
private const string XPathLinkTag = "//head/link"; private const string XPathLinkTag = "//head/link";
public HeadParserModel Data { get; set; } private readonly string _htmlContent;
private string _htmlContent;
public HeadParserClient(string htmlContent, bool useBrowser = false) public HeadParserClient(string htmlContent, bool useBrowser = false)
{ {
@ -18,6 +17,8 @@ public class HeadParserClient
Data = new HeadParserModel(); Data = new HeadParserModel();
} }
public HeadParserModel Data { get; set; }
public void Parse() public void Parse()
{ {
Data.Title = GetMetaTitle(); Data.Title = GetMetaTitle();
@ -36,9 +37,23 @@ public class HeadParserClient
var htmlDoc = new HtmlDocument(); var htmlDoc = new HtmlDocument();
htmlDoc.LoadHtml(_htmlContent); htmlDoc.LoadHtml(_htmlContent);
var tags = htmlDoc.DocumentNode.SelectNodes(XPathMetaTag).ToList(); var allTags = new List<HtmlNode>();
return tags; var headerTags = htmlDoc.DocumentNode.SelectNodes(XPathHeadMetaTag).ToList();
allTags.AddRange(headerTags);
try
{
var bodyTags = htmlDoc.DocumentNode.SelectNodes(XPathBodyMetaTag).ToList();
allTags.AddRange(bodyTags);
}
catch
{
// no tags found in the body and that's ok.
// we check the body thanks to Youtube.
}
return allTags;
} }
private List<HtmlNode> CollectLinkTags() private List<HtmlNode> CollectLinkTags()
@ -53,13 +68,13 @@ public class HeadParserClient
{ {
foreach (var meta in html) foreach (var meta in html)
{ {
if (meta.Attributes.Count == 0) continue;
;
//Console.WriteLine($"Name={meta.Attributes[0].Name} & Value={meta.Attributes[0].Value}"); //Console.WriteLine($"Name={meta.Attributes[0].Name} & Value={meta.Attributes[0].Value}");
if (meta.Attributes[0].Value.Contains(Tag) == false) if (meta.Attributes[0].Value.Contains(Tag) == false) continue;
{
continue;
}
return meta.Attributes[1].Value; return meta.Attributes[1].Value;
} }
return ""; return "";
} }
@ -68,91 +83,86 @@ public class HeadParserClient
foreach (var tag in tags) foreach (var tag in tags)
{ {
var res = GetTagValue(tag, htmlTags); var res = GetTagValue(tag, htmlTags);
if (res == "") if (res == "") continue;
{
continue;
}
return res; return res;
} }
return ""; return "";
} }
public string GetMetaTitle() public string GetMetaTitle()
{ {
var htmlTags = CollectMetaTags(); var htmlTags = CollectMetaTags();
string[] tags = new string[] { "twitter:title", "og:title", "title" }; string[] tags = { "twitter:title", "og:title", "title" };
return FindFirstResult(tags, htmlTags); return FindFirstResult(tags, htmlTags);
} }
public string GetMetaDescription() public string GetMetaDescription()
{ {
var htmlTags = CollectMetaTags(); var htmlTags = CollectMetaTags();
string[] tags = new string[] { "twitter:description", "og:description", "description" }; string[] tags = { "twitter:description", "og:description", "description" };
return FindFirstResult(tags, htmlTags); return FindFirstResult(tags, htmlTags);
} }
public string GetMetaImage() public string GetMetaImage()
{ {
var htmlTags = CollectMetaTags(); var htmlTags = CollectMetaTags();
string[] tags = new string[] { "twitter:image", "og:image", "image" }; string[] tags = { "twitter:image", "og:image", "image" };
return FindFirstResult(tags, htmlTags); return FindFirstResult(tags, htmlTags);
} }
public string GetMetaUrl() public string GetMetaUrl()
{ {
var htmlTags = CollectMetaTags(); var htmlTags = CollectMetaTags();
string[] tags = new string[] { "twitter:url", "og:url", "url" }; string[] tags = { "twitter:url", "og:url", "url" };
return FindFirstResult(tags, htmlTags); return FindFirstResult(tags, htmlTags);
} }
public string GetMetaPageType() public string GetMetaPageType()
{ {
var htmlTags = CollectMetaTags(); var htmlTags = CollectMetaTags();
string[] tags = new string[] { "og:type", "type" }; string[] tags = { "og:type", "type" };
return FindFirstResult(tags, htmlTags); return FindFirstResult(tags, htmlTags);
} }
public string GetMetaColorTheme() public string GetMetaColorTheme()
{ {
var htmlTags = CollectMetaTags(); var htmlTags = CollectMetaTags();
string[] tags = new string[] { "theme-color" }; string[] tags = { "theme-color" };
return FindFirstResult(tags, htmlTags); return FindFirstResult(tags, htmlTags);
} }
public string GetYouTubeChannelId() public string GetYouTubeChannelId()
{ {
var htmlTags = CollectMetaTags(); var htmlTags = CollectMetaTags();
string[] tags = new string[] { "channelId" }; string[] tags = { "og:url", "channelId" };
return FindFirstResult(tags, htmlTags); var results = FindFirstResult(tags, htmlTags);
var id = results.Replace("https://www.youtube.com/channel/", "");
return id;
} }
/// <summary> /// <summary>
/// This will parse the headers looking for known keys that will contain a RSS feed link. /// This will parse the headers looking for known keys that will contain a RSS feed link.
/// If the feed is not found, this will throw an exception (MissingHeaderValueException). /// If the feed is not found, this will throw an exception (MissingHeaderValueException).
/// </summary> /// </summary>
/// <returns></returns> /// <returns></returns>
public string GetSiteFeed() public string GetSiteFeed()
{ {
var htmlTags = CollectLinkTags(); var htmlTags = CollectLinkTags();
var tags = new string[] { "alternate" }; var tags = new[] { "alternate" };
try try
{ {
var attr = FindFirstAttribute(tags, htmlTags); var attr = FindFirstAttribute(tags, htmlTags);
foreach (var item in attr) foreach (var item in attr)
{ {
if (item.Name != "href") if (item.Name != "href") continue;
{
continue;
}
var uri = item.Value; var uri = item.Value;
if (uri.StartsWith("//")) if (uri.StartsWith("//")) uri = uri.Replace("//", "https://");
{
uri = uri.Replace("//", "https://");
}
return uri; return uri;
} }
return ""; return "";
} }
catch catch
@ -165,7 +175,6 @@ public class HeadParserClient
private HtmlAttributeCollection FindFirstAttribute(string[] tags, List<HtmlNode> htmlTags) private HtmlAttributeCollection FindFirstAttribute(string[] tags, List<HtmlNode> htmlTags)
{ {
foreach (var tag in tags) foreach (var tag in tags)
{
try try
{ {
var res = GetValidAttribute(tag, htmlTags); var res = GetValidAttribute(tag, htmlTags);
@ -175,7 +184,7 @@ public class HeadParserClient
{ {
// Nothing was found in the given tag but we will keep looking till we finish all the entries. // Nothing was found in the given tag but we will keep looking till we finish all the entries.
} }
}
throw new MissingHeaderValueException("Unable to find the requested value"); throw new MissingHeaderValueException("Unable to find the requested value");
} }
@ -183,12 +192,10 @@ public class HeadParserClient
{ {
foreach (var meta in html) foreach (var meta in html)
{ {
if (meta.Attributes[0].Value.Contains(Tag) == false) if (meta.Attributes[0].Value.Contains(Tag) == false) continue;
{
continue;
}
return meta.Attributes; return meta.Attributes;
} }
throw new MissingHeaderValueException("Site does not expose requested tag."); throw new MissingHeaderValueException("Site does not expose requested tag.");
} }
} }

View File

@ -1,25 +1,32 @@
using HtmlAgilityPack; using HtmlAgilityPack;
using Newsbot.Collector.Domain.Exceptions;
namespace Newsbot.Collector.Services.HtmlParser; namespace Newsbot.Collector.Services.HtmlParser;
/// <summary>
/// Options handed to the HtmlPageReader constructor to tell it where the HTML comes from.
/// The constructor throws when neither property yields content.
/// </summary>
public class HtmlPageReaderOptions
{
/// <summary>
/// Address of the page to download. May be null when <see cref="SourceCode"/> is supplied.
/// NOTE(review): the constructor assigns from SourceCode first and then from Url, so Url
/// appears to win when both are set - confirm that this is intended.
/// </summary>
public string? Url { get; init; }
/// <summary>
/// HTML markup to parse directly instead of downloading it. May be null when <see cref="Url"/> is supplied.
/// </summary>
public string? SourceCode { get; init; }
}
public class HtmlPageReader public class HtmlPageReader
{ {
private readonly HeadParserClient _headClient;
private readonly string _siteContent;
public HtmlData Data { get; set; } public HtmlPageReader(HtmlPageReaderOptions options)
private HeadParserClient _headClient;
private string _siteContent;
public HtmlPageReader(string pageUrl)
{ {
_siteContent = ReadSiteContent(pageUrl); if (options.SourceCode is not null) _siteContent = options.SourceCode;
_headClient = new HeadParserClient(_siteContent);
if (options.Url is not null) _siteContent = ReadSiteContent(options.Url);
if (_siteContent is null) throw new Exception("SiteContent was not filled and expected.");
_headClient = new HeadParserClient(_siteContent);
Data = new HtmlData(); Data = new HtmlData();
} }
public HtmlData Data { get; set; }
public void Parse() public void Parse()
{ {
_headClient.Parse(); _headClient.Parse();
@ -32,7 +39,7 @@ public class HtmlPageReader
var html = client.GetStringAsync(url); var html = client.GetStringAsync(url);
html.Wait(); html.Wait();
var content = html.Result.ToString(); var content = html.Result;
return content; return content;
} }
@ -47,24 +54,14 @@ public class HtmlPageReader
htmlDoc.LoadHtml(_siteContent); htmlDoc.LoadHtml(_siteContent);
var links = htmlDoc.DocumentNode.SelectNodes("//div[contains(@class, 'article-text')]").ToList(); var links = htmlDoc.DocumentNode.SelectNodes("//div[contains(@class, 'article-text')]").ToList();
if (links.Count == 0) if (links.Count == 0) throw new Exception("Unable to parse body. Tag is unknown.");
{
throw new Exception("Unable to parse body. Tag is unkown.");
}
if (links.Count >= 2) if (links.Count >= 2) throw new Exception("Too many results back for the body");
{
throw new Exception("Too many results back for the body");
}
var content = new List<string>(); //var content = new List<string>();
foreach (var item in links[0].ChildNodes) //foreach (var item in links[0].ChildNodes)
{ // if (item.Name == "p")
if (item.Name == "p") // content.Add(item.InnerText);
{
content.Add(item.InnerText);
}
}
return links; return links;
} }

View File

@ -1,6 +1,7 @@
using Newsbot.Collector.Database.Repositories; using Newsbot.Collector.Database.Repositories;
using Newsbot.Collector.Domain.Interfaces; using Newsbot.Collector.Domain.Interfaces;
using Newsbot.Collector.Domain.Models; using Newsbot.Collector.Domain.Models;
using Newsbot.Collector.Domain.Models.Config;
using Newsbot.Collector.Services.Notifications.Discord; using Newsbot.Collector.Services.Notifications.Discord;
using Serilog; using Serilog;
@ -8,7 +9,9 @@ namespace Newsbot.Collector.Services.Jobs;
public class DiscordNotificationJobOptions public class DiscordNotificationJobOptions
{ {
public string? DatabaseConnectionString { get; set; } public ConfigSectionConnectionStrings? ConnectionStrings { get; set; }
public ConfigSectionNotificationsDiscord? Config { get; set; }
} }
public class DiscordNotificationJob public class DiscordNotificationJob
@ -19,6 +22,7 @@ public class DiscordNotificationJob
private IDiscordWebHooksRepository _webhook; private IDiscordWebHooksRepository _webhook;
private ISourcesRepository _sources; private ISourcesRepository _sources;
private ISubscriptionRepository _subs; private ISubscriptionRepository _subs;
private IIconsRepository _icons;
public DiscordNotificationJob() public DiscordNotificationJob()
{ {
@ -27,16 +31,22 @@ public class DiscordNotificationJob
_webhook = new DiscordWebhooksTable(""); _webhook = new DiscordWebhooksTable("");
_sources = new SourcesTable(""); _sources = new SourcesTable("");
_subs = new SubscriptionsTable(""); _subs = new SubscriptionsTable("");
_icons = new IconsTable("");
} }
public void InitAndExecute(DiscordNotificationJobOptions options) public void InitAndExecute(DiscordNotificationJobOptions options)
{ {
_queue = new DiscordQueueTable(options.DatabaseConnectionString ?? ""); options.ConnectionStrings ??= new ConfigSectionConnectionStrings();
_article = new ArticlesTable(options.DatabaseConnectionString ?? ""); options.Config ??= new ConfigSectionNotificationsDiscord();
_webhook = new DiscordWebhooksTable(options.DatabaseConnectionString ?? "");
_sources = new SourcesTable(options.DatabaseConnectionString ?? ""); _queue = new DiscordQueueTable(options.ConnectionStrings.Database ?? "");
_subs = new SubscriptionsTable(options.DatabaseConnectionString ?? ""); _article = new ArticlesTable(options.ConnectionStrings.Database ?? "");
_webhook = new DiscordWebhooksTable(options.ConnectionStrings.Database ?? "");
_sources = new SourcesTable(options.ConnectionStrings.Database ?? "");
_subs = new SubscriptionsTable(options.ConnectionStrings.Database ?? "");
_icons = new IconsTable(options.ConnectionStrings.Database ?? "");
Execute(); Execute();
} }
@ -59,6 +69,8 @@ public class DiscordNotificationJob
continue; continue;
} }
var sourceIcon = _icons.GetBySourceId(sourceDetails.ID);
// Find all the subscriptions for that source // Find all the subscriptions for that source
var allSubscriptions = _subs.ListBySourceID(sourceDetails.ID); var allSubscriptions = _subs.ListBySourceID(sourceDetails.ID);
@ -74,7 +86,7 @@ public class DiscordNotificationJob
var client = new DiscordWebhookClient(discordDetails.Url); var client = new DiscordWebhookClient(discordDetails.Url);
try try
{ {
client.SendMessage(GenerateDiscordMessage(sourceDetails, articleDetails)); client.SendMessage(GenerateDiscordMessage(sourceDetails, articleDetails, sourceIcon));
} }
catch (Exception e) catch (Exception e)
{ {
@ -89,7 +101,7 @@ public class DiscordNotificationJob
} }
} }
public DiscordMessage GenerateDiscordMessage(SourceModel source, ArticlesModel article) public DiscordMessage GenerateDiscordMessage(SourceModel source, ArticlesModel article, IconModel icon)
{ {
var embed = new DiscordMessageEmbed var embed = new DiscordMessageEmbed
{ {
@ -99,6 +111,7 @@ public class DiscordNotificationJob
Author = new DiscordMessageEmbedAuthor Author = new DiscordMessageEmbedAuthor
{ {
Name = article.AuthorName, Name = article.AuthorName,
IconUrl = icon.FileName
}, },
Footer = new DiscordMessageEmbedFooter Footer = new DiscordMessageEmbedFooter
{ {

View File

@ -1,18 +1,23 @@
using System.ServiceModel.Syndication; using System.ServiceModel.Syndication;
using System.Xml; using System.Xml;
using Newsbot.Collector.Database.Repositories; using Newsbot.Collector.Database.Repositories;
using Newsbot.Collector.Domain.Consts;
using Newsbot.Collector.Domain.Interfaces; using Newsbot.Collector.Domain.Interfaces;
using Newsbot.Collector.Domain.Models; using Newsbot.Collector.Domain.Models;
using Newsbot.Collector.Domain.Models.Config;
using Newsbot.Collector.Services.HtmlParser; using Newsbot.Collector.Services.HtmlParser;
namespace Newsbot.Collector.Services.Jobs; namespace Newsbot.Collector.Services.Jobs;
public class GithubWatcherJobOptions public class GithubWatcherJobOptions
{ {
public string ConnectionString { get; set; } = ""; public ConfigSectionConnectionStrings? ConnectionStrings { get; set; }
//public string ConnectionString { get; set; } = "";
public bool FeaturePullReleases { get; set; } = false; public bool FeaturePullReleases { get; set; } = false;
public bool FeaturePullCommits { get; set; } = false; public bool FeaturePullCommits { get; set; } = false;
public bool PullIssues { get; set; } = false; //public bool PullIssues { get; set; } = false;
} }
public class GithubWatcherJob public class GithubWatcherJob
@ -28,24 +33,24 @@ public class GithubWatcherJob
_source = new SourcesTable(""); _source = new SourcesTable("");
} }
public void Init(GithubWatcherJobOptions options)
{
_articles = new ArticlesTable(options.ConnectionString);
_queue = new DiscordQueueTable(options.ConnectionString);
_source = new SourcesTable(options.ConnectionString);
}
public void InitAndExecute(GithubWatcherJobOptions options) public void InitAndExecute(GithubWatcherJobOptions options)
{ {
Init(options); options.ConnectionStrings ??= new ConfigSectionConnectionStrings();
_articles = new ArticlesTable(options.ConnectionStrings.Database ?? "");
_queue = new DiscordQueueTable(options.ConnectionStrings.Database ?? "");
_source = new SourcesTable(options.ConnectionStrings.Database ?? "");
Execute(); Execute();
} }
private void Execute() private void Execute()
{ {
_source.ListBySource(SourceTypes.GitHub, 25);
// query sources for things to pull // query sources for things to pull
var items = new List<ArticlesModel>(); var items = new List<ArticlesModel>();
items.AddRange(Collect(new Uri("https://github.com/jtom38/dvb"))); items.AddRange(Collect(new Uri("https://github.com/jtom38/dvb")));
// query */commits/master.atom // query */commits/master.atom
@ -56,7 +61,7 @@ public class GithubWatcherJob
{ {
var items = new List<ArticlesModel>(); var items = new List<ArticlesModel>();
Guid placeHolderId = Guid.NewGuid(); var placeHolderId = Guid.NewGuid();
// query */release.atom // query */release.atom
items.AddRange(CollectItems($"{url.AbsoluteUri}/releases.atom", placeHolderId)); items.AddRange(CollectItems($"{url.AbsoluteUri}/releases.atom", placeHolderId));
items.AddRange(CollectItems($"{url.AbsoluteUri}/master.atom", placeHolderId)); items.AddRange(CollectItems($"{url.AbsoluteUri}/master.atom", placeHolderId));
@ -75,12 +80,12 @@ public class GithubWatcherJob
{ {
var itemUrl = item.Links[0].Uri.AbsoluteUri; var itemUrl = item.Links[0].Uri.AbsoluteUri;
var exits = _articles.GetByUrl(itemUrl); var exits = _articles.GetByUrl(itemUrl);
if (exits.ID != Guid.Empty) if (exits.ID != Guid.Empty) continue;
{
continue;
}
var parser = new HtmlPageReader(itemUrl); var parser = new HtmlPageReader(new HtmlPageReaderOptions
{
Url = itemUrl
});
parser.Parse(); parser.Parse();
try try
@ -104,6 +109,7 @@ public class GithubWatcherJob
Console.WriteLine(e); Console.WriteLine(e);
} }
} }
return items; return items;
} }
} }

View File

@ -4,6 +4,7 @@ using Newsbot.Collector.Database.Repositories;
using Newsbot.Collector.Domain.Consts; using Newsbot.Collector.Domain.Consts;
using Newsbot.Collector.Domain.Interfaces; using Newsbot.Collector.Domain.Interfaces;
using Newsbot.Collector.Domain.Models; using Newsbot.Collector.Domain.Models;
using Newsbot.Collector.Domain.Models.Config;
using Newsbot.Collector.Services.HtmlParser; using Newsbot.Collector.Services.HtmlParser;
using Serilog; using Serilog;
@ -11,13 +12,18 @@ namespace Newsbot.Collector.Services.Jobs;
public class RssWatcherJobOptions public class RssWatcherJobOptions
{ {
public string? ConnectionString { get; init; } //public string? ConnectionString { get; init; }
public string? OpenTelemetry { get; init; } //public string? OpenTelemetry { get; init; }
public ConfigSectionConnectionStrings? ConnectionStrings { get; set; }
public ConfigSectionRssModel? Config { get; set; }
} }
// This class was made to work with Hangfire and it does not support constructors. // This class was made to work with Hangfire and it does not support constructors.
public class RssWatcherJob public class RssWatcherJob
{ {
private const string JobName = "RssWatcherJob";
private IArticlesRepository _articles; private IArticlesRepository _articles;
private ILogger _logger; private ILogger _logger;
private IDiscordQueueRepository _queue; private IDiscordQueueRepository _queue;
@ -28,62 +34,53 @@ public class RssWatcherJob
_articles = new ArticlesTable(""); _articles = new ArticlesTable("");
_queue = new DiscordQueueTable(""); _queue = new DiscordQueueTable("");
_source = new SourcesTable(""); _source = new SourcesTable("");
_logger = GetLogger(""); _logger = JobLogger.GetLogger("", JobName);
} }
public void InitAndExecute(RssWatcherJobOptions options) public void InitAndExecute(RssWatcherJobOptions options)
{ {
Init(options); options.ConnectionStrings ??= new ConfigSectionConnectionStrings();
options.Config ??= new ConfigSectionRssModel();
_logger.Information("RssWatcherJob - Job was triggered"); _articles = new ArticlesTable(options.ConnectionStrings.Database ?? "");
_logger.Information("RssWatcherJob - Setting up the job"); _queue = new DiscordQueueTable(options.ConnectionStrings.Database ?? "");
_source = new SourcesTable(options.ConnectionStrings.Database ?? "");
_logger = JobLogger.GetLogger(options.ConnectionStrings.OpenTelemetry ?? "", JobName);
_logger.Information($"{JobName} - Job was triggered");
if (!options.Config.IsEnabled)
{
_logger.Information($"{JobName} - Going to exit because feature flag is off.");
return;
}
_logger.Information($"{JobName} - Setting up the job");
Execute(); Execute();
} }
private ILogger GetLogger(string connectionString)
{
return Log.Logger = new LoggerConfiguration()
.WriteTo.Console()
.WriteTo.OpenTelemetry(
connectionString,
resourceAttributes: new Dictionary<string, object>
{
{ "service.name", "newsbot-collector-api" },
{ "Job", "RssWatcherJob" }
})
.CreateLogger();
}
public void Init(RssWatcherJobOptions options)
{
_articles = new ArticlesTable(options.ConnectionString ?? "");
_queue = new DiscordQueueTable(options.ConnectionString ?? "");
_source = new SourcesTable(options.ConnectionString ?? "");
_logger = GetLogger(options.OpenTelemetry ?? "");
}
public void Execute() public void Execute()
{ {
var articles = new List<ArticlesModel>(); var articles = new List<ArticlesModel>();
_logger.Information("RssWatcherJob - Requesting sources"); _logger.Information($"{JobName} - Requesting sources");
var sources = _source.ListByType(SourceTypes.Rss); var sources = _source.ListByType(SourceTypes.Rss);
_logger.Information($"RssWatcherJob - Got {sources.Count} back"); _logger.Information($"{JobName} - Got {sources.Count} back");
foreach (var source in sources) foreach (var source in sources)
{ {
_logger.Information($"RssWatcherJob - Starting to process '{source.Name}'"); _logger.Information($"{JobName} - Starting to process '{source.Name}'");
_logger.Information("RssWatcherJob - Starting to request feed to be processed"); _logger.Information($"{JobName} - Starting to request feed to be processed");
var results = Collect(source.Url, source.ID); var results = Collect(source.Url, source.ID);
_logger.Information($"RssWatcherJob - Collected {results.Count} posts"); _logger.Information($"{JobName} - Collected {results.Count} posts");
articles.AddRange(results); articles.AddRange(results);
} }
_logger.Information("RssWatcherJob - Sending posts over to the database"); _logger.Information($"{JobName} - Sending posts over to the database");
UpdateDatabase(articles); UpdateDatabase(articles);
_logger.Information("RssWatcherJob - Done!"); _logger.Information($"{JobName} - Done!");
} }
public List<ArticlesModel> Collect(string url, Guid sourceId, int sleep = 3000) public List<ArticlesModel> Collect(string url, Guid sourceId, int sleep = 3000)
@ -101,7 +98,10 @@ public class RssWatcherJob
// If we have, skip and save the site bandwidth // If we have, skip and save the site bandwidth
if (IsThisUrlKnown(articleUrl)) continue; if (IsThisUrlKnown(articleUrl)) continue;
var meta = new HtmlPageReader(articleUrl); var meta = new HtmlPageReader(new HtmlPageReaderOptions
{
Url = articleUrl
});
meta.Parse(); meta.Parse();
var article = new ArticlesModel var article = new ArticlesModel

View File

@ -1,19 +1,18 @@
<Project Sdk="Microsoft.NET.Sdk"> <Project Sdk="Microsoft.NET.Sdk">
<ItemGroup> <ItemGroup>
<ProjectReference Include="..\Newsbot.Collector.Domain\Newsbot.Collector.Domain.csproj"/> <ProjectReference Include="..\Newsbot.Collector.Domain\Newsbot.Collector.Domain.csproj" />
<ProjectReference Include="..\Newsbot.Collector.Database\Newsbot.Collector.Database.csproj"/> <ProjectReference Include="..\Newsbot.Collector.Database\Newsbot.Collector.Database.csproj" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<PackageReference Include="Google.Apis.YouTube.v3" Version="1.60.0.2945"/> <PackageReference Include="HtmlAgilityPack" Version="1.11.46" />
<PackageReference Include="HtmlAgilityPack" Version="1.11.46"/> <PackageReference Include="Selenium.WebDriver" Version="4.8.1" />
<PackageReference Include="Selenium.WebDriver" Version="4.8.1"/> <PackageReference Include="Selenium.WebDriver.GeckoDriver" Version="0.32.2" />
<PackageReference Include="Selenium.WebDriver.GeckoDriver" Version="0.32.2"/> <PackageReference Include="Serilog" Version="2.12.0" />
<PackageReference Include="Serilog" Version="2.12.0"/> <PackageReference Include="Serilog.Sinks.Console" Version="4.1.0" />
<PackageReference Include="Serilog.Sinks.Console" Version="4.1.0"/> <PackageReference Include="Serilog.Sinks.OpenTelemetry" Version="1.0.0-dev-00113" />
<PackageReference Include="Serilog.Sinks.OpenTelemetry" Version="1.0.0-dev-00113"/> <PackageReference Include="System.ServiceModel.Syndication" Version="7.0.0" />
<PackageReference Include="System.ServiceModel.Syndication" Version="7.0.0"/>
</ItemGroup> </ItemGroup>
<PropertyGroup> <PropertyGroup>

View File

@ -8,12 +8,11 @@ namespace Newsbot.Collector.Services.Notifications.Discord;
public class DiscordWebhookClient : IDiscordNotificatioClient public class DiscordWebhookClient : IDiscordNotificatioClient
{ {
private readonly string[] _webhooks;
private string[] _webhooks;
public DiscordWebhookClient(string webhook) public DiscordWebhookClient(string webhook)
{ {
_webhooks = new string[] { webhook }; _webhooks = new[] { webhook };
} }
public DiscordWebhookClient(string[] webhooks) public DiscordWebhookClient(string[] webhooks)
@ -23,25 +22,22 @@ public class DiscordWebhookClient : IDiscordNotificatioClient
public void SendMessage(DiscordMessage payload) public void SendMessage(DiscordMessage payload)
{ {
if (payload.Embeds is not null) if (payload.Embeds is not null) MessageValidation.IsEmbedFooterValid(payload.Embeds);
{
MessageValidation.IsEmbedFooterValid(payload.Embeds);
}
foreach (var webhook in _webhooks) foreach (var webhook in _webhooks)
{ {
var jsonRaw = JsonConvert.SerializeObject(payload, Newtonsoft.Json.Formatting.None, new JsonSerializerSettings { NullValueHandling = NullValueHandling.Ignore }); var jsonRaw = JsonConvert.SerializeObject(payload, Formatting.None,
new JsonSerializerSettings { NullValueHandling = NullValueHandling.Ignore });
using StringContent jsonContent = new(jsonRaw, Encoding.UTF8, "application/json"); using StringContent jsonContent = new(jsonRaw, Encoding.UTF8, "application/json");
using var client = new HttpClient(); using var client = new HttpClient();
var resp = client.PostAsync(webhook, jsonContent); var resp = client.PostAsync(webhook, jsonContent);
resp.Wait(); resp.Wait();
// can be 204 or a message, might be 200
Console.WriteLine(resp.Result.StatusCode);
if (resp.Result.StatusCode != HttpStatusCode.NoContent) if (resp.Result.StatusCode != HttpStatusCode.NoContent)
{
throw new Exception("Message was not accepted by the sever."); throw new Exception("Message was not accepted by the sever.");
}
} }
} }
} }

View File

@ -34,6 +34,11 @@ public class DiscordNotificationJobTest
Thumbnail = "https://cdn.arstechnica.net/wp-content/uploads/2023/03/GettyImages-944827400-800x534.jpg", Thumbnail = "https://cdn.arstechnica.net/wp-content/uploads/2023/03/GettyImages-944827400-800x534.jpg",
Description = "Please work", Description = "Please work",
AuthorName = "No one knows" AuthorName = "No one knows"
},
new IconModel
{
Id = Guid.NewGuid(),
FileName = "https://www.redditstatic.com/desktop2x/img/favicon/android-icon-192x192.png"
}); });
webhookClient.SendMessage(msg); webhookClient.SendMessage(msg);
} }

View File

@ -1,4 +1,6 @@
using Microsoft.Extensions.Configuration; using Microsoft.Extensions.Configuration;
using Newsbot.Collector.Domain.Models;
using Newsbot.Collector.Domain.Models.Config;
using Newsbot.Collector.Services.Jobs; using Newsbot.Collector.Services.Jobs;
namespace Newsbot.Collector.Tests.Jobs; namespace Newsbot.Collector.Tests.Jobs;
@ -7,8 +9,12 @@ public class GithubWatcherJobTests
{ {
private IConfiguration GetConfiguration() private IConfiguration GetConfiguration()
{ {
var inMemorySettings = new Dictionary<string, string> { var inMemorySettings = new Dictionary<string, string>
{"ConnectionStrings:database", "Host=localhost;Username=postgres;Password=postgres;Database=postgres;sslmode=disable"} {
{
"ConnectionStrings:database",
"Host=localhost;Username=postgres;Password=postgres;Database=postgres;sslmode=disable"
}
}; };
IConfiguration configuration = new ConfigurationBuilder() IConfiguration configuration = new ConfigurationBuilder()
@ -26,9 +32,12 @@ public class GithubWatcherJobTests
public void CanPullAFeed() public void CanPullAFeed()
{ {
var client = new GithubWatcherJob(); var client = new GithubWatcherJob();
client.Init(new GithubWatcherJobOptions client.InitAndExecute(new GithubWatcherJobOptions
{ {
ConnectionString = ConnectionString(), ConnectionStrings = new ConfigSectionConnectionStrings
{
Database = ConnectionString()
},
FeaturePullCommits = true, FeaturePullCommits = true,
FeaturePullReleases = true FeaturePullReleases = true
}); });

View File

@ -1,14 +1,27 @@
using Microsoft.Extensions.Configuration; using Microsoft.Extensions.Configuration;
using Newsbot.Collector.Domain.Models.Config;
using Newsbot.Collector.Services.Jobs; using Newsbot.Collector.Services.Jobs;
using Xunit.Abstractions;
namespace Newsbot.Collector.Tests.Jobs; namespace Newsbot.Collector.Tests.Jobs;
public class RssWatcherJobTest public class RssWatcherJobTest
{ {
private readonly ITestOutputHelper _testOutputHelper;
public RssWatcherJobTest(ITestOutputHelper testOutputHelper)
{
_testOutputHelper = testOutputHelper;
}
private IConfiguration GetConfiguration() private IConfiguration GetConfiguration()
{ {
var inMemorySettings = new Dictionary<string, string> { var inMemorySettings = new Dictionary<string, string>
{"ConnectionStrings:database", "Host=localhost;Username=postgres;Password=postgres;Database=postgres;sslmode=disable"} {
{
"ConnectionStrings:database",
"Host=localhost;Username=postgres;Password=postgres;Database=postgres;sslmode=disable"
}
}; };
IConfiguration configuration = new ConfigurationBuilder() IConfiguration configuration = new ConfigurationBuilder()
@ -25,9 +38,9 @@ public class RssWatcherJobTest
[Fact] [Fact]
public void CanFindItemsNoDb() public void CanFindItemsNoDb()
{ {
var url = "https://www.engadget.com/rss.xml"; const string url = "https://www.engadget.com/rss.xml";
var client = new RssWatcherJob(); var client = new RssWatcherJob();
var items = client.Collect(url, Guid.NewGuid(), 0); client.Collect(url, Guid.NewGuid(), 0);
} }
[Fact] [Fact]
@ -35,7 +48,13 @@ public class RssWatcherJobTest
{ {
var url = "https://www.engadget.com/rss.xml"; var url = "https://www.engadget.com/rss.xml";
var client = new RssWatcherJob(); var client = new RssWatcherJob();
client.Init(ConnectionString()); client.InitAndExecute(new RssWatcherJobOptions
{
ConnectionStrings = new ConfigSectionConnectionStrings
{
Database = ConnectionString()
}
});
var items = client.Collect(url, Guid.NewGuid(), 0); var items = client.Collect(url, Guid.NewGuid(), 0);
client.UpdateDatabase(items); client.UpdateDatabase(items);
} }
@ -44,20 +63,13 @@ public class RssWatcherJobTest
public void CanReadHtmlDrivenFeedPage() public void CanReadHtmlDrivenFeedPage()
{ {
var url = "https://www.howtogeek.com/feed/"; var url = "https://www.howtogeek.com/feed/";
var client = new RssWatcherJob();
client.Init(ConnectionString());
var items = client.Collect(url, Guid.NewGuid(), 0);
Console.WriteLine('k');
}
[Fact]
public void InitAndExecuteTest()
{
var client = new RssWatcherJob(); var client = new RssWatcherJob();
client.InitAndExecute(new RssWatcherJobOptions client.InitAndExecute(new RssWatcherJobOptions
{ {
ConnectionString = ConnectionString() ConnectionStrings = new ConfigSectionConnectionStrings
{
Database = ConnectionString()
}
}); });
} }
} }

View File

@ -27,4 +27,12 @@
<ProjectReference Include="..\Newsbot.Collector.Database\Newsbot.Collector.Database.csproj" /> <ProjectReference Include="..\Newsbot.Collector.Database\Newsbot.Collector.Database.csproj" />
</ItemGroup> </ItemGroup>
<ItemGroup>
<Content Update="appsettings.json">
<ExcludeFromSingleFile>true</ExcludeFromSingleFile>
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<CopyToPublishDirectory>PreserveNewest</CopyToPublishDirectory>
</Content>
</ItemGroup>
</Project> </Project>

View File

@ -7,32 +7,74 @@ public class HtmlPageReaderTests
[Fact] [Fact]
public void BaseSiteContainsRssFeed() public void BaseSiteContainsRssFeed()
{ {
var client = new HtmlPageReader("https://dotnettutorials.net/"); var client = new HtmlPageReader(new HtmlPageReaderOptions
{
Url = "https://dotnettutorials.net/"
});
var headClient = new HeadParserClient(client.GetSiteContent()); var headClient = new HeadParserClient(client.GetSiteContent());
var feedUri = headClient.GetSiteFeed(); var feedUri = headClient.GetSiteFeed();
if (feedUri == "") if (feedUri == "") Assert.Fail("Failed to find the RSS feed");
{
Assert.Fail("Failed to find the RSS feed");
}
} }
[Fact] [Fact]
public void SiteDoesNotReturnRssFeed() public void SiteDoesNotReturnRssFeed()
{ {
var client = new HtmlPageReader("https://www.engadget.com/"); var client = new HtmlPageReader(new HtmlPageReaderOptions
{
Url = "https://www.engadget.com/"
});
var headClient = new HeadParserClient(client.GetSiteContent()); var headClient = new HeadParserClient(client.GetSiteContent());
var feedUri = headClient.GetSiteFeed(); var feedUri = headClient.GetSiteFeed();
if (feedUri == "") if (feedUri == "") Assert.Fail("");
{
Assert.Fail("");
}
} }
[Fact] [Fact]
public void CanFindBodyOfTheArticle() public void CanFindBodyOfTheArticle()
{ {
var client = new HtmlPageReader("https://www.engadget.com/apple-is-convinced-my-dog-is-stalking-me-143100932.html"); var client = new HtmlPageReader(new HtmlPageReaderOptions
{
Url = "https://www.engadget.com/apple-is-convinced-my-dog-is-stalking-me-143100932.html"
});
var c = client.CollectPostContent(); var c = client.CollectPostContent();
Console.WriteLine(c); Console.WriteLine(c);
} }
[Fact]
public void FindYoutubeChannelId()
{
var url = "https://www.youtube.com/@CityPlannerPlays";
//var b = new BrowserClient();
//var pageSource = b.GetPageSource(url);
var client = new HtmlPageReader(new HtmlPageReaderOptions
{
Url = url
});
client.Parse();
if (client.Data.Header.YoutubeChannelID is null) Assert.Fail("missing youtube id");
}
[Fact]
public void CanExtractHeadersFromReddit()
{
var url = "https://www.reddit.com/";
var client = new HtmlPageReader(new HtmlPageReaderOptions
{
Url = url
});
client.Parse();
if (client.Data.Header.Image == "") Assert.Fail("missing an expected image from the reddit header.");
}
[Fact]
public void CanExtractHeadersFromSubreddit()
{
var url = "https://www.reddit.com/r/ffxiv";
var client = new HtmlPageReader(new HtmlPageReaderOptions
{
Url = url
});
client.Parse();
if (client.Data.Header.Image == "") Assert.Fail("missing an expected image from the reddit header.");
}
} }

View File

@ -1,8 +1,8 @@
using Newsbot.Collector.Database.Repositories; using Newsbot.Collector.Database.Repositories;
using Newsbot.Collector.Domain.Models; using Newsbot.Collector.Domain.Models;
namespace Newsbot.Collector.Tests.Tables;
public class SourcesTableTests public class SourcesTableTests
{ {
[Fact] [Fact]

View File

@ -1,10 +0,0 @@
namespace Newsbot.Collector.Tests;
public class UnitTest1
{
[Fact]
public void Test1()
{
}
}

View File

@ -6,20 +6,19 @@ param (
$ErrorActionPreference = 'Stop' $ErrorActionPreference = 'Stop'
function NewRedditSource { function New-RedditSource {
param ( param (
[string] $Name,
[string] $Url [string] $Url
) )
$urlEncoded = [uri]::EscapeDataString($Url) $urlEncoded = [uri]::EscapeDataString($Url)
$param = "name=$Name&url=$urlEncoded" $param = "url=$urlEncoded"
$uri = "$ApiServer/api/sources/new/reddit?$param" $uri = "$ApiServer/api/sources/new/reddit?$param"
$res = Invoke-RestMethod -Method Post -Uri $uri $res = Invoke-RestMethod -Method Post -Uri $uri
return $res return $res
} }
function NewRssSource { function New-RssSource {
param ( param (
[string] $Name, [string] $Name,
[string] $Url [string] $Url
@ -31,19 +30,18 @@ function NewRssSource {
return $res return $res
} }
function NewYoutubeSource { function New-YoutubeSource {
param ( param (
[string] $Name, [Parameter(Required)][string] $Url
[string] $Url
) )
$urlEncoded = [uri]::EscapeDataString($Url) $urlEncoded = [uri]::EscapeDataString($Url)
[string] $param = "name=$Name&url=$urlEncoded" [string] $param = "url=$urlEncoded"
[string] $uri = "$ApiServer/api/sources/new/youtube?$param" [string] $uri = "$ApiServer/api/sources/new/youtube?$param"
$res = Invoke-RestMethod -Method Post -Uri $uri $res = Invoke-RestMethod -Method Post -Uri $uri
return $res return $res
} }
function NewTwitchSource { function New-TwitchSource {
param ( param (
[string] $Name [string] $Name
) )
@ -81,18 +79,18 @@ function New-Subscription {
# Load Secrets file # Load Secrets file
$secrets = Get-Content $JsonSecrets -Raw | ConvertFrom-Json $secrets = Get-Content $JsonSecrets -Raw | ConvertFrom-Json
$redditDadJokes = NewRedditSource -Name "dadjokes" -Url "https://reddit.com/r/dadjokes" $redditDadJokes = New-RedditSource -Name "dadjokes"
$redditSteamDeck = NewRedditSource -Name "steamdeck" -Url "https://reddit.com/r/steamdeck" $redditSteamDeck = New-RedditSource -Name "steamdeck"
$rssSteamDeck = NewRssSource -Name "Steampowered - Steam Deck" -Url "https://store.steampowered.com/feeds/news/app/1675200/?cc=US&l=english&snr=1_2108_9__2107" $rssSteamDeck = New-RssSource -Name "Steampowered - Steam Deck" -Url "https://store.steampowered.com/feeds/news/app/1675200/?cc=US&l=english&snr=1_2108_9__2107"
$rssFaysHaremporium = NewRssSource -Name "Fay's Haremporium" -Url "https://blog.nyxstudios.moe/rss/" $rssFaysHaremporium = New-RssSource -Name "Fay's Haremporium" -Url "https://blog.nyxstudios.moe/rss/"
$rssPodcastLetsMosley = NewRssSource -Name "Let's Mosley" -Url "https://anchor.fm/s/6c7aa4c4/podcast/rss" $rssPodcastLetsMosley = New-RssSource -Name "Let's Mosley" -Url "https://anchor.fm/s/6c7aa4c4/podcast/rss"
$youtubeGameGrumps = NewYoutubeSource -Name "Game Grumps" -Url "https://www.youtube.com/user/GameGrumps" $youtubeGameGrumps = New-YoutubeSource -Url "https://www.youtube.com/user/GameGrumps"
$youtubeCityPlannerPlays = NewYoutubeSource -Name "City Planner Plays" -Url "https://www.youtube.com/c/cityplannerplays" $youtubeCityPlannerPlays = New-YoutubeSource -Url "https://www.youtube.com/c/cityplannerplays"
$youtubeLinusTechTips = New-YoutubeSource -Url "https://www.youtube.com/@LinusTechTips"
$twitchNintendo = NewTwitchSource -Name "Nintendo" $twitchNintendo = NewTwitchSource -Name "Nintendo"
$twitchNintendo.id
$miharuMonitor = New-DiscordWebhook -Server "Miharu Monitor" -Channel "dev" -Url $secrets.MiharuMonitor.dev01 $miharuMonitor = New-DiscordWebhook -Server "Miharu Monitor" -Channel "dev" -Url $secrets.MiharuMonitor.dev01
@ -103,4 +101,5 @@ New-Subscription -SourceId $rssFaysHaremporium.id -DiscordWebhookId $miharuMonit
New-Subscription -SourceId $rssPodcastLetsMosley.id -DiscordWebhookId $miharuMonitor.id New-Subscription -SourceId $rssPodcastLetsMosley.id -DiscordWebhookId $miharuMonitor.id
New-Subscription -SourceId $youtubeGameGrumps.id -DiscordWebhookId $miharuMonitor.id New-Subscription -SourceId $youtubeGameGrumps.id -DiscordWebhookId $miharuMonitor.id
New-Subscription -SourceId $youtubeCityPlannerPlays.id -DiscordWebhookId $miharuMonitor.id New-Subscription -SourceId $youtubeCityPlannerPlays.id -DiscordWebhookId $miharuMonitor.id
New-Subscription -SourceId $youtubeLinusTechTips.id -DiscordWebhookId $miharuMonitor.id
New-Subscription -SourceId $twitchNintendo.id -DiscordWebhookId $miharuMonitor.id New-Subscription -SourceId $twitchNintendo.id -DiscordWebhookId $miharuMonitor.id