Features/missing files (#14)
* Added jobs Controller to trigger collection. * Added backgroundjobs to move them out of program.cs * new column to track youtube ID values and adding a sourceid column on the icon for linking * Added icon table repo * added interface for IconsRepo * hey the missing config models * adding section const keys to pull blocks of configs * Added youtubewatcher to the code but not ready to enable it in the background. More testing needed. * Test... improvements?
This commit is contained in:
parent
9be985da0a
commit
adb4799206
26
Newsbot.Collector.Api/BackgroundJobs.cs
Normal file
26
Newsbot.Collector.Api/BackgroundJobs.cs
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
using Hangfire;
|
||||||
|
using Newsbot.Collector.Domain.Consts;
|
||||||
|
using Newsbot.Collector.Domain.Models.Config;
|
||||||
|
using Newsbot.Collector.Services.Jobs;
|
||||||
|
|
||||||
|
namespace Newsbot.Collector.Api;
|
||||||
|
|
||||||
|
/// <summary>
/// Registers the recurring Hangfire jobs for the collector.
/// </summary>
public class BackgroundJobs
{
    /// <summary>
    /// Creates or updates the recurring "RSS" and "Discord Alerts" jobs.
    /// </summary>
    /// <param name="configuration">Application configuration the job option objects are bound from.</param>
    public static void SetupRecurringJobs(IConfiguration configuration)
    {
        // GetValue<T> converts a single scalar value and therefore returns null for a section
        // that only has child keys. Bind the whole section so the option objects are populated.
        var connectionStrings = configuration
            .GetSection(ConfigSectionsConst.ConnectionStrings)
            .Get<ConfigSectionConnectionStrings>();
        var rssConfig = configuration
            .GetSection(ConfigSectionsConst.Rss)
            .Get<ConfigSectionRssModel>();
        var discordConfig = configuration
            .GetSection(ConfigSectionsConst.NotificationsDiscord)
            .Get<ConfigSectionNotificationsDiscord>();

        // Cron: minute 15 of every hour.
        RecurringJob.AddOrUpdate<RssWatcherJob>("RSS", x => x.InitAndExecute(new RssWatcherJobOptions
        {
            ConnectionStrings = connectionStrings,
            Config = rssConfig
        }), "15 0-23 * * *");

        RecurringJob.AddOrUpdate<DiscordNotificationJob>("Discord Alerts", x =>
            x.InitAndExecute(new DiscordNotificationJobOptions
            {
                ConnectionStrings = connectionStrings,
                Config = discordConfig
            }), "5/10 * * * *");
    }
}
|
47
Newsbot.Collector.Api/Controllers/JobsController.cs
Normal file
47
Newsbot.Collector.Api/Controllers/JobsController.cs
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
using Hangfire;
|
||||||
|
using Microsoft.AspNetCore.Mvc;
|
||||||
|
using Microsoft.Extensions.Options;
|
||||||
|
using Newsbot.Collector.Database.Repositories;
|
||||||
|
using Newsbot.Collector.Domain.Interfaces;
|
||||||
|
using Newsbot.Collector.Domain.Models.Config;
|
||||||
|
using Newsbot.Collector.Services.Jobs;
|
||||||
|
|
||||||
|
namespace Newsbot.Collector.Api.Controllers;
|
||||||
|
|
||||||
|
/// <summary>
/// API endpoints under <c>api/jobs</c> that enqueue collection jobs on demand through Hangfire.
/// </summary>
[ApiController]
[Route("api/jobs")]
public class JobsController
{
    private readonly ConfigSectionConnectionStrings _connectionStrings;

    // NOTE(review): the logger is typed with SourcesController, so entries are categorised under
    // that controller. The constructor signature is left untouched to stay compatible with callers.
    private readonly ILogger<SourcesController> _logger;
    private readonly ConfigSectionRssModel _rssConfig;

    /// <summary>
    /// Captures the configuration values that are handed to the enqueued jobs.
    /// </summary>
    /// <param name="logger">Logger instance for this controller.</param>
    /// <param name="connectionStrings">Connection string options forwarded to the jobs.</param>
    /// <param name="rss">RSS options forwarded to the RSS job.</param>
    public JobsController(ILogger<SourcesController> logger, IOptions<ConfigSectionConnectionStrings> connectionStrings,
        IOptions<ConfigSectionRssModel> rss)
    {
        _logger = logger;
        _connectionStrings = connectionStrings.Value;
        _rssConfig = rss.Value;
    }

    /// <summary>
    /// Enqueues a one-off run of <see cref="RssWatcherJob"/>.
    /// </summary>
    /// <remarks>
    /// The method is named CheckReddit, but both the route and the enqueued job are RSS.
    /// The name is kept so existing references keep working.
    /// </remarks>
    [HttpPost("check/rss")]
    public void CheckReddit()
    {
        BackgroundJob.Enqueue<RssWatcherJob>(x => x.InitAndExecute(new RssWatcherJobOptions
        {
            ConnectionStrings = _connectionStrings,
            Config = _rssConfig
        }));
    }

    /// <summary>
    /// Enqueues a one-off run of <see cref="YoutubeWatcherJob"/>.
    /// </summary>
    [HttpPost("check/youtube")]
    public void CheckYoutube()
    {
        BackgroundJob.Enqueue<YoutubeWatcherJob>(x => x.InitAndExecute(new YoutubeWatcherJobOptions
        {
            ConnectionStrings = _connectionStrings
        }));
    }
}
|
@ -0,0 +1,13 @@
|
|||||||
|
-- Adds a nullable YoutubeId column to the sources table.
-- +goose Up
-- +goose StatementBegin
ALTER TABLE sources
    ADD COLUMN IF NOT EXISTS YoutubeId TEXT;
-- +goose StatementEnd

-- +goose Down
-- +goose StatementBegin
ALTER TABLE sources
    DROP COLUMN IF EXISTS YoutubeId;
-- +goose StatementEnd
|
@ -0,0 +1,13 @@
|
|||||||
|
-- Adds a nullable SourceId column to the icons table.
-- +goose Up
-- +goose StatementBegin
ALTER TABLE icons
    ADD COLUMN IF NOT EXISTS SourceId uuid;
-- +goose StatementEnd

-- +goose Down
-- +goose StatementBegin
ALTER TABLE icons
    DROP COLUMN IF EXISTS SourceId;
-- +goose StatementEnd
|
65
Newsbot.Collector.Database/Repositories/IconsTable.cs
Normal file
65
Newsbot.Collector.Database/Repositories/IconsTable.cs
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
using System.Data;
|
||||||
|
using Dapper;
|
||||||
|
using Microsoft.Extensions.Configuration;
|
||||||
|
using Newsbot.Collector.Domain.Interfaces;
|
||||||
|
using Newsbot.Collector.Domain.Models;
|
||||||
|
using Npgsql;
|
||||||
|
|
||||||
|
namespace Newsbot.Collector.Database.Repositories;
|
||||||
|
|
||||||
|
/// <summary>
/// Dapper/Npgsql implementation of <see cref="IIconsRepository"/> working against the <c>icons</c> table.
/// </summary>
public class IconsTable : IIconsRepository
{
    private readonly string _connectionString;

    /// <summary>
    /// Creates the repository from an explicit connection string.
    /// </summary>
    /// <param name="connectionString">Connection string used for every query.</param>
    public IconsTable(string connectionString)
    {
        _connectionString = connectionString;
    }

    /// <summary>
    /// Creates the repository from the "database" connection string in configuration.
    /// An empty string is used when the entry is missing.
    /// </summary>
    /// <param name="configuration">Configuration to read the connection string from.</param>
    public IconsTable(IConfiguration configuration)
    {
        _connectionString = configuration.GetConnectionString("database") ?? "";
    }

    /// <summary>
    /// Inserts a new icon row. A fresh id is generated and written back to <paramref name="model"/>.
    /// </summary>
    /// <param name="model">Icon to store; its <c>Id</c> is overwritten.</param>
    public void New(IconModel model)
    {
        model.Id = Guid.NewGuid();

        using var conn = OpenConnection(_connectionString);
        const string query =
            "Insert Into icons (id, filename, site, sourceid) values (@Id,@FileName, @Site, @SourceId)";
        conn.Execute(query, model);
    }

    /// <summary>
    /// Looks up an icon by its own id.
    /// </summary>
    /// <param name="id">Id of the icon row.</param>
    /// <returns>The matching icon, or a new empty <see cref="IconModel"/> when no row matches.</returns>
    public IconModel GetById(Guid id)
    {
        using var conn = OpenConnection(_connectionString);
        const string query = "Select * From icons where ID = @id Limit 1;";
        return conn.Query<IconModel>(query, new { id }).FirstOrDefault() ?? new IconModel();
    }

    /// <summary>
    /// Looks up the icon linked to a source.
    /// </summary>
    /// <param name="id">Id of the source the icon belongs to.</param>
    /// <returns>The matching icon, or a new empty <see cref="IconModel"/> when no row matches.</returns>
    public IconModel GetBySourceId(Guid id)
    {
        using var conn = OpenConnection(_connectionString);
        const string query = "Select * From icons where sourceid = @id Limit 1;";
        return conn.Query<IconModel>(query, new { id }).FirstOrDefault() ?? new IconModel();
    }

    /// <summary>
    /// Opens a connection for the given connection string. The caller owns and disposes it.
    /// </summary>
    private static IDbConnection OpenConnection(string connectionString)
    {
        var conn = new NpgsqlConnection(connectionString);
        try
        {
            conn.Open();
            return conn;
        }
        catch
        {
            // Do not leak the connection object when the open attempt fails.
            conn.Dispose();
            throw;
        }
    }
}
|
16
Newsbot.Collector.Domain/Consts/ConfigSectionsConst.cs
Normal file
16
Newsbot.Collector.Domain/Consts/ConfigSectionsConst.cs
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
namespace Newsbot.Collector.Domain.Consts;
|
||||||
|
|
||||||
|
/// <summary>
/// Central list of the configuration keys used to locate each settings block.
/// </summary>
public static class ConfigSectionsConst
{
    /// <summary>Key of the connection strings section.</summary>
    public const string ConnectionStrings = "ConnectionStrings";

    /// <summary>Key of the Final Fantasy XIV section.</summary>
    public const string FinalFantasyXiv = "FinalFantasyXiv";

    /// <summary>Key of the Reddit section.</summary>
    public const string Reddit = "Reddit";

    /// <summary>Key of the RSS section.</summary>
    public const string Rss = "Rss";

    /// <summary>Key of the Twitch section.</summary>
    public const string Twitch = "Twitch";

    /// <summary>Key of the YouTube section.</summary>
    public const string Youtube = "Youtube";

    /// <summary>Key of the Discord block nested under the Notifications section.</summary>
    public const string NotificationsDiscord = "Notifications:Discord";
}
|
11
Newsbot.Collector.Domain/Interfaces/IIconsRepository.cs
Normal file
11
Newsbot.Collector.Domain/Interfaces/IIconsRepository.cs
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
using Newsbot.Collector.Domain.Models;
|
||||||
|
|
||||||
|
namespace Newsbot.Collector.Domain.Interfaces;
|
||||||
|
|
||||||
|
/// <summary>
/// Storage contract for icon records.
/// </summary>
public interface IIconsRepository
{
    /// <summary>Stores a new icon record.</summary>
    /// <param name="model">The icon to store.</param>
    void New(IconModel model);

    /// <summary>Fetches an icon by its own id.</summary>
    /// <param name="id">Id of the icon.</param>
    IconModel GetById(Guid id);

    /// <summary>Fetches the icon associated with a source.</summary>
    /// <param name="id">Id of the source.</param>
    IconModel GetBySourceId(Guid id);
}
|
@ -0,0 +1,7 @@
|
|||||||
|
namespace Newsbot.Collector.Domain.Models.Config;
|
||||||
|
|
||||||
|
/// <summary>
/// Typed view of the connection strings configuration block.
/// </summary>
public class ConfigSectionConnectionStrings
{
    /// <summary>Connection string for the database; null when not configured.</summary>
    public string? Database { get; init; }

    /// <summary>OpenTelemetry endpoint value; null when not configured.</summary>
    public string? OpenTelemetry { get; init; }
}
|
@ -0,0 +1,6 @@
|
|||||||
|
namespace Newsbot.Collector.Domain.Models.Config;
|
||||||
|
|
||||||
|
/// <summary>
/// Typed view of the Discord notifications configuration block.
/// </summary>
public class ConfigSectionNotificationsDiscord
{
    /// <summary>Enabled flag for Discord notifications; false unless set.</summary>
    public bool IsEnabled { get; set; }
}
|
@ -0,0 +1,9 @@
|
|||||||
|
namespace Newsbot.Collector.Domain.Models.Config;
|
||||||
|
|
||||||
|
/// <summary>
/// Typed view of the Reddit configuration block. Every flag is false unless set.
/// </summary>
public class ConfigSectionRedditModel
{
    /// <summary>Enabled flag for the Reddit integration.</summary>
    public bool IsEnabled { get; set; }

    /// <summary>Flag named for pulling "hot" posts.</summary>
    public bool PullHot { get; set; }

    /// <summary>Flag named for pulling NSFW posts.</summary>
    public bool PullNsfw { get; set; }

    /// <summary>Flag named for pulling "top" posts.</summary>
    public bool PullTop { get; set; }
}
|
@ -0,0 +1,6 @@
|
|||||||
|
namespace Newsbot.Collector.Domain.Models.Config;
|
||||||
|
|
||||||
|
/// <summary>
/// Typed view of the RSS configuration block.
/// </summary>
public class ConfigSectionRssModel
{
    /// <summary>Enabled flag for RSS collection; false unless set.</summary>
    public bool IsEnabled { get; set; }
}
|
@ -0,0 +1,7 @@
|
|||||||
|
namespace Newsbot.Collector.Domain.Models.Config;
|
||||||
|
|
||||||
|
/// <summary>
/// Typed view of the YouTube configuration block. Both flags are false unless set.
/// </summary>
public class ConfigSectionYoutubeModel
{
    /// <summary>Enabled flag for YouTube collection.</summary>
    public bool IsEnabled { get; set; }

    /// <summary>Debug mode flag.</summary>
    public bool DebugMode { get; set; }
}
|
30
Newsbot.Collector.Services/HtmlParser/BrowserClient.cs
Normal file
30
Newsbot.Collector.Services/HtmlParser/BrowserClient.cs
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
using OpenQA.Selenium;
|
||||||
|
using OpenQA.Selenium.Firefox;
|
||||||
|
|
||||||
|
namespace Newsbot.Collector.Services.HtmlParser;
|
||||||
|
|
||||||
|
/// <summary>
/// Thin wrapper around a Firefox WebDriver session used to fetch rendered page source.
/// </summary>
public class BrowserClient : IDisposable
{
    private readonly IWebDriver _driver;
    private bool _disposed;

    /// <summary>
    /// Starts a new Firefox driver session.
    /// </summary>
    public BrowserClient()
    {
        _driver = new FirefoxDriver();
    }

    /// <summary>
    /// Closes the browser and releases the driver. Safe to call more than once.
    /// </summary>
    public void Dispose()
    {
        if (_disposed) return;
        _disposed = true;

        try
        {
            _driver.Close();
            _driver.Quit();
        }
        finally
        {
            // Always release the driver, even when closing the window or session fails.
            _driver.Dispose();
        }

        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Navigates to <paramref name="url"/>, waits, and returns the page source.
    /// </summary>
    /// <param name="url">Address to load.</param>
    /// <param name="sleep">Milliseconds to block after navigation before reading the source.</param>
    /// <returns>The page source reported by the driver.</returns>
    /// <exception cref="ObjectDisposedException">The client has already been disposed.</exception>
    public string GetPageSource(string url, int sleep = 5000)
    {
        if (_disposed) throw new ObjectDisposedException(nameof(BrowserClient));

        _driver.Navigate().GoToUrl(url);

        // Give the page some time to finish loading js
        Thread.Sleep(sleep);
        return _driver.PageSource;
    }
}
|
22
Newsbot.Collector.Services/Jobs/JobLogger.cs
Normal file
22
Newsbot.Collector.Services/Jobs/JobLogger.cs
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
using Serilog;
|
||||||
|
|
||||||
|
namespace Newsbot.Collector.Services.Jobs;
|
||||||
|
|
||||||
|
/// <summary>
/// Builds the Serilog logger used by background jobs.
/// </summary>
public static class JobLogger
{
    /// <summary>
    /// Creates a logger, installs it as the global <see cref="Log.Logger"/>, and returns it.
    /// </summary>
    /// <param name="connectionString">
    /// OpenTelemetry endpoint. When null, empty or whitespace the logger writes to the console only.
    /// </param>
    /// <param name="jobName">Job name attached as the "Job" resource attribute on the OpenTelemetry sink.</param>
    /// <returns>The newly created logger.</returns>
    public static ILogger GetLogger(string connectionString, string jobName)
    {
        var configuration = new LoggerConfiguration().WriteTo.Console();

        // Only add the OpenTelemetry sink when there is a usable endpoint; a null or blank
        // value must not be handed to the sink.
        if (!string.IsNullOrWhiteSpace(connectionString))
        {
            configuration = configuration.WriteTo.OpenTelemetry(
                connectionString,
                resourceAttributes: new Dictionary<string, object>
                {
                    { "service.name", "newsbot-collector-api" },
                    { "Job", jobName }
                });
        }

        return Log.Logger = configuration.CreateLogger();
    }
}
|
152
Newsbot.Collector.Services/Jobs/YoutubeWatcherJob.cs
Normal file
152
Newsbot.Collector.Services/Jobs/YoutubeWatcherJob.cs
Normal file
@ -0,0 +1,152 @@
|
|||||||
|
using System.ServiceModel.Syndication;
|
||||||
|
using System.Xml;
|
||||||
|
using Newsbot.Collector.Database.Repositories;
|
||||||
|
using Newsbot.Collector.Domain.Consts;
|
||||||
|
using Newsbot.Collector.Domain.Interfaces;
|
||||||
|
using Newsbot.Collector.Domain.Models;
|
||||||
|
using Newsbot.Collector.Domain.Models.Config;
|
||||||
|
using Newsbot.Collector.Services.HtmlParser;
|
||||||
|
using Serilog;
|
||||||
|
|
||||||
|
namespace Newsbot.Collector.Services.Jobs;
|
||||||
|
|
||||||
|
/// <summary>
/// Input passed to <c>YoutubeWatcherJob.InitAndExecute</c>.
/// </summary>
public class YoutubeWatcherJobOptions
{
    /// <summary>Connection strings for the job; may be left null.</summary>
    public ConfigSectionConnectionStrings? ConnectionStrings { get; set; }

    /// <summary>Sleep duration in milliseconds; defaults to 3000.</summary>
    public int SleepTimer { get; set; } = 3000;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Background job that checks every YouTube source for new videos and stores them as articles.
/// </summary>
public class YoutubeWatcherJob
{
    private IArticlesRepository _articles;
    private IIconsRepository _icons;
    private ILogger _logger;
    private YoutubeWatcherJobOptions _options;
    private IDiscordQueueRepository _queue;
    private ISourcesRepository _source;

    /// <summary>
    /// Creates the job with placeholder dependencies. <see cref="InitAndExecute"/> replaces them
    /// with instances built from the supplied options before any work is done.
    /// </summary>
    public YoutubeWatcherJob()
    {
        _options = new YoutubeWatcherJobOptions();
        _articles = new ArticlesTable("");
        _queue = new DiscordQueueTable("");
        _source = new SourcesTable("");
        _icons = new IconsTable("");
        _logger = JobLogger.GetLogger("", "YoutubeWatcherJob");
    }

    /// <summary>
    /// Builds the repositories and logger from <paramref name="options"/> and runs the job.
    /// </summary>
    /// <param name="options">Connection strings and sleep timer for this run.</param>
    /// <exception cref="ArgumentNullException"><paramref name="options"/> is null.</exception>
    public void InitAndExecute(YoutubeWatcherJobOptions options)
    {
        ArgumentNullException.ThrowIfNull(options);

        options.ConnectionStrings ??= new ConfigSectionConnectionStrings();

        // Keep the caller's options so settings such as SleepTimer are honoured during the run.
        _options = options;

        var database = options.ConnectionStrings.Database ?? "";
        _articles = new ArticlesTable(database);
        _queue = new DiscordQueueTable(database);
        _source = new SourcesTable(database);
        _icons = new IconsTable(database);
        _logger = JobLogger.GetLogger(options.ConnectionStrings.OpenTelemetry ?? "", "YoutubeWatcherJob");

        Execute();
    }

    /// <summary>Checks each YouTube source returned by the sources repository.</summary>
    private void Execute()
    {
        var sources = _source.ListByType(SourceTypes.YouTube, 100);

        foreach (var source in sources)
        {
            CheckSource(source);
        }
    }

    /// <summary>
    /// Resolves the channel id for a source (storing it when newly discovered), reads the
    /// channel feed and saves every video that is not yet known.
    /// </summary>
    private void CheckSource(SourceModel source)
    {
        var channelId = source.YoutubeId;

        if (string.IsNullOrEmpty(channelId))
        {
            channelId = GetChannelId(source.Url);

            // Without a channel id there is no feed to read, and an empty value should not be
            // stored. GetChannelId has already logged the failure.
            if (channelId == "") return;

            _source.UpdateYoutubeId(source.ID, channelId);
        }

        // Make sure we have an icon for the channel
        var icon = _icons.GetBySourceId(source.ID);
        if (icon.Id == Guid.Empty)
        {
            // TODO: collect and store the channel icon.
        }

        var url = $"https://www.youtube.com/feeds/videos.xml?channel_id={channelId}";

        var newVideos = CheckFeed(url, source);
        foreach (var video in newVideos)
        {
            _articles.New(video);
        }
    }

    /// <summary>
    /// Reads the channel page and extracts the YouTube channel id from its header data.
    /// </summary>
    /// <returns>The channel id, or an empty string when it could not be found.</returns>
    private string GetChannelId(string url)
    {
        // Collect the Channel ID and store it for later.
        var pageReader = new HtmlPageReader(new HtmlPageReaderOptions
        {
            Url = url
        });
        pageReader.Parse();

        var id = pageReader.Data.Header.YoutubeChannelID ?? "";
        if (id == "")
        {
            // Pass the url as a property value rather than as the message template.
            _logger.Error("Unable to find the Youtube Channel ID for the requested url {Url}.", url);
        }

        return id;
    }

    /// <summary>
    /// Loads the feed at <paramref name="url"/> and builds an article for every entry whose
    /// link is not already stored.
    /// </summary>
    private List<ArticlesModel> CheckFeed(string url, SourceModel source)
    {
        var videos = new List<ArticlesModel>();

        using var reader = XmlReader.Create(url);
        var feed = SyndicationFeed.Load(reader);
        foreach (var post in feed.Items)
        {
            // Entries without a link cannot be stored or de-duplicated, so skip them.
            var articleUrl = post.Links.FirstOrDefault()?.Uri?.AbsoluteUri;
            if (string.IsNullOrEmpty(articleUrl)) continue;
            if (IsThisUrlKnown(articleUrl)) continue;

            var videoDetails = new HtmlPageReader(new HtmlPageReaderOptions
            {
                Url = articleUrl
            });
            videoDetails.Parse();

            var article = new ArticlesModel
            {
                //Todo add the icon
                AuthorName = post.Authors.FirstOrDefault()?.Name ?? "",
                Title = post.Title.Text,
                Tags = FetchTags(post),
                URL = articleUrl,
                PubDate = post.PublishDate.DateTime,
                Thumbnail = videoDetails.Data.Header.Image,
                Description = videoDetails.Data.Header.Description,
                SourceID = source.ID,
                Video = "true"
            };

            videos.Add(article);

            // Pause between page requests; a non-positive timer disables the pause.
            if (_options.SleepTimer > 0) Thread.Sleep(_options.SleepTimer);
        }

        return videos;
    }

    /// <summary>Returns true when an article with this url is already stored.</summary>
    private bool IsThisUrlKnown(string url)
    {
        return _articles.GetByUrl(url).URL == url;
    }

    /// <summary>
    /// Joins the category names of a feed entry, each followed by a comma.
    /// </summary>
    private static string FetchTags(SyndicationItem post)
    {
        return string.Concat(post.Categories.Select(tag => $"{tag.Name},"));
    }
}
|
29
Newsbot.Collector.Tests/Services/BrowserClientTests.cs
Normal file
29
Newsbot.Collector.Tests/Services/BrowserClientTests.cs
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
using Newsbot.Collector.Services.HtmlParser;
|
||||||
|
|
||||||
|
namespace Newsbot.Collector.Tests.Services;
|
||||||
|
|
||||||
|
/// <summary>
/// Tests for <see cref="BrowserClient"/>. They construct a real client and load live URLs.
/// </summary>
public class BrowserClientTests
{
    /// <summary>The client returns a non-empty page source for a simple page.</summary>
    [Fact]
    public void LoadsBrowser()
    {
        using var browser = new BrowserClient();

        var html = browser.GetPageSource("https://www.google.com");

        Assert.False(html == "", "failed to return page source");
    }

    /// <summary>Page source fetched through the client yields a YouTube channel id when parsed.</summary>
    [Fact]
    public void CanLoadHeadersFromSource()
    {
        using var browser = new BrowserClient();
        var html = browser.GetPageSource("https://www.youtube.com/gamegrumps");

        var readerOptions = new HtmlPageReaderOptions
        {
            SourceCode = html
        };
        var pageReader = new HtmlPageReader(readerOptions);
        pageReader.Parse();

        Assert.False(pageReader.Data.Header.YoutubeChannelID is null, "Failed to find the YoutubeChannelId");
    }
}
|
5
Newsbot.Collector.Tests/TestHelper.cs
Normal file
5
Newsbot.Collector.Tests/TestHelper.cs
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
namespace Newsbot.Collector.Tests;
|
||||||
|
|
||||||
|
/// <summary>
/// Placeholder for shared test utilities; it currently has no members.
/// </summary>
public static class TestHelper
{
}
|
Loading…
Reference in New Issue
Block a user