Features/missing files (#14)

* Added JobsController to trigger collection on demand.

* Added BackgroundJobs to move the recurring job setup out of Program.cs

* Added a new column to track YouTube ID values and a sourceid column on the icons table for linking

* Added icon table repo

* added interface for IconsRepo

* Added the missing config models

* adding section const keys to pull blocks of configs

* Added YoutubeWatcherJob to the code, but it is not ready to be enabled in the background yet. More testing is needed.

* Test... improvements?
This commit is contained in:
James Tombleson 2023-03-31 23:00:15 -07:00 committed by GitHub
parent 9be985da0a
commit adb4799206
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 464 additions and 0 deletions

View File

@ -0,0 +1,26 @@
using Hangfire;
using Newsbot.Collector.Domain.Consts;
using Newsbot.Collector.Domain.Models.Config;
using Newsbot.Collector.Services.Jobs;
namespace Newsbot.Collector.Api;
/// <summary>
/// Registers the recurring Hangfire jobs of the collector API.
/// </summary>
public class BackgroundJobs
{
    /// <summary>
    /// Adds or updates the recurring "RSS" and "Discord Alerts" jobs.
    /// </summary>
    /// <param name="configuration">
    /// Application configuration that contains the sections named in <see cref="ConfigSectionsConst" />.
    /// </param>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="configuration" /> is null.</exception>
    public static void SetupRecurringJobs(IConfiguration configuration)
    {
        ArgumentNullException.ThrowIfNull(configuration);

        // GetValue<T> only converts the scalar value stored directly at a key. These keys are
        // sections with child keys, so they have to be bound with GetSection(...).Get<T>();
        // otherwise the jobs would be scheduled with null configuration objects.
        var connectionStrings = configuration
            .GetSection(ConfigSectionsConst.ConnectionStrings)
            .Get<ConfigSectionConnectionStrings>();
        var rssConfig = configuration
            .GetSection(ConfigSectionsConst.Rss)
            .Get<ConfigSectionRssModel>();
        var discordConfig = configuration
            .GetSection(ConfigSectionsConst.NotificationsDiscord)
            .Get<ConfigSectionNotificationsDiscord>();

        RecurringJob.AddOrUpdate<RssWatcherJob>("RSS", x => x.InitAndExecute(new RssWatcherJobOptions
        {
            ConnectionStrings = connectionStrings,
            Config = rssConfig
        }), "15 0-23 * * *");

        RecurringJob.AddOrUpdate<DiscordNotificationJob>("Discord Alerts", x =>
            x.InitAndExecute(new DiscordNotificationJobOptions
            {
                ConnectionStrings = connectionStrings,
                Config = discordConfig
            }), "5/10 * * * *");
    }
}

View File

@ -0,0 +1,47 @@
using Hangfire;
using Microsoft.AspNetCore.Mvc;
using Microsoft.Extensions.Options;
using Newsbot.Collector.Database.Repositories;
using Newsbot.Collector.Domain.Interfaces;
using Newsbot.Collector.Domain.Models.Config;
using Newsbot.Collector.Services.Jobs;
namespace Newsbot.Collector.Api.Controllers;
/// <summary>
/// API endpoints under <c>api/jobs</c> that enqueue collector jobs on demand through Hangfire.
/// </summary>
[ApiController]
[Route("api/jobs")]
public class JobsController
{
    // NOTE(review): the logger category is SourcesController, not JobsController - confirm whether that is intended.
    private readonly ILogger<SourcesController> _logger;
    private readonly ConfigSectionConnectionStrings _connectionStrings;
    private readonly ConfigSectionRssModel _rssConfig;
    private readonly ISourcesRepository _sources;

    /// <summary>
    /// Captures the bound option values and creates the sources repository from the database connection string.
    /// </summary>
    /// <param name="logger">Logger stored on the controller.</param>
    /// <param name="connectionStrings">Connection string options; a null database value becomes an empty string.</param>
    /// <param name="rss">RSS options forwarded to the RSS watcher job.</param>
    public JobsController(ILogger<SourcesController> logger, IOptions<ConfigSectionConnectionStrings> connectionStrings,
        IOptions<ConfigSectionRssModel> rss)
    {
        _logger = logger;

        var connections = connectionStrings.Value;
        _connectionStrings = connections;
        _rssConfig = rss.Value;
        _sources = new SourcesTable(connections.Database ?? "");
    }

    /// <summary>
    /// Enqueues a one-off run of <see cref="RssWatcherJob" />.
    /// </summary>
    // NOTE(review): the method is named CheckReddit but is routed as check/rss and starts the RSS job.
    [HttpPost("check/rss")]
    public void CheckReddit()
    {
        var jobOptions = new RssWatcherJobOptions
        {
            ConnectionStrings = _connectionStrings,
            Config = _rssConfig
        };

        BackgroundJob.Enqueue<RssWatcherJob>(job => job.InitAndExecute(jobOptions));
    }

    /// <summary>
    /// Enqueues a one-off run of <see cref="YoutubeWatcherJob" />.
    /// </summary>
    [HttpPost("check/youtube")]
    public void CheckYoutube()
    {
        var jobOptions = new YoutubeWatcherJobOptions
        {
            ConnectionStrings = _connectionStrings
        };

        BackgroundJob.Enqueue<YoutubeWatcherJob>(job => job.InitAndExecute(jobOptions));
    }
}

View File

@ -0,0 +1,13 @@
-- +goose Up
-- +goose StatementBegin
-- Placeholder statement; it only selects a string literal.
SELECT 'up SQL query';
-- Adds a text column to the sources table. No NOT NULL or DEFAULT is given,
-- so existing rows hold NULL until a value is written.
-- The YoutubeWatcherJob stores the resolved YouTube channel id of a source through UpdateYoutubeId;
-- presumably that call writes this column (repository code not visible here).
ALTER TABLE sources
ADD COLUMN YoutubeId TEXT;
-- +goose StatementEnd
-- +goose Down
-- +goose StatementBegin
-- Placeholder statement; it only selects a string literal.
SELECT 'down SQL query';
-- Reverts the Up step by dropping the column again; stored values are lost.
ALTER TABLE sources
DROP COLUMN YoutubeId;
-- +goose StatementEnd

View File

@ -0,0 +1,13 @@
-- +goose Up
-- +goose StatementBegin
-- Placeholder statement; it only selects a string literal.
SELECT 'up SQL query';
-- Adds a uuid column to the icons table. No NOT NULL, DEFAULT or foreign key is declared.
-- IconsTable writes it on insert and filters on it in GetBySourceId to find the icon of a source.
ALTER TABLE icons
ADD COLUMN SourceId uuid;
-- +goose StatementEnd
-- +goose Down
-- +goose StatementBegin
-- Placeholder statement; it only selects a string literal.
SELECT 'down SQL query';
-- Reverts the Up step by dropping the column again; stored values are lost.
ALTER TABLE icons
DROP COLUMN SourceId;
-- +goose StatementEnd

View File

@ -0,0 +1,65 @@
using System.Data;
using Dapper;
using Microsoft.Extensions.Configuration;
using Newsbot.Collector.Domain.Interfaces;
using Newsbot.Collector.Domain.Models;
using Npgsql;
namespace Newsbot.Collector.Database.Repositories;
/// <summary>
/// Dapper/Npgsql backed repository for rows of the <c>icons</c> table.
/// </summary>
public class IconsTable : IIconsRepository
{
    private readonly string _connectionString;

    /// <summary>
    /// Creates the repository for an explicit connection string.
    /// </summary>
    /// <param name="connectionString">Connection string used for every query.</param>
    public IconsTable(string connectionString)
    {
        _connectionString = connectionString;
    }

    /// <summary>
    /// Creates the repository from the "database" connection string of the configuration.
    /// A missing entry results in an empty connection string.
    /// </summary>
    /// <param name="configuration">Configuration to read the connection string from.</param>
    public IconsTable(IConfiguration configuration)
    {
        _connectionString = configuration.GetConnectionString("database") ?? "";
    }

    /// <summary>
    /// Inserts the icon. A fresh <see cref="Guid" /> is assigned to <c>model.Id</c> before the insert,
    /// so any id set by the caller is overwritten.
    /// </summary>
    /// <param name="model">Icon to store.</param>
    public void New(IconModel model)
    {
        model.Id = Guid.NewGuid();
        using var conn = OpenConnection(_connectionString);
        const string q = @"Insert Into icons (id, filename, site, sourceid) values (@Id,@FileName, @Site, @SourceId)";
        conn.Execute(q, model);
    }

    /// <summary>
    /// Looks up an icon by its primary key.
    /// </summary>
    /// <param name="id">Id of the icon row.</param>
    /// <returns>The matching icon, or a new empty <see cref="IconModel" /> when no row matches.</returns>
    public IconModel GetById(Guid id)
    {
        return QuerySingle("Select * From icons where ID = @id Limit 1;", id);
    }

    /// <summary>
    /// Looks up the icon linked to a source.
    /// </summary>
    /// <param name="id">Id of the source the icon belongs to.</param>
    /// <returns>The matching icon, or a new empty <see cref="IconModel" /> when no row matches.</returns>
    public IconModel GetBySourceId(Guid id)
    {
        return QuerySingle("Select * From icons where sourceid = @id Limit 1;", id);
    }

    // Runs a single-row lookup with an @id parameter and maps "no row" to an empty model.
    private IconModel QuerySingle(string query, Guid id)
    {
        using var conn = OpenConnection(_connectionString);
        return conn.QueryFirstOrDefault<IconModel>(query, new { id }) ?? new IconModel();
    }

    // Opens a connection for the given connection string. The connection is disposed when
    // Open() fails so a failed attempt does not leak the instance.
    private static IDbConnection OpenConnection(string connectionString)
    {
        var conn = new NpgsqlConnection(connectionString);
        try
        {
            conn.Open();
            return conn;
        }
        catch
        {
            conn.Dispose();
            throw;
        }
    }
}

View File

@ -0,0 +1,16 @@
namespace Newsbot.Collector.Domain.Consts;
/// <summary>
///     Keys of the configuration sections that are loaded into the config model classes.
///     A colon separates a parent section from its child section.
/// </summary>
public static class ConfigSectionsConst
{
    /// <summary>Section holding the connection strings.</summary>
    public const string ConnectionStrings = "ConnectionStrings";

    /// <summary>Section for the Final Fantasy XIV settings.</summary>
    public const string FinalFantasyXiv = "FinalFantasyXiv";

    /// <summary>Discord child section below the notifications section.</summary>
    public const string NotificationsDiscord = "Notifications:Discord";

    /// <summary>Section for the Reddit settings.</summary>
    public const string Reddit = "Reddit";

    /// <summary>Section for the RSS settings.</summary>
    public const string Rss = "Rss";

    /// <summary>Section for the Twitch settings.</summary>
    public const string Twitch = "Twitch";

    /// <summary>Section for the YouTube settings.</summary>
    public const string Youtube = "Youtube";
}

View File

@ -0,0 +1,11 @@
using Newsbot.Collector.Domain.Models;
namespace Newsbot.Collector.Domain.Interfaces;
/// <summary>
/// Storage contract for icon records.
/// </summary>
public interface IIconsRepository
{
    /// <summary>
    /// Finds an icon by its own id.
    /// </summary>
    /// <param name="id">Id of the icon.</param>
    /// <returns>
    /// The icon. The <c>IconsTable</c> implementation returns a new, empty model when nothing matches.
    /// </returns>
    IconModel GetById(Guid id);

    /// <summary>
    /// Finds the icon that is linked to a source.
    /// </summary>
    /// <param name="id">Id of the source.</param>
    /// <returns>
    /// The icon. The <c>IconsTable</c> implementation returns a new, empty model when nothing matches.
    /// </returns>
    IconModel GetBySourceId(Guid id);

    /// <summary>
    /// Stores a new icon. The <c>IconsTable</c> implementation assigns a fresh id to the model first.
    /// </summary>
    /// <param name="model">Icon to store.</param>
    void New(IconModel model);
}

View File

@ -0,0 +1,7 @@
namespace Newsbot.Collector.Domain.Models.Config;
/// <summary>
/// Connection settings that are handed to the API controllers and the background jobs.
/// </summary>
public class ConfigSectionConnectionStrings
{
/// <summary>
/// Connection string passed to the table repositories (for example <c>SourcesTable</c> and <c>IconsTable</c>).
/// Callers substitute an empty string when this is null.
/// </summary>
public string? Database { get; init; }
/// <summary>
/// Value passed to <c>JobLogger.GetLogger</c> as the connection string of the OpenTelemetry sink.
/// Callers substitute an empty string when this is null, which makes the logger console-only.
/// </summary>
public string? OpenTelemetry { get; init; }
}

View File

@ -0,0 +1,6 @@
namespace Newsbot.Collector.Domain.Models.Config;
/// <summary>
/// Settings for the Discord notifications; loaded with the <c>ConfigSectionsConst.NotificationsDiscord</c> key
/// and passed to the Discord notification job options.
/// </summary>
public class ConfigSectionNotificationsDiscord
{
/// <summary>
/// Feature switch. NOTE(review): presumably decides whether Discord notifications are sent;
/// the consumer is not visible here - confirm.
/// </summary>
public bool IsEnabled { get; set; }
}

View File

@ -0,0 +1,9 @@
namespace Newsbot.Collector.Domain.Models.Config;
/// <summary>
/// Settings model for Reddit collection.
/// NOTE(review): presumably bound from the <c>ConfigSectionsConst.Reddit</c> section; no consumer is visible here - confirm.
/// </summary>
public class ConfigSectionRedditModel
{
/// <summary>Feature switch; presumably enables Reddit collection - confirm against the consumer.</summary>
public bool IsEnabled { get; set; }
/// <summary>Presumably requests the "hot" listing - confirm against the consumer.</summary>
public bool PullHot { get; set; }
/// <summary>Presumably allows NSFW posts to be collected - confirm against the consumer.</summary>
public bool PullNsfw { get; set; }
/// <summary>Presumably requests the "top" listing - confirm against the consumer.</summary>
public bool PullTop { get; set; }
}

View File

@ -0,0 +1,6 @@
namespace Newsbot.Collector.Domain.Models.Config;
/// <summary>
/// Settings for RSS collection; loaded with the <c>ConfigSectionsConst.Rss</c> key and passed to the
/// RSS watcher job as <c>RssWatcherJobOptions.Config</c>.
/// </summary>
public class ConfigSectionRssModel
{
/// <summary>
/// Feature switch. NOTE(review): presumably decides whether the RSS watcher does any work;
/// the job is not visible here - confirm.
/// </summary>
public bool IsEnabled { get; set; }
}

View File

@ -0,0 +1,7 @@
namespace Newsbot.Collector.Domain.Models.Config;
/// <summary>
/// Settings model for YouTube collection.
/// NOTE(review): presumably bound from the <c>ConfigSectionsConst.Youtube</c> section; the visible
/// <c>YoutubeWatcherJobOptions</c> does not carry it yet - confirm.
/// </summary>
public class ConfigSectionYoutubeModel
{
/// <summary>Feature switch; presumably enables YouTube collection - confirm against the consumer.</summary>
public bool IsEnabled { get; set; }
/// <summary>Debug switch; its effect is not visible here - confirm against the consumer.</summary>
public bool DebugMode { get; set; }
}

View File

@ -0,0 +1,30 @@
using OpenQA.Selenium;
using OpenQA.Selenium.Firefox;
namespace Newsbot.Collector.Services.HtmlParser;
/// <summary>
/// Thin wrapper around a Firefox WebDriver session that returns the rendered source of a page.
/// </summary>
public class BrowserClient : IDisposable
{
    private readonly IWebDriver _driver;
    private bool _disposed;

    /// <summary>
    /// Starts a new Firefox driver session.
    /// </summary>
    public BrowserClient()
    {
        _driver = new FirefoxDriver();
    }

    /// <summary>
    /// Ends the browser session. Calling it more than once is a no-op.
    /// </summary>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;

        // Quit() closes every window and ends the session, so a preceding Close() is redundant
        // and would leave Quit() talking to a session that may already be gone.
        try
        {
            _driver.Quit();
        }
        finally
        {
            _driver.Dispose();
        }
    }

    /// <summary>
    /// Navigates to <paramref name="url" />, waits, and returns the page source.
    /// </summary>
    /// <param name="url">Address to load.</param>
    /// <param name="sleep">Milliseconds to wait after navigation so scripts can finish; must not be negative.</param>
    /// <returns>The page source reported by the driver after the wait.</returns>
    /// <exception cref="ObjectDisposedException">Thrown when the client was already disposed.</exception>
    /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="sleep" /> is negative.</exception>
    public string GetPageSource(string url, int sleep = 5000)
    {
        if (_disposed)
        {
            throw new ObjectDisposedException(nameof(BrowserClient));
        }

        // Thread.Sleep treats -1 as "wait forever", which would hang the caller.
        if (sleep < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(sleep), sleep, "The wait time must not be negative.");
        }

        _driver.Navigate().GoToUrl(url);

        // Give the page some time to finish loading js
        Thread.Sleep(sleep);
        return _driver.PageSource;
    }
}

View File

@ -0,0 +1,22 @@
using Serilog;
namespace Newsbot.Collector.Services.Jobs;
/// <summary>
/// Builds the Serilog logger used by the background jobs.
/// </summary>
public static class JobLogger
{
    /// <summary>
    /// Creates a logger that writes to the console and, when a connection string is given, to OpenTelemetry.
    /// The new logger also replaces the global <see cref="Log.Logger" />.
    /// </summary>
    /// <param name="connectionString">OpenTelemetry connection string; an empty string selects console-only logging.</param>
    /// <param name="jobName">Value of the "Job" resource attribute.</param>
    /// <returns>The logger that was created and installed as <see cref="Log.Logger" />.</returns>
    public static ILogger GetLogger(string connectionString, string jobName)
    {
        var configuration = new LoggerConfiguration().WriteTo.Console();

        if (connectionString != "")
        {
            var attributes = new Dictionary<string, object>
            {
                ["service.name"] = "newsbot-collector-api",
                ["Job"] = jobName
            };

            configuration = configuration.WriteTo.OpenTelemetry(
                connectionString,
                resourceAttributes: attributes);
        }

        // NOTE(review): every call swaps the process-wide logger without disposing the previous one - confirm this is intended.
        var logger = configuration.CreateLogger();
        Log.Logger = logger;
        return logger;
    }
}
}

View File

@ -0,0 +1,152 @@
using System.ServiceModel.Syndication;
using System.Xml;
using Newsbot.Collector.Database.Repositories;
using Newsbot.Collector.Domain.Consts;
using Newsbot.Collector.Domain.Interfaces;
using Newsbot.Collector.Domain.Models;
using Newsbot.Collector.Domain.Models.Config;
using Newsbot.Collector.Services.HtmlParser;
using Serilog;
namespace Newsbot.Collector.Services.Jobs;
/// <summary>
/// Arguments handed to <c>YoutubeWatcherJob.InitAndExecute</c>.
/// </summary>
public class YoutubeWatcherJobOptions
{
/// <summary>
/// Connection strings for the repositories and the logger of the job.
/// The job replaces a null value with an empty <c>ConfigSectionConnectionStrings</c>.
/// </summary>
public ConfigSectionConnectionStrings? ConnectionStrings { get; set; }
/// <summary>
/// Delay in milliseconds that the job passes to <c>Thread.Sleep</c> after each new feed entry. Defaults to 3000.
/// </summary>
public int SleepTimer { get; set; } = 3000;
}
/// <summary>
/// Background job that walks the stored YouTube sources, reads the video feed of each channel
/// and stores every video whose URL is not known yet.
/// </summary>
public class YoutubeWatcherJob
{
    private IArticlesRepository _articles;
    private IIconsRepository _icons;
    private ILogger _logger;
    private YoutubeWatcherJobOptions _options;
    private IDiscordQueueRepository _queue;
    private ISourcesRepository _source;

    /// <summary>
    /// Creates the job with default options and repositories that use an empty connection string.
    /// <see cref="InitAndExecute" /> replaces all of them before any work is done.
    /// </summary>
    public YoutubeWatcherJob()
    {
        _options = new YoutubeWatcherJobOptions();
        _articles = new ArticlesTable("");
        _queue = new DiscordQueueTable("");
        _source = new SourcesTable("");
        _icons = new IconsTable("");
        _logger = JobLogger.GetLogger("", "YoutubeWatcherJob");
    }

    /// <summary>
    /// Configures the repositories and the logger from <paramref name="options" /> and runs the job.
    /// </summary>
    /// <param name="options">
    /// Job arguments. A null <c>ConnectionStrings</c> is replaced by an empty instance, and null
    /// connection string values are treated as empty strings.
    /// </param>
    public void InitAndExecute(YoutubeWatcherJobOptions options)
    {
        options.ConnectionStrings ??= new ConfigSectionConnectionStrings();

        // Keep the caller's options. They used to be dropped, which pinned SleepTimer to its default.
        _options = options;

        var database = options.ConnectionStrings.Database ?? "";
        _articles = new ArticlesTable(database);
        _queue = new DiscordQueueTable(database);
        _source = new SourcesTable(database);
        _icons = new IconsTable(database);
        _logger = JobLogger.GetLogger(options.ConnectionStrings.OpenTelemetry ?? "", "YoutubeWatcherJob");

        Execute();
    }

    // Checks up to 100 YouTube sources, one after the other.
    private void Execute()
    {
        var sources = _source.ListByType(SourceTypes.YouTube, 100);
        foreach (var source in sources)
        {
            CheckSource(source);
        }
    }

    // Resolves the channel id of the source (caching it on the source row) and stores its new videos.
    private void CheckSource(SourceModel source)
    {
        var channelId = source.YoutubeId;

        // The YoutubeId column is nullable, so rows created before it existed carry null, not "".
        if (string.IsNullOrEmpty(channelId))
        {
            channelId = GetChannelId(source.Url);
            if (channelId == "")
            {
                // GetChannelId already logged the failure. Without an id there is no feed to read,
                // and persisting the empty value would be pointless.
                return;
            }

            _source.UpdateYoutubeId(source.ID, channelId);
        }

        // Make sure we have a Icon for the channel
        var icon = _icons.GetBySourceId(source.ID);
        if (icon.Id == Guid.Empty)
        {
            // TODO: collect and store the channel icon; nothing is done for a missing icon yet.
        }

        var url = $"https://www.youtube.com/feeds/videos.xml?channel_id={channelId}";
        var newVideos = CheckFeed(url, source);
        foreach (var video in newVideos)
        {
            _articles.New(video);
        }
    }

    // Reads the channel page and returns the channel id from its header, or "" when none is found.
    private string GetChannelId(string url)
    {
        // Collect the Channel ID and store it for later.
        var pageReader = new HtmlPageReader(new HtmlPageReaderOptions
        {
            Url = url
        });
        pageReader.Parse();

        var id = pageReader.Data.Header.YoutubeChannelID ?? "";
        if (id == "")
        {
            // The url is passed as a property value: used as the message template, braces in it would be parsed.
            _logger.Error("Unable to find the Youtube Channel ID for the requested url {Url}", url);
        }

        return id;
    }

    // Loads the feed and builds an article for every entry whose URL is not stored yet.
    private List<ArticlesModel> CheckFeed(string url, SourceModel source)
    {
        var videos = new List<ArticlesModel>();

        using var reader = XmlReader.Create(url);
        var feed = SyndicationFeed.Load(reader);

        foreach (var post in feed.Items)
        {
            // An entry without a link cannot be identified or stored, so it is skipped instead of throwing.
            var link = post.Links.FirstOrDefault();
            if (link is null)
            {
                _logger.Warning("Skipping a feed entry without a link in {FeedUrl}", url);
                continue;
            }

            var articleUrl = link.Uri.AbsoluteUri;
            if (IsThisUrlKnown(articleUrl))
            {
                continue;
            }

            var videoDetails = new HtmlPageReader(new HtmlPageReaderOptions
            {
                Url = articleUrl
            });
            videoDetails.Parse();

            var article = new ArticlesModel
            {
                //Todo add the icon
                AuthorName = post.Authors.FirstOrDefault()?.Name ?? "",
                Title = post.Title?.Text ?? "",
                Tags = FetchTags(post),
                URL = articleUrl,
                // NOTE(review): DateTimeOffset.DateTime drops the offset - confirm the column does not expect UTC.
                PubDate = post.PublishDate.DateTime,
                Thumbnail = videoDetails.Data.Header.Image,
                Description = videoDetails.Data.Header.Description,
                SourceID = source.ID,
                Video = "true"
            };
            videos.Add(article);

            // Pause between video page requests.
            Thread.Sleep(_options.SleepTimer);
        }

        return videos;
    }

    // True when an article with exactly this URL is already stored.
    private bool IsThisUrlKnown(string url)
    {
        return _articles.GetByUrl(url).URL == url;
    }

    // Joins the category names, each followed by a comma (the trailing comma is kept on purpose
    // so the stored format does not change).
    private static string FetchTags(SyndicationItem post)
    {
        return string.Concat(post.Categories.Select(tag => $"{tag.Name},"));
    }
}

View File

@ -0,0 +1,29 @@
using Newsbot.Collector.Services.HtmlParser;
namespace Newsbot.Collector.Tests.Services;
/// <summary>
/// Tests for <see cref="BrowserClient" />. They start a real Firefox session and load live pages.
/// </summary>
public class BrowserClientTests
{
    /// <summary>
    /// The browser starts and returns a non-empty page source.
    /// </summary>
    [Fact]
    public void LoadsBrowser()
    {
        using var browser = new BrowserClient();

        var html = browser.GetPageSource("https://www.google.com");

        Assert.False(html == "", "failed to return page source");
    }

    /// <summary>
    /// Source fetched through the browser can be parsed for the YouTube channel id.
    /// </summary>
    [Fact]
    public void CanLoadHeadersFromSource()
    {
        using var browser = new BrowserClient();
        var readerOptions = new HtmlPageReaderOptions
        {
            SourceCode = browser.GetPageSource("https://www.youtube.com/gamegrumps")
        };

        var pageReader = new HtmlPageReader(readerOptions);
        pageReader.Parse();

        Assert.False(pageReader.Data.Header.YoutubeChannelID is null, "Failed to find the YoutubeChannelId");
    }
}
}

View File

@ -0,0 +1,5 @@
namespace Newsbot.Collector.Tests;
/// <summary>
/// Placeholder for helpers shared between tests; it has no members yet.
/// </summary>
public static class TestHelper
{
}