Features/missing files (#14)
* Added a jobs controller to trigger collection. * Added BackgroundJobs to move the recurring job setup out of Program.cs. * Added a new column to track YouTube ID values and a sourceid column on icons for linking. * Added the icons table repository. * Added an interface for the icons repository. * Added the missing config models. * Added section const keys to pull blocks of config. * Added YoutubeWatcher to the code, but it is not ready to be enabled in the background; more testing is needed. * Test... improvements?
This commit is contained in:
parent
9be985da0a
commit
adb4799206
26
Newsbot.Collector.Api/BackgroundJobs.cs
Normal file
26
Newsbot.Collector.Api/BackgroundJobs.cs
Normal file
@ -0,0 +1,26 @@
|
||||
using Hangfire;
|
||||
using Newsbot.Collector.Domain.Consts;
|
||||
using Newsbot.Collector.Domain.Models.Config;
|
||||
using Newsbot.Collector.Services.Jobs;
|
||||
|
||||
namespace Newsbot.Collector.Api;
|
||||
|
||||
/// <summary>
/// Registers the recurring Hangfire jobs that the collector API runs in the background.
/// </summary>
public class BackgroundJobs
{
    /// <summary>
    /// Adds or updates the recurring "RSS" and "Discord Alerts" jobs.
    /// </summary>
    /// <param name="configuration">
    /// Application configuration containing the sections named in <see cref="ConfigSectionsConst" />.
    /// </param>
    public static void SetupRecurringJobs(IConfiguration configuration)
    {
        // GetValue<T> only converts a single scalar value, so asking it for a whole
        // section yields null. Bind each section to its model instead.
        var connectionStrings = configuration
            .GetSection(ConfigSectionsConst.ConnectionStrings)
            .Get<ConfigSectionConnectionStrings>();
        var rssConfig = configuration
            .GetSection(ConfigSectionsConst.Rss)
            .Get<ConfigSectionRssModel>();
        var discordConfig = configuration
            .GetSection(ConfigSectionsConst.NotificationsDiscord)
            .Get<ConfigSectionNotificationsDiscord>();

        // Cron: minute 15 of every hour.
        RecurringJob.AddOrUpdate<RssWatcherJob>("RSS", x => x.InitAndExecute(new RssWatcherJobOptions
        {
            ConnectionStrings = connectionStrings,
            Config = rssConfig
        }), "15 0-23 * * *");

        // Cron: every 10 minutes, starting at minute 5.
        RecurringJob.AddOrUpdate<DiscordNotificationJob>("Discord Alerts", x =>
            x.InitAndExecute(new DiscordNotificationJobOptions
            {
                ConnectionStrings = connectionStrings,
                Config = discordConfig
            }), "5/10 * * * *");
    }
}
|
47
Newsbot.Collector.Api/Controllers/JobsController.cs
Normal file
47
Newsbot.Collector.Api/Controllers/JobsController.cs
Normal file
@ -0,0 +1,47 @@
|
||||
using Hangfire;
|
||||
using Microsoft.AspNetCore.Mvc;
|
||||
using Microsoft.Extensions.Options;
|
||||
using Newsbot.Collector.Database.Repositories;
|
||||
using Newsbot.Collector.Domain.Interfaces;
|
||||
using Newsbot.Collector.Domain.Models.Config;
|
||||
using Newsbot.Collector.Services.Jobs;
|
||||
|
||||
namespace Newsbot.Collector.Api.Controllers;
|
||||
|
||||
/// <summary>
/// HTTP endpoints under <c>api/jobs</c> that enqueue collection jobs on demand through Hangfire.
/// </summary>
[ApiController]
[Route("api/jobs")]
public class JobsController
{
    private readonly ConfigSectionConnectionStrings _connectionStrings;

    // NOTE(review): the logger category is SourcesController, not JobsController — confirm whether that is intended.
    private readonly ILogger<SourcesController> _logger;
    private readonly ConfigSectionRssModel _rssConfig;
    private readonly ISourcesRepository _sources;

    /// <summary>
    /// Captures the bound configuration sections and creates the sources repository.
    /// </summary>
    /// <param name="logger">Logger stored for this controller.</param>
    /// <param name="connectionStrings">Connection string options; an absent database value becomes an empty string.</param>
    /// <param name="rss">RSS options handed to the RSS watcher job.</param>
    public JobsController(ILogger<SourcesController> logger, IOptions<ConfigSectionConnectionStrings> connectionStrings,
        IOptions<ConfigSectionRssModel> rss)
    {
        _logger = logger;

        var connections = connectionStrings.Value;
        _connectionStrings = connections;
        _rssConfig = rss.Value;
        _sources = new SourcesTable(connections.Database ?? "");
    }

    /// <summary>
    /// Enqueues a one-off run of <see cref="RssWatcherJob" />.
    /// </summary>
    // NOTE(review): the method is named CheckReddit but is routed as check/rss and enqueues the RSS job.
    [HttpPost("check/rss")]
    public void CheckReddit()
    {
        var jobOptions = new RssWatcherJobOptions
        {
            ConnectionStrings = _connectionStrings,
            Config = _rssConfig
        };

        BackgroundJob.Enqueue<RssWatcherJob>(job => job.InitAndExecute(jobOptions));
    }

    /// <summary>
    /// Enqueues a one-off run of <see cref="YoutubeWatcherJob" />.
    /// </summary>
    [HttpPost("check/youtube")]
    public void CheckYoutube()
    {
        var jobOptions = new YoutubeWatcherJobOptions
        {
            ConnectionStrings = _connectionStrings
        };

        BackgroundJob.Enqueue<YoutubeWatcherJob>(job => job.InitAndExecute(jobOptions));
    }
}
|
@ -0,0 +1,13 @@
|
||||
-- Adds a nullable text column to sources for tracking the YouTube ID value.
-- +goose Up
-- +goose StatementBegin
ALTER TABLE sources
    ADD COLUMN YoutubeId TEXT;
-- +goose StatementEnd

-- Reverts the Up step by dropping the column again.
-- +goose Down
-- +goose StatementBegin
ALTER TABLE sources
    DROP COLUMN YoutubeId;
-- +goose StatementEnd
|
@ -0,0 +1,13 @@
|
||||
-- Adds a nullable uuid column to icons so an icon row can be linked to a source.
-- NOTE(review): no foreign key is declared on SourceId — confirm whether one is wanted.
-- +goose Up
-- +goose StatementBegin
ALTER TABLE icons
    ADD COLUMN SourceId uuid;
-- +goose StatementEnd

-- Reverts the Up step by dropping the column again.
-- +goose Down
-- +goose StatementBegin
ALTER TABLE icons
    DROP COLUMN SourceId;
-- +goose StatementEnd
|
65
Newsbot.Collector.Database/Repositories/IconsTable.cs
Normal file
65
Newsbot.Collector.Database/Repositories/IconsTable.cs
Normal file
@ -0,0 +1,65 @@
|
||||
using System.Data;
|
||||
using Dapper;
|
||||
using Microsoft.Extensions.Configuration;
|
||||
using Newsbot.Collector.Domain.Interfaces;
|
||||
using Newsbot.Collector.Domain.Models;
|
||||
using Npgsql;
|
||||
|
||||
namespace Newsbot.Collector.Database.Repositories;
|
||||
|
||||
/// <summary>
/// Dapper/Npgsql backed repository for rows in the <c>icons</c> table.
/// </summary>
public class IconsTable : IIconsRepository
{
    private readonly string _connectionString;

    /// <summary>
    /// Creates the repository from an explicit connection string.
    /// </summary>
    /// <param name="connectionString">Connection string used for every query.</param>
    public IconsTable(string connectionString)
    {
        _connectionString = connectionString;
    }

    /// <summary>
    /// Creates the repository from the "database" connection string in configuration.
    /// A missing entry is stored as an empty string.
    /// </summary>
    /// <param name="configuration">Application configuration.</param>
    public IconsTable(IConfiguration configuration)
    {
        _connectionString = configuration.GetConnectionString("database") ?? "";
    }

    /// <summary>
    /// Assigns a fresh <see cref="Guid" /> to <paramref name="model" /> and inserts it into <c>icons</c>.
    /// </summary>
    /// <param name="model">Icon to insert; its Id is overwritten.</param>
    public void New(IconModel model)
    {
        model.Id = Guid.NewGuid();

        using var conn = OpenConnection(_connectionString);
        const string q = @"Insert Into icons (id, filename, site, sourceid) values (@Id,@FileName, @Site, @SourceId)";
        conn.Execute(q, model);
    }

    /// <summary>
    /// Looks up a single icon by its primary id.
    /// </summary>
    /// <param name="id">Icon id.</param>
    /// <returns>The matching row, or a default-constructed <see cref="IconModel" /> when nothing matches.</returns>
    public IconModel GetById(Guid id)
    {
        return QuerySingleIcon("Select * From icons where ID = @id Limit 1;", id);
    }

    /// <summary>
    /// Looks up a single icon by the id of the source it is linked to.
    /// </summary>
    /// <param name="id">Source id stored in the <c>sourceid</c> column.</param>
    /// <returns>The matching row, or a default-constructed <see cref="IconModel" /> when nothing matches.</returns>
    public IconModel GetBySourceId(Guid id)
    {
        return QuerySingleIcon("Select * From icons where sourceid = @id Limit 1;", id);
    }

    // Runs a one-row lookup that takes a single @id parameter and falls back to an empty model.
    private IconModel QuerySingleIcon(string query, Guid id)
    {
        using var conn = OpenConnection(_connectionString);
        return conn.Query<IconModel>(query, new { id }).FirstOrDefault() ?? new IconModel();
    }

    // Opens a connection for the given connection string; the caller owns and disposes it.
    private static IDbConnection OpenConnection(string connectionString)
    {
        var conn = new NpgsqlConnection(connectionString);
        try
        {
            conn.Open();
            return conn;
        }
        catch
        {
            // Do not leak the connection object when opening fails.
            conn.Dispose();
            throw;
        }
    }
}
|
16
Newsbot.Collector.Domain/Consts/ConfigSectionsConst.cs
Normal file
16
Newsbot.Collector.Domain/Consts/ConfigSectionsConst.cs
Normal file
@ -0,0 +1,16 @@
|
||||
namespace Newsbot.Collector.Domain.Consts;
|
||||
|
||||
/// <summary>
///     Keys of the configuration sections that are loaded into config model objects.
/// </summary>
public static class ConfigSectionsConst
{
    // Top-level sections.

    /// <summary>Key of the connection strings section.</summary>
    public const string ConnectionStrings = "ConnectionStrings";

    /// <summary>Key of the Final Fantasy XIV section.</summary>
    public const string FinalFantasyXiv = "FinalFantasyXiv";

    /// <summary>Key of the Reddit section.</summary>
    public const string Reddit = "Reddit";

    /// <summary>Key of the RSS section.</summary>
    public const string Rss = "Rss";

    /// <summary>Key of the Twitch section.</summary>
    public const string Twitch = "Twitch";

    /// <summary>Key of the YouTube section.</summary>
    public const string Youtube = "Youtube";

    // Nested sections; ':' separates the parent key from the child key.

    /// <summary>Key of the Discord block inside the Notifications section.</summary>
    public const string NotificationsDiscord = "Notifications:Discord";
}
|
11
Newsbot.Collector.Domain/Interfaces/IIconsRepository.cs
Normal file
11
Newsbot.Collector.Domain/Interfaces/IIconsRepository.cs
Normal file
@ -0,0 +1,11 @@
|
||||
using Newsbot.Collector.Domain.Models;
|
||||
|
||||
namespace Newsbot.Collector.Domain.Interfaces;
|
||||
|
||||
/// <summary>
/// Storage contract for <see cref="IconModel" /> records.
/// </summary>
public interface IIconsRepository
{
    /// <summary>Stores a new icon record.</summary>
    /// <param name="model">Icon to store.</param>
    void New(IconModel model);

    /// <summary>Fetches the icon with the given id.</summary>
    /// <param name="id">Icon id.</param>
    IconModel GetById(Guid id);

    /// <summary>Fetches the icon linked to the given source id.</summary>
    /// <param name="id">Source id.</param>
    IconModel GetBySourceId(Guid id);
}
|
@ -0,0 +1,7 @@
|
||||
namespace Newsbot.Collector.Domain.Models.Config;
|
||||
|
||||
/// <summary>
/// Config model holding the connection strings used by the collector.
/// </summary>
public class ConfigSectionConnectionStrings
{
    /// <summary>Database connection string; null when not configured.</summary>
    public string? Database { get; init; }

    /// <summary>OpenTelemetry connection string; null when not configured.</summary>
    public string? OpenTelemetry { get; init; }
}
|
@ -0,0 +1,6 @@
|
||||
namespace Newsbot.Collector.Domain.Models.Config;
|
||||
|
||||
/// <summary>
/// Config model for the Discord notifications block.
/// </summary>
public class ConfigSectionNotificationsDiscord
{
    /// <summary>Enabled flag for Discord notifications; false unless set.</summary>
    public bool IsEnabled { get; set; }
}
|
@ -0,0 +1,9 @@
|
||||
namespace Newsbot.Collector.Domain.Models.Config;
|
||||
|
||||
/// <summary>
/// Config model for the Reddit block. Every flag is false unless set.
/// </summary>
public class ConfigSectionRedditModel
{
    /// <summary>Enabled flag for the Reddit block.</summary>
    public bool IsEnabled { get; set; }

    /// <summary>Flag named for pulling "hot" posts.</summary>
    public bool PullHot { get; set; }

    /// <summary>Flag named for pulling NSFW posts.</summary>
    public bool PullNsfw { get; set; }

    /// <summary>Flag named for pulling "top" posts.</summary>
    public bool PullTop { get; set; }
}
|
@ -0,0 +1,6 @@
|
||||
namespace Newsbot.Collector.Domain.Models.Config;
|
||||
|
||||
/// <summary>
/// Config model for the RSS block.
/// </summary>
public class ConfigSectionRssModel
{
    /// <summary>Enabled flag for the RSS block; false unless set.</summary>
    public bool IsEnabled { get; set; }
}
|
@ -0,0 +1,7 @@
|
||||
namespace Newsbot.Collector.Domain.Models.Config;
|
||||
|
||||
/// <summary>
/// Config model for the YouTube block. Both flags are false unless set.
/// </summary>
public class ConfigSectionYoutubeModel
{
    /// <summary>Enabled flag for the YouTube block.</summary>
    public bool IsEnabled { get; set; }

    /// <summary>Debug mode flag for the YouTube block.</summary>
    public bool DebugMode { get; set; }
}
|
30
Newsbot.Collector.Services/HtmlParser/BrowserClient.cs
Normal file
30
Newsbot.Collector.Services/HtmlParser/BrowserClient.cs
Normal file
@ -0,0 +1,30 @@
|
||||
using OpenQA.Selenium;
|
||||
using OpenQA.Selenium.Firefox;
|
||||
|
||||
namespace Newsbot.Collector.Services.HtmlParser;
|
||||
|
||||
/// <summary>
/// Thin wrapper around a Selenium Firefox driver that loads a page and returns its source.
/// </summary>
public class BrowserClient : IDisposable
{
    private readonly IWebDriver _driver;
    private bool _disposed;

    /// <summary>
    /// Starts a new Firefox driver session.
    /// </summary>
    public BrowserClient()
    {
        _driver = new FirefoxDriver();
    }

    /// <summary>
    /// Ends the browser session and releases the driver. Safe to call more than once.
    /// </summary>
    public void Dispose()
    {
        if (_disposed) return;
        _disposed = true;

        // Quit closes every window of the session, so a separate Close() beforehand is not needed.
        _driver.Quit();
        _driver.Dispose();
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Navigates to <paramref name="url" />, waits, and returns the page source.
    /// </summary>
    /// <param name="url">Address to load.</param>
    /// <param name="sleep">Milliseconds to wait after navigation; values of zero or less skip the wait.</param>
    /// <returns>The page source reported by the driver after the wait.</returns>
    /// <exception cref="ObjectDisposedException">The client has already been disposed.</exception>
    public string GetPageSource(string url, int sleep = 5000)
    {
        if (_disposed) throw new ObjectDisposedException(nameof(BrowserClient));

        _driver.Navigate().GoToUrl(url);

        // Give the page some time to finish loading js.
        // Negative values are skipped: Thread.Sleep(-1) would block forever and others would throw.
        if (sleep > 0) Thread.Sleep(sleep);

        return _driver.PageSource;
    }
}
|
22
Newsbot.Collector.Services/Jobs/JobLogger.cs
Normal file
22
Newsbot.Collector.Services/Jobs/JobLogger.cs
Normal file
@ -0,0 +1,22 @@
|
||||
using Serilog;
|
||||
|
||||
namespace Newsbot.Collector.Services.Jobs;
|
||||
|
||||
/// <summary>
/// Factory for the Serilog logger used by background jobs.
/// </summary>
public static class JobLogger
{
    /// <summary>
    /// Builds a logger, assigns it to the global <see cref="Log.Logger" /> and returns it.
    /// </summary>
    /// <param name="connectionString">
    /// OpenTelemetry endpoint. When empty, the logger writes to the console only.
    /// </param>
    /// <param name="jobName">Value of the "Job" resource attribute; only used when an endpoint is given.</param>
    /// <returns>The logger that was just installed as <see cref="Log.Logger" />.</returns>
    public static ILogger GetLogger(string connectionString, string jobName)
    {
        // The console sink is always present.
        var setup = new LoggerConfiguration().WriteTo.Console();

        if (connectionString != "")
        {
            var attributes = new Dictionary<string, object>
            {
                { "service.name", "newsbot-collector-api" },
                { "Job", jobName }
            };
            setup = setup.WriteTo.OpenTelemetry(connectionString, resourceAttributes: attributes);
        }

        Log.Logger = setup.CreateLogger();
        return Log.Logger;
    }
}
|
152
Newsbot.Collector.Services/Jobs/YoutubeWatcherJob.cs
Normal file
152
Newsbot.Collector.Services/Jobs/YoutubeWatcherJob.cs
Normal file
@ -0,0 +1,152 @@
|
||||
using System.ServiceModel.Syndication;
|
||||
using System.Xml;
|
||||
using Newsbot.Collector.Database.Repositories;
|
||||
using Newsbot.Collector.Domain.Consts;
|
||||
using Newsbot.Collector.Domain.Interfaces;
|
||||
using Newsbot.Collector.Domain.Models;
|
||||
using Newsbot.Collector.Domain.Models.Config;
|
||||
using Newsbot.Collector.Services.HtmlParser;
|
||||
using Serilog;
|
||||
|
||||
namespace Newsbot.Collector.Services.Jobs;
|
||||
|
||||
/// <summary>
/// Options handed to <c>YoutubeWatcherJob.InitAndExecute</c>.
/// </summary>
public class YoutubeWatcherJobOptions
{
    /// <summary>Connection strings for the job; may be left null.</summary>
    public ConfigSectionConnectionStrings? ConnectionStrings { get; set; }

    /// <summary>Sleep duration value for the job; defaults to 3000.</summary>
    public int SleepTimer { get; set; } = 3000;
}
|
||||
|
||||
/// <summary>
/// Background job that checks every YouTube source for new videos and stores them as articles.
/// </summary>
/// <remarks>
/// The Discord queue repository is created but nothing is queued yet, and icons are only looked up.
/// </remarks>
public class YoutubeWatcherJob
{
    private IArticlesRepository _articles;
    private IIconsRepository _icons;
    private ILogger _logger;
    private YoutubeWatcherJobOptions _options;
    private IDiscordQueueRepository _queue;
    private ISourcesRepository _source;

    /// <summary>
    /// Creates the job with default options and repositories built from empty connection strings.
    /// <see cref="InitAndExecute" /> replaces all of them before any work is done.
    /// </summary>
    public YoutubeWatcherJob()
    {
        _options = new YoutubeWatcherJobOptions();
        _articles = new ArticlesTable("");
        _queue = new DiscordQueueTable("");
        _source = new SourcesTable("");
        _icons = new IconsTable("");
        _logger = JobLogger.GetLogger("", "YoutubeWatcherJob");
    }

    /// <summary>
    /// Configures the repositories and logger from <paramref name="options" />, then runs the job.
    /// </summary>
    /// <param name="options">Job options; missing connection strings are treated as empty.</param>
    /// <exception cref="ArgumentNullException"><paramref name="options" /> is null.</exception>
    public void InitAndExecute(YoutubeWatcherJobOptions options)
    {
        ArgumentNullException.ThrowIfNull(options);
        options.ConnectionStrings ??= new ConfigSectionConnectionStrings();

        // Keep the caller's options so settings such as SleepTimer are actually used.
        _options = options;

        var database = options.ConnectionStrings.Database ?? "";
        _articles = new ArticlesTable(database);
        _queue = new DiscordQueueTable(database);
        _source = new SourcesTable(database);
        _icons = new IconsTable(database);
        _logger = JobLogger.GetLogger(options.ConnectionStrings.OpenTelemetry ?? "", "YoutubeWatcherJob");

        Execute();
    }

    // Checks up to 100 YouTube sources, one after the other.
    private void Execute()
    {
        var sources = _source.ListByType(SourceTypes.YouTube, 100);

        foreach (var source in sources) CheckSource(source);
    }

    // Resolves the channel id for one source, reads its feed and stores any new videos.
    private void CheckSource(SourceModel source)
    {
        var channelId = source.YoutubeId;

        // The YoutubeId column is nullable, so treat null the same as empty.
        if (string.IsNullOrEmpty(channelId))
        {
            channelId = GetChannelId(source.Url);

            // Without a channel id there is no feed to read; the failure has already been logged.
            if (channelId == "") return;

            _source.UpdateYoutubeId(source.ID, channelId);
        }

        // Make sure we have a Icon for the channel
        var icon = _icons.GetBySourceId(source.ID);
        if (icon.Id == Guid.Empty)
        {
            // TODO: collect and store the channel icon.
            _logger.Debug("No icon is stored yet for source {SourceId}", source.ID);
        }

        var url = $"https://www.youtube.com/feeds/videos.xml?channel_id={channelId}";

        var newVideos = CheckFeed(url, source);
        foreach (var video in newVideos) _articles.New(video);
    }

    // Reads the channel page and returns the channel id found in its header, or "" when none is found.
    private string GetChannelId(string url)
    {
        // Collect the Channel ID and store it for later.
        var pageReader = new HtmlPageReader(new HtmlPageReaderOptions
        {
            Url = url
        });
        pageReader.Parse();

        var id = pageReader.Data.Header.YoutubeChannelID ?? "";
        if (id == "")
            // Pass the url as a property rather than as the message template itself.
            _logger.Error(new Exception("Unable to find the Youtube Channel ID for the requested url."), "{Url}", url);

        return id;
    }

    // Loads the feed at url and builds an article for every entry whose link is not stored yet.
    private List<ArticlesModel> CheckFeed(string url, SourceModel source)
    {
        var videos = new List<ArticlesModel>();

        using var reader = XmlReader.Create(url);
        var feed = SyndicationFeed.Load(reader);
        foreach (var post in feed.Items.ToList())
        {
            // Entries without a link cannot be stored or de-duplicated, so skip them.
            var articleUrl = post.Links.FirstOrDefault()?.Uri.AbsoluteUri;
            if (string.IsNullOrEmpty(articleUrl)) continue;
            if (IsThisUrlKnown(articleUrl)) continue;

            var videoDetails = new HtmlPageReader(new HtmlPageReaderOptions
            {
                Url = articleUrl
            });
            videoDetails.Parse();

            var article = new ArticlesModel
            {
                //Todo add the icon
                AuthorName = post.Authors.FirstOrDefault()?.Name ?? "",
                Title = post.Title.Text,
                Tags = FetchTags(post),
                URL = articleUrl,
                PubDate = post.PublishDate.DateTime,
                Thumbnail = videoDetails.Data.Header.Image,
                Description = videoDetails.Data.Header.Description,
                SourceID = source.ID,
                Video = "true"
            };

            videos.Add(article);

            // Pause between page requests.
            Thread.Sleep(_options.SleepTimer);
        }

        return videos;
    }

    // True when an article with exactly this url is already stored.
    private bool IsThisUrlKnown(string url)
    {
        return _articles.GetByUrl(url).URL == url;
    }

    // Joins the category names into one string; every name is followed by a comma.
    private static string FetchTags(SyndicationItem post)
    {
        return string.Concat(post.Categories.Select(tag => $"{tag.Name},"));
    }
}
|
29
Newsbot.Collector.Tests/Services/BrowserClientTests.cs
Normal file
29
Newsbot.Collector.Tests/Services/BrowserClientTests.cs
Normal file
@ -0,0 +1,29 @@
|
||||
using Newsbot.Collector.Services.HtmlParser;
|
||||
|
||||
namespace Newsbot.Collector.Tests.Services;
|
||||
|
||||
/// <summary>
/// Tests for <see cref="BrowserClient" />. They start a real browser and load live pages.
/// </summary>
public class BrowserClientTests
{
    /// <summary>
    /// The client returns a non-empty page source for a simple page.
    /// </summary>
    [Fact]
    public void LoadsBrowser()
    {
        using var client = new BrowserClient();

        var pageSource = client.GetPageSource("https://www.google.com");

        // A null page source must fail too, not only an empty one.
        Assert.False(string.IsNullOrEmpty(pageSource), "failed to return page source");
    }

    /// <summary>
    /// Page source fetched through the browser can be parsed for the YouTube channel id.
    /// </summary>
    [Fact]
    public void CanLoadHeadersFromSource()
    {
        using var bClient = new BrowserClient();
        var pageSource = bClient.GetPageSource("https://www.youtube.com/gamegrumps");

        var hClient = new HtmlPageReader(new HtmlPageReaderOptions
        {
            SourceCode = pageSource
        });
        hClient.Parse();

        Assert.False(hClient.Data.Header.YoutubeChannelID is null, "Failed to find the YoutubeChannelId");
    }
}
|
5
Newsbot.Collector.Tests/TestHelper.cs
Normal file
5
Newsbot.Collector.Tests/TestHelper.cs
Normal file
@ -0,0 +1,5 @@
|
||||
namespace Newsbot.Collector.Tests;
|
||||
|
||||
/// <summary>
/// Static helper class for the test project; it has no members yet.
/// </summary>
public static class TestHelper
{
}
|
Loading…
Reference in New Issue
Block a user