Newsbot.Collector/Newsbot.Collector.Services/Jobs/YoutubeWatcherJob.cs
James Tombleson adb4799206
Features/missing files (#14)
* Added jobs Controller to trigger collection.

* Added backgroundjobs to move them out of program.cs

* new column to track youtube ID values and adding a sourceid column on the icon for linking

* Added icon table repo

* added interface for IconsRepo

* hey the missing config models

* adding section const keys to pull blocks of configs

* Added youtubewatcher to the code but not ready to enable it in the background.  More testing needed.

* Test... improvements?
2023-03-31 23:00:15 -07:00

152 lines
4.6 KiB
C#

using System.ServiceModel.Syndication;
using System.Xml;
using Newsbot.Collector.Database.Repositories;
using Newsbot.Collector.Domain.Consts;
using Newsbot.Collector.Domain.Interfaces;
using Newsbot.Collector.Domain.Models;
using Newsbot.Collector.Domain.Models.Config;
using Newsbot.Collector.Services.HtmlParser;
using Serilog;
namespace Newsbot.Collector.Services.Jobs;
/// <summary>
/// Settings supplied to <see cref="YoutubeWatcherJob"/> when a run is started.
/// </summary>
public class YoutubeWatcherJobOptions
{
    /// <summary>
    /// Connection strings section used by the job. May be left <c>null</c>;
    /// <see cref="YoutubeWatcherJob.InitAndExecute"/> replaces a null value
    /// with an empty <see cref="ConfigSectionConnectionStrings"/>.
    /// </summary>
    public ConfigSectionConnectionStrings? ConnectionStrings { get; set; }

    /// <summary>
    /// Pause length handed to <c>Thread.Sleep</c> (milliseconds) by the job
    /// after it has processed a new feed entry. Defaults to 3000.
    /// </summary>
    public int SleepTimer { get; set; } = 3000;
}
/// <summary>
/// Job that walks every source of type <see cref="SourceTypes.YouTube"/>, reads the
/// channel's video feed (<c>https://www.youtube.com/feeds/videos.xml</c>) and stores
/// every video whose URL is not already known as a new article.
/// </summary>
/// <remarks>
/// The parameterless constructor only wires up placeholder dependencies (empty
/// connection strings). Call <see cref="InitAndExecute"/> to bind the real
/// configuration and run the collection.
/// </remarks>
public class YoutubeWatcherJob
{
    // Not readonly: InitAndExecute replaces it with the caller's options so that
    // settings such as SleepTimer are actually honoured.
    private YoutubeWatcherJobOptions _options;
    private IArticlesRepository _articles;
    private IIconsRepository _icons;
    private ILogger _logger;
    private IDiscordQueueRepository _queue;
    private ISourcesRepository _source;

    /// <summary>
    /// Creates the job with default options and repositories built from empty
    /// connection strings. <see cref="InitAndExecute"/> rebuilds all of them.
    /// </summary>
    public YoutubeWatcherJob()
    {
        _options = new YoutubeWatcherJobOptions();
        _articles = new ArticlesTable("");
        _queue = new DiscordQueueTable("");
        _source = new SourcesTable("");
        _icons = new IconsTable("");
        _logger = JobLogger.GetLogger("", "YoutubeWatcherJob");
    }

    /// <summary>
    /// Binds the job to <paramref name="options"/> (repositories, logger, sleep timer)
    /// and runs one collection pass over all YouTube sources.
    /// </summary>
    /// <param name="options">
    /// Job settings. A null <c>ConnectionStrings</c> is replaced with an empty section,
    /// and null individual connection strings are treated as empty strings.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="options"/> is null.</exception>
    public void InitAndExecute(YoutubeWatcherJobOptions options)
    {
        if (options is null) throw new ArgumentNullException(nameof(options));

        options.ConnectionStrings ??= new ConfigSectionConnectionStrings();
        var database = options.ConnectionStrings.Database ?? "";

        // Keep the caller's options; previously they were dropped and the
        // default SleepTimer was always used.
        _options = options;
        _articles = new ArticlesTable(database);
        _queue = new DiscordQueueTable(database);
        _source = new SourcesTable(database);
        _icons = new IconsTable(database);
        _logger = JobLogger.GetLogger(options.ConnectionStrings.OpenTelemetry ?? "", "YoutubeWatcherJob");

        Execute();
    }

    /// <summary>
    /// Lists the YouTube sources (requesting up to 100) and checks each one in turn.
    /// </summary>
    private void Execute()
    {
        var sources = _source.ListByType(SourceTypes.YouTube, 100);
        foreach (var source in sources)
        {
            CheckSource(source);
        }
    }

    /// <summary>
    /// Resolves the channel id for <paramref name="source"/> (looking it up from the
    /// source URL and persisting it when it is not stored yet), then saves every new
    /// video found in the channel feed.
    /// </summary>
    /// <param name="source">The YouTube source to check.</param>
    private void CheckSource(SourceModel source)
    {
        string channelId;
        if (string.IsNullOrEmpty(source.YoutubeId))
        {
            channelId = GetChannelId(source.Url);
            if (channelId == "")
            {
                // GetChannelId has already logged the failure. Without an id there is
                // no feed to request, and storing an empty id would achieve nothing,
                // so skip this source and let the remaining ones run.
                return;
            }

            _source.UpdateYoutubeId(source.ID, channelId);
        }
        else
        {
            channelId = source.YoutubeId;
        }

        // Make sure we have an icon for the channel.
        var icon = _icons.GetBySourceId(source.ID);
        if (icon.Id == Guid.Empty)
        {
            // TODO: no icon is stored for this source yet; creating one is not implemented.
        }

        var url = $"https://www.youtube.com/feeds/videos.xml?channel_id={channelId}";
        var newVideos = CheckFeed(url, source);
        foreach (var video in newVideos)
        {
            _articles.New(video);
        }
    }

    /// <summary>
    /// Parses the page at <paramref name="url"/> with <see cref="HtmlPageReader"/> and
    /// returns the YouTube channel id found in the parsed header data.
    /// </summary>
    /// <param name="url">URL of the channel page.</param>
    /// <returns>The channel id, or an empty string (after logging an error) when none was found.</returns>
    private string GetChannelId(string url)
    {
        var pageReader = new HtmlPageReader(new HtmlPageReaderOptions
        {
            Url = url
        });
        pageReader.Parse();

        var id = pageReader.Data.Header.YoutubeChannelID ?? "";
        if (id == "")
        {
            // Pass the URL as a property, not as the message template itself.
            _logger.Error("Unable to find the Youtube Channel ID for the requested url. {Url}", url);
        }

        return id;
    }

    /// <summary>
    /// Loads the syndication feed at <paramref name="url"/> and builds an article for
    /// every entry whose link is not already stored.
    /// </summary>
    /// <param name="url">Feed URL to load.</param>
    /// <param name="source">Source the resulting articles are attributed to.</param>
    /// <returns>The articles for the entries that were not known yet; they are not saved here.</returns>
    private List<ArticlesModel> CheckFeed(string url, SourceModel source)
    {
        var videos = new List<ArticlesModel>();

        using var reader = XmlReader.Create(url);
        var feed = SyndicationFeed.Load(reader);

        foreach (var post in feed.Items.ToList())
        {
            // An entry without a link cannot be identified or de-duplicated; skip it
            // instead of throwing on Links[0].
            var articleUrl = post.Links.FirstOrDefault()?.Uri?.AbsoluteUri;
            if (string.IsNullOrEmpty(articleUrl))
            {
                _logger.Warning("Skipping a feed entry without a link. {FeedUrl}", url);
                continue;
            }

            if (IsThisUrlKnown(articleUrl)) continue;

            // The feed does not supply the thumbnail/description used below, so the
            // video page itself is parsed for them.
            var videoDetails = new HtmlPageReader(new HtmlPageReaderOptions
            {
                Url = articleUrl
            });
            videoDetails.Parse();

            var article = new ArticlesModel
            {
                //Todo add the icon
                AuthorName = post.Authors.FirstOrDefault()?.Name ?? "",
                Title = post.Title?.Text ?? "",
                Tags = FetchTags(post),
                URL = articleUrl,
                PubDate = post.PublishDate.DateTime,
                Thumbnail = videoDetails.Data.Header.Image,
                Description = videoDetails.Data.Header.Description,
                SourceID = source.ID,
                Video = "true"
            };
            videos.Add(article);

            // Pause between video page requests.
            Thread.Sleep(_options.SleepTimer);
        }

        return videos;
    }

    /// <summary>
    /// Reports whether an article with exactly this <paramref name="url"/> is already stored.
    /// </summary>
    private bool IsThisUrlKnown(string url)
    {
        return _articles.GetByUrl(url).URL == url;
    }

    /// <summary>
    /// Flattens the categories of <paramref name="post"/> into one string in which every
    /// category name is followed by a comma (so a non-empty result ends with a comma).
    /// </summary>
    private static string FetchTags(SyndicationItem post)
    {
        // The trailing comma is kept on purpose: the stored format is unchanged.
        return string.Concat(post.Categories.Select(tag => $"{tag.Name},"));
    }
}