152 lines
4.6 KiB
C#
152 lines
4.6 KiB
C#
|
using System.ServiceModel.Syndication;
|
||
|
using System.Xml;
|
||
|
using Newsbot.Collector.Database.Repositories;
|
||
|
using Newsbot.Collector.Domain.Consts;
|
||
|
using Newsbot.Collector.Domain.Interfaces;
|
||
|
using Newsbot.Collector.Domain.Models;
|
||
|
using Newsbot.Collector.Domain.Models.Config;
|
||
|
using Newsbot.Collector.Services.HtmlParser;
|
||
|
using Serilog;
|
||
|
|
||
|
namespace Newsbot.Collector.Services.Jobs;
|
||
|
|
||
|
/// <summary>
/// Settings handed to <see cref="YoutubeWatcherJob.InitAndExecute"/>.
/// </summary>
public class YoutubeWatcherJobOptions
{
    /// <summary>
    /// Connection strings the job reads its database and OpenTelemetry endpoints from.
    /// May be left <c>null</c>; the job replaces a <c>null</c> value with an empty
    /// <see cref="ConfigSectionConnectionStrings"/> before using it.
    /// </summary>
    public ConfigSectionConnectionStrings? ConnectionStrings { get; set; }

    /// <summary>
    /// Pause, in milliseconds, that the job passes to <c>Thread.Sleep</c> after each
    /// newly collected video. Defaults to 3000.
    /// </summary>
    public int SleepTimer { get; set; } = 3000;
}
|
||
|
|
||
|
/// <summary>
/// Job that walks every stored YouTube source, reads the channel's video feed and
/// stores each video whose URL is not already known as a new article.
/// </summary>
public class YoutubeWatcherJob
{
    private const string JobName = "YoutubeWatcherJob";
    private const string FeedUrlPrefix = "https://www.youtube.com/feeds/videos.xml?channel_id=";

    // Not readonly: InitAndExecute replaces the defaults with the caller's options.
    private YoutubeWatcherJobOptions _options;
    private IArticlesRepository _articles;
    private IIconsRepository _icons;
    private ILogger _logger;
    private IDiscordQueueRepository _queue;
    private ISourcesRepository _source;

    /// <summary>
    /// Creates the job with default options and repositories built from empty
    /// connection strings. <see cref="InitAndExecute"/> replaces all of them.
    /// </summary>
    public YoutubeWatcherJob()
    {
        _options = new YoutubeWatcherJobOptions();
        _articles = new ArticlesTable("");
        _queue = new DiscordQueueTable("");
        _source = new SourcesTable("");
        _icons = new IconsTable("");
        _logger = JobLogger.GetLogger("", JobName);
    }

    /// <summary>
    /// Rebuilds the repositories and logger from <paramref name="options"/> and runs the job once.
    /// </summary>
    /// <param name="options">
    /// Job settings. A <c>null</c> <see cref="YoutubeWatcherJobOptions.ConnectionStrings"/> is
    /// replaced (on the passed instance) with an empty one.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="options"/> is <c>null</c>.</exception>
    public void InitAndExecute(YoutubeWatcherJobOptions options)
    {
        ArgumentNullException.ThrowIfNull(options);

        options.ConnectionStrings ??= new ConfigSectionConnectionStrings();

        // Keep the caller's options; otherwise SleepTimer would always be the default.
        _options = options;

        var database = options.ConnectionStrings.Database ?? "";
        _articles = new ArticlesTable(database);
        _queue = new DiscordQueueTable(database);
        _source = new SourcesTable(database);
        _icons = new IconsTable(database);
        _logger = JobLogger.GetLogger(options.ConnectionStrings.OpenTelemetry ?? "", JobName);

        Execute();
    }

    /// <summary>
    /// Lists the YouTube sources and checks each one in turn.
    /// </summary>
    private void Execute()
    {
        // NOTE(review): the second argument (100) is presumably a row limit - confirm against ISourcesRepository.
        var sources = _source.ListByType(SourceTypes.YouTube, 100);

        foreach (var source in sources)
        {
            CheckSource(source);
        }
    }

    /// <summary>
    /// Resolves the channel id for <paramref name="source"/> (looking it up and storing it
    /// when the source has none yet), then stores every new video found in the channel feed.
    /// </summary>
    /// <param name="source">The YouTube source to check.</param>
    private void CheckSource(SourceModel source)
    {
        var channelId = source.YoutubeId;

        if (string.IsNullOrEmpty(channelId))
        {
            channelId = GetChannelId(source.Url);
            if (channelId == "")
            {
                // GetChannelId already logged the failure. Requesting the feed with an
                // empty channel_id cannot succeed, so skip this source and let the
                // remaining sources be processed.
                return;
            }

            _source.UpdateYoutubeId(source.ID, channelId);
        }

        // Make sure we have an icon for the channel.
        var icon = _icons.GetBySourceId(source.ID);
        if (icon.Id == Guid.Empty)
        {
            // TODO: no icon is stored for this source; nothing collects one yet.
        }

        var feedUrl = $"{FeedUrlPrefix}{channelId}";

        foreach (var video in CheckFeed(feedUrl, source))
        {
            _articles.New(video);
        }
    }

    /// <summary>
    /// Parses the page at <paramref name="url"/> and returns the YouTube channel id
    /// found in its header data.
    /// </summary>
    /// <param name="url">Address of the channel page.</param>
    /// <returns>The channel id, or an empty string (after logging an error) when none was found.</returns>
    private string GetChannelId(string url)
    {
        var pageReader = new HtmlPageReader(new HtmlPageReaderOptions
        {
            Url = url
        });
        pageReader.Parse();

        var id = pageReader.Data.Header.YoutubeChannelID ?? "";
        if (id == "")
        {
            // The URL is passed as a property value, not as the message template,
            // so braces inside it are never interpreted as template holes.
            _logger.Error(
                new Exception("Unable to find the Youtube Channel ID for the requested url."),
                "Unable to find the Youtube Channel ID for {Url}",
                url);
        }

        return id;
    }

    /// <summary>
    /// Loads the syndication feed at <paramref name="url"/> and builds an article for
    /// every entry whose link is not already stored.
    /// </summary>
    /// <param name="url">Address of the channel feed.</param>
    /// <param name="source">Source the resulting articles are attributed to.</param>
    /// <returns>The articles built for entries that were not known yet; empty when there are none.</returns>
    private List<ArticlesModel> CheckFeed(string url, SourceModel source)
    {
        var videos = new List<ArticlesModel>();

        using var reader = XmlReader.Create(url);
        var feed = SyndicationFeed.Load(reader);

        foreach (var post in feed.Items)
        {
            // An entry without a link cannot be identified or stored; skip it.
            var link = post.Links.FirstOrDefault();
            if (link is null)
            {
                continue;
            }

            var articleUrl = link.Uri.AbsoluteUri;
            if (IsThisUrlKnown(articleUrl))
            {
                continue;
            }

            var videoDetails = new HtmlPageReader(new HtmlPageReaderOptions
            {
                Url = articleUrl
            });
            videoDetails.Parse();

            var article = new ArticlesModel
            {
                //Todo add the icon
                AuthorName = post.Authors.FirstOrDefault()?.Name ?? "",
                Title = post.Title.Text,
                Tags = FetchTags(post),
                URL = articleUrl,
                PubDate = post.PublishDate.DateTime,
                Thumbnail = videoDetails.Data.Header.Image,
                Description = videoDetails.Data.Header.Description,
                SourceID = source.ID,
                Video = "true"
            };

            videos.Add(article);

            // Pause between video page requests for the configured number of milliseconds.
            Thread.Sleep(_options.SleepTimer);
        }

        return videos;
    }

    /// <summary>
    /// Reports whether an article with exactly this URL is already stored.
    /// </summary>
    /// <param name="url">Article URL to look up.</param>
    /// <returns><c>true</c> when the stored record's URL equals <paramref name="url"/>.</returns>
    private bool IsThisUrlKnown(string url)
    {
        return _articles.GetByUrl(url).URL == url;
    }

    /// <summary>
    /// Joins the category names of <paramref name="post"/> into one string, each name
    /// followed by a comma (so a non-empty result ends with a trailing comma).
    /// </summary>
    /// <param name="post">Feed entry whose categories are read.</param>
    /// <returns>The comma-terminated names, or an empty string when there are no categories.</returns>
    private static string FetchTags(SyndicationItem post)
    {
        return string.Concat(post.Categories.Select(tag => $"{tag.Name},"));
    }
}
|