Newsbot.Collector/Newsbot.Collector.Services/Jobs/YoutubeWatcherJob.cs

165 lines
5.2 KiB
C#
Raw Normal View History

using System.ServiceModel.Syndication;
using System.Xml;
using Newsbot.Collector.Database.Repositories;
using Newsbot.Collector.Domain.Consts;
2023-06-23 20:09:57 -07:00
using Newsbot.Collector.Domain.Entities;
using Newsbot.Collector.Domain.Interfaces;
using Newsbot.Collector.Domain.Models;
using Newsbot.Collector.Services.HtmlParser;
using Serilog;
namespace Newsbot.Collector.Services.Jobs;
/// <summary>
/// Settings bag for <see cref="YoutubeWatcherJob"/>.
/// </summary>
public class YoutubeWatcherJobOptions
{
    /// <summary>
    /// Whether the job is enabled. Defaults to <c>true</c>.
    /// NOTE(review): nothing in this file reads the flag; presumably the scheduler does - confirm.
    /// </summary>
    public bool IsEnabled { get; set; } = true;

    /// <summary>
    /// Value the job passes to <c>Thread.Sleep</c> (milliseconds) after collecting
    /// a new video. Defaults to 3000.
    /// </summary>
    public int SleepTimer { get; set; } = 3000;

    /// <summary>
    /// Connection string used to build the repository tables; the job substitutes
    /// an empty string when this is <c>null</c>.
    /// </summary>
    public string? DatabaseConnectionString { get; set; }

    /// <summary>
    /// Connection string handed to <c>JobLogger.GetLogger</c>; the job substitutes
    /// an empty string when this is <c>null</c>.
    /// </summary>
    public string? OpenTelemetryConnectionString { get; set; }
}
/// <summary>
/// Job that walks the YouTube sources, reads each channel's video feed, stores
/// videos whose URL is not yet known as articles and queues each new article
/// for Discord.
/// </summary>
public class YoutubeWatcherJob
{
    private const string JobName = "YoutubeWatcherJob";

    // All of these are replaced in InitAndExecute once the real options are known,
    // so none of them can be readonly.
    private YoutubeWatcherJobOptions _options;
    private IArticlesRepository _articles;
    private IIconsRepository _icons;
    private ILogger _logger;
    private IDiscordQueueRepository _queue;
    private ISourcesRepository _source;

    /// <summary>
    /// Creates the job with default options and repositories built from empty
    /// connection strings. Call <see cref="InitAndExecute"/> to supply the real
    /// configuration and run the job.
    /// </summary>
    public YoutubeWatcherJob()
    {
        _options = new YoutubeWatcherJobOptions();
        _articles = new ArticlesTable("");
        _queue = new DiscordQueueTable("");
        _source = new SourcesTable("");
        _icons = new IconsTable("");
        _logger = JobLogger.GetLogger("", JobName);
    }

    /// <summary>
    /// Rebuilds the repositories and logger from <paramref name="options"/> and
    /// then runs the job once.
    /// </summary>
    /// <param name="options">Connection strings and timing settings for this run.</param>
    /// <exception cref="ArgumentNullException"><paramref name="options"/> is null.</exception>
    public void InitAndExecute(YoutubeWatcherJobOptions options)
    {
        if (options is null)
        {
            throw new ArgumentNullException(nameof(options));
        }

        // Keep the caller's options so settings such as SleepTimer are honoured;
        // previously they were discarded and the constructor defaults always won.
        _options = options;

        var connectionString = options.DatabaseConnectionString ?? "";
        _articles = new ArticlesTable(connectionString);
        _queue = new DiscordQueueTable(connectionString);
        _source = new SourcesTable(connectionString);
        _icons = new IconsTable(connectionString);
        _logger = JobLogger.GetLogger(options.OpenTelemetryConnectionString ?? "", JobName);

        Execute();
    }

    /// <summary>
    /// Processes up to 100 YouTube sources: resolves the channel id when it is
    /// missing, reads the channel feed and records every video not seen before.
    /// </summary>
    private void Execute()
    {
        var sources = _source.ListByType(SourceTypes.YouTube, 100);
        foreach (var source in sources)
        {
            if (!source.Enabled)
            {
                _logger.Debug($"{JobName} - {source.Name} was disabled and will be skipped.");
                continue;
            }

            var channelId = source.YoutubeId;
            if (string.IsNullOrEmpty(channelId))
            {
                channelId = GetChannelId(source.Url);
                if (string.IsNullOrEmpty(channelId))
                {
                    // Without a channel id the feed URL is invalid. Skip this source
                    // instead of storing an empty id and failing the whole run.
                    _logger.Warning($"{JobName} - {source.Name} has no Youtube Channel ID and will be skipped.");
                    continue;
                }

                _source.UpdateYoutubeId(source.ID, channelId);
            }

            // Make sure we have an icon for the channel.
            // TODO: collect and store the icon when it is missing.
            var icon = _icons.GetBySourceId(source.ID);
            if (icon.Id == Guid.Empty)
            {
                _logger.Debug($"{JobName} - No icon is stored for '{source.Name}'");
            }

            _logger.Information($"{JobName} - Checking '{source.Name}'");
            var url = $"https://www.youtube.com/feeds/videos.xml?channel_id={channelId}";
            var newVideos = CheckFeed(url, source);
            _logger.Debug($"{JobName} - Collected {newVideos.Count} new videos");

            foreach (var video in newVideos)
            {
                _logger.Debug($"{JobName} - {video.AuthorName} '{video.Title}' was found");
                _articles.New(video);

                // NOTE(review): this relies on video.Id being populated at this point
                // (either by the entity itself or by New) - confirm.
                _queue.New(new DiscordQueueEntity
                {
                    ArticleId = video.Id
                });
            }
        }

        _logger.Information($"{JobName} - Done");
    }

    /// <summary>
    /// Parses the page at <paramref name="url"/> and reads the YouTube channel id
    /// from its header data.
    /// </summary>
    /// <param name="url">Channel page URL of the source.</param>
    /// <returns>The channel id, or an empty string when the page exposes none.</returns>
    private string GetChannelId(string url)
    {
        var pageReader = new HtmlPageReader(new HtmlPageReaderOptions
        {
            Url = url
        });
        pageReader.Parse();

        var id = pageReader.Data.Header.YoutubeChannelID ?? "";
        if (id == "")
        {
            // Log the URL as a template property rather than as the template itself.
            _logger.Error(
                "{JobName:l} - Unable to find the Youtube Channel ID for the requested url {Url}",
                JobName,
                url);
        }

        return id;
    }

    /// <summary>
    /// Loads the syndication feed at <paramref name="url"/> and builds an article
    /// for every entry whose link is not already stored.
    /// </summary>
    /// <param name="url">Feed URL of the channel.</param>
    /// <param name="source">Source the feed belongs to; its ID is stamped on each article.</param>
    /// <returns>The newly discovered videos; empty when nothing is new.</returns>
    private List<ArticlesEntity> CheckFeed(string url, SourceModel source)
    {
        var videos = new List<ArticlesEntity>();

        using var reader = XmlReader.Create(url);
        var feed = SyndicationFeed.Load(reader);

        foreach (var post in feed.Items)
        {
            if (post.Links.Count == 0)
            {
                // An entry without a link cannot be stored or de-duplicated.
                continue;
            }

            var articleUrl = post.Links[0].Uri.AbsoluteUri;
            if (IsThisUrlKnown(articleUrl))
            {
                continue;
            }

            // The feed entry does not supply the thumbnail or description used
            // below, so they are read from the video page.
            var videoDetails = new HtmlPageReader(new HtmlPageReaderOptions
            {
                Url = articleUrl
            });
            videoDetails.Parse();

            var article = new ArticlesEntity
            {
                //Todo add the icon
                AuthorName = post.Authors.Count > 0 ? post.Authors[0].Name : "",
                Title = post.Title?.Text ?? "",
                Tags = FetchTags(post),
                Url = articleUrl,
                PubDate = post.PublishDate.DateTime,
                Thumbnail = videoDetails.Data.Header.Image,
                Description = videoDetails.Data.Header.Description,
                SourceId = source.ID,
                Video = "true"
            };
            videos.Add(article);

            // Pause between page fetches. Thread.Sleep rejects negative values
            // other than -1, so only sleep for a positive timer.
            if (_options.SleepTimer > 0)
            {
                Thread.Sleep(_options.SleepTimer);
            }
        }

        return videos;
    }

    /// <summary>
    /// Reports whether an article with the given URL is already stored.
    /// </summary>
    /// <param name="url">Article URL to look up.</param>
    /// <returns><c>true</c> when the repository returns an article with that URL.</returns>
    private bool IsThisUrlKnown(string url)
    {
        var existing = _articles.GetByUrl(url);
        return existing.Url == url;
    }

    /// <summary>
    /// Flattens the category names of <paramref name="post"/> into one string.
    /// </summary>
    /// <param name="post">Feed entry whose categories are read.</param>
    /// <returns>
    /// Each category name followed by a comma (so a non-empty result ends with a
    /// comma); an empty string when the entry has no categories.
    /// </returns>
    private static string FetchTags(SyndicationItem post)
    {
        // The trailing comma is kept on purpose so stored tag strings keep their format.
        return string.Concat(post.Categories.Select(tag => $"{tag.Name},"));
    }
}