2023-03-31 23:00:15 -07:00
|
|
|
using System.ServiceModel.Syndication;
|
|
|
|
using System.Xml;
|
|
|
|
using Newsbot.Collector.Database.Repositories;
|
|
|
|
using Newsbot.Collector.Domain.Consts;
|
2023-06-23 20:09:57 -07:00
|
|
|
using Newsbot.Collector.Domain.Entities;
|
2023-03-31 23:00:15 -07:00
|
|
|
using Newsbot.Collector.Domain.Interfaces;
|
|
|
|
using Newsbot.Collector.Domain.Models;
|
|
|
|
using Newsbot.Collector.Services.HtmlParser;
|
|
|
|
using Serilog;
|
|
|
|
|
|
|
|
namespace Newsbot.Collector.Services.Jobs;
|
|
|
|
|
|
|
|
/// <summary>
/// Settings handed to <see cref="YoutubeWatcherJob.InitAndExecute" />.
/// </summary>
public class YoutubeWatcherJobOptions
{
    /// <summary>
    /// Connection string the job passes to the articles, Discord queue, sources and icons tables.
    /// A null value is replaced with an empty string by the job.
    /// </summary>
    public string? DatabaseConnectionString { get; set; }

    /// <summary>
    /// Connection string the job passes to <c>JobLogger.GetLogger</c>.
    /// A null value is replaced with an empty string by the job.
    /// </summary>
    public string? OpenTelemetryConnectionString { get; set; }

    /// <summary>
    /// Pause, in milliseconds, intended for the <c>Thread.Sleep</c> call the job makes after each
    /// newly collected video. Defaults to 3000.
    /// </summary>
    public int SleepTimer { get; set; } = 3000;

    /// <summary>
    /// Feature flag for the job. Defaults to <c>true</c>.
    /// </summary>
    public bool IsEnabled { get; set; } = true;
}
|
|
|
|
|
|
|
|
/// <summary>
/// Walks the YouTube sources, loads each channel's video feed, stores the videos that are not
/// yet in the articles repository and adds one Discord queue entry per stored video.
/// </summary>
public class YoutubeWatcherJob
{
    private const string JobName = "YoutubeWatcherJob";

    // Not readonly: InitAndExecute replaces the defaults with the options supplied by the caller.
    private YoutubeWatcherJobOptions _options;
    private IArticlesRepository _articles;
    private IIconsRepository _icons;
    private ILogger _logger;
    private IDiscordQueueRepository _queue;
    private ISourcesRepository _source;

    /// <summary>
    /// Creates the job with default options and repositories built from empty connection strings.
    /// Call <see cref="InitAndExecute" /> to supply the real configuration and run the job.
    /// </summary>
    public YoutubeWatcherJob()
    {
        _options = new YoutubeWatcherJobOptions();
        _articles = new ArticlesTable("");
        _queue = new DiscordQueueTable("");
        _source = new SourcesTable("");
        _icons = new IconsTable("");
        _logger = JobLogger.GetLogger("", JobName);
    }

    /// <summary>
    /// Rebuilds the repositories and the logger from <paramref name="options" /> and runs the job.
    /// </summary>
    /// <param name="options">
    /// Connection strings, sleep timer and feature flag for this run. Null connection strings are
    /// treated as empty strings.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="options" /> is null.</exception>
    public void InitAndExecute(YoutubeWatcherJobOptions options)
    {
        ArgumentNullException.ThrowIfNull(options);

        // Keep the supplied options so SleepTimer is honoured; without this only the
        // defaults created by the constructor would ever be read.
        _options = options;

        var connectionString = options.DatabaseConnectionString ?? "";
        _articles = new ArticlesTable(connectionString);
        _queue = new DiscordQueueTable(connectionString);
        _source = new SourcesTable(connectionString);
        _icons = new IconsTable(connectionString);
        _logger = JobLogger.GetLogger(options.OpenTelemetryConnectionString ?? "", JobName);

        if (!options.IsEnabled)
        {
            _logger.Warning($"{JobName} - The job is disabled and will not run.");
            return;
        }

        Execute();
    }

    /// <summary>
    /// Processes every enabled YouTube source: resolves its channel ID when needed, checks its
    /// feed and stores and queues each video that is not known yet.
    /// </summary>
    private void Execute()
    {
        // NOTE(review): the second argument is presumably a row limit - confirm against ISourcesRepository.
        var sources = _source.ListByType(SourceTypes.YouTube, 100);

        foreach (var source in sources)
        {
            if (!source.Enabled)
            {
                _logger.Debug($"{JobName} - {source.Name} was disabled and will be skipped.");
                continue;
            }

            var channelId = source.YoutubeId;
            if (string.IsNullOrEmpty(channelId))
            {
                channelId = GetChannelId(source.Url);
                if (channelId == "")
                {
                    // GetChannelId already logged the failure. Do not persist an empty ID or
                    // request a feed for it; move on to the next source instead.
                    continue;
                }

                _source.UpdateYoutubeId(source.ID, channelId);
            }

            // Make sure we have an icon for the channel
            var icon = _icons.GetBySourceId(source.ID);
            if (icon.Id == Guid.Empty)
            {
                _logger.Debug($"{JobName} - No icon is stored for '{source.Name}'.");
            }

            _logger.Information($"{JobName} - Checking '{source.Name}'");
            var url = $"https://www.youtube.com/feeds/videos.xml?channel_id={channelId}";

            var newVideos = CheckFeed(url, source);
            _logger.Debug($"{JobName} - Collected {newVideos.Count} new videos");
            foreach (var video in newVideos)
            {
                _logger.Debug($"{JobName} - {video.AuthorName} '{video.Title}' was found");
                _articles.New(video);
                _queue.New(new DiscordQueueEntity
                {
                    ArticleId = video.Id
                });
            }
        }

        _logger.Information($"{JobName} - Done");
    }

    /// <summary>
    /// Reads the page at <paramref name="url" /> and returns the YouTube channel ID found in its header.
    /// </summary>
    /// <param name="url">Address of the page to parse.</param>
    /// <returns>The channel ID, or an empty string (after logging an error) when none was found.</returns>
    private string GetChannelId(string url)
    {
        // Collect the Channel ID so the caller can store it for later.
        var pageReader = new HtmlPageReader(new HtmlPageReaderOptions
        {
            Url = url
        });
        pageReader.Parse();

        var id = pageReader.Data.Header.YoutubeChannelID ?? "";
        if (id == "")
        {
            // The URL is passed as a property value, never as the message template.
            _logger.Error("{JobName} - Unable to find the Youtube Channel ID for the requested url '{Url}'.",
                JobName, url);
        }

        return id;
    }

    /// <summary>
    /// Loads the syndication feed at <paramref name="url" /> and builds an article for every entry
    /// whose link is not stored yet.
    /// </summary>
    /// <param name="url">Feed address to load.</param>
    /// <param name="source">Source the feed belongs to; its ID is copied onto each article.</param>
    /// <returns>The articles for the new videos; empty when nothing new was found.</returns>
    private List<ArticlesEntity> CheckFeed(string url, SourceModel source)
    {
        var videos = new List<ArticlesEntity>();

        using var reader = XmlReader.Create(url);
        var feed = SyndicationFeed.Load(reader);
        foreach (var post in feed.Items)
        {
            // An entry without a link cannot be stored or de-duplicated, so it is skipped
            // instead of failing the whole run with an index-out-of-range error.
            var articleUrl = post.Links.FirstOrDefault()?.Uri.AbsoluteUri;
            if (articleUrl is null)
            {
                _logger.Warning($"{JobName} - A feed entry for '{source.Name}' has no link and will be skipped.");
                continue;
            }

            if (IsThisUrlKnown(articleUrl))
            {
                continue;
            }

            var videoDetails = new HtmlPageReader(new HtmlPageReaderOptions
            {
                Url = articleUrl
            });
            videoDetails.Parse();

            var article = new ArticlesEntity
            {
                //Todo add the icon
                AuthorName = post.Authors.FirstOrDefault()?.Name ?? "",
                Title = post.Title?.Text ?? "",
                Tags = FetchTags(post),
                Url = articleUrl,
                PubDate = post.PublishDate.DateTime,
                Thumbnail = videoDetails.Data.Header.Image,
                Description = videoDetails.Data.Header.Description,
                SourceId = source.ID,
                Video = "true"
            };

            videos.Add(article);

            // Pause between video page requests; the duration comes from the job options.
            Thread.Sleep(_options.SleepTimer);
        }

        return videos;
    }

    /// <summary>
    /// Tells whether the articles repository returns a record whose URL equals <paramref name="url" />.
    /// </summary>
    /// <param name="url">Article address to look up.</param>
    /// <returns><c>true</c> when the returned record carries the same URL; otherwise <c>false</c>.</returns>
    private bool IsThisUrlKnown(string url)
    {
        var existing = _articles.GetByUrl(url);
        return existing.Url == url;
    }

    /// <summary>
    /// Builds the tag string for a feed entry from its category names.
    /// </summary>
    /// <param name="post">Feed entry whose categories are read.</param>
    /// <returns>
    /// Every category name followed by a comma (so a non-empty result ends with a comma), or an
    /// empty string when the entry has no categories.
    /// </returns>
    private static string FetchTags(SyndicationItem post)
    {
        return string.Concat(post.Categories.Select(tag => $"{tag.Name},"));
    }
}
|