Newsbot.Collector/Newsbot.Collector.Services/Jobs/CodeProjectWatcherJob.cs

195 lines
6.4 KiB
C#

using System.Collections;
using System.ServiceModel.Syndication;
using System.Xml;
using Newsbot.Collector.Database.Repositories;
using Newsbot.Collector.Domain.Consts;
using Newsbot.Collector.Domain.Entities;
using Newsbot.Collector.Domain.Interfaces;
using Newsbot.Collector.Domain.Models;
using Newsbot.Collector.Domain.Models.Config;
using Newsbot.Collector.Services.HtmlParser;
using Serilog;
namespace Newsbot.Collector.Services.Jobs;
/// <summary>
/// Settings handed to <see cref="CodeProjectWatcherJob"/> when it is constructed or executed.
/// </summary>
public class CodeProjectWatcherJobOptions
{
    /// <summary>
    /// Connection strings used by the job. The job reads <c>Database</c> for its tables and
    /// <c>OpenTelemetry</c> for its logger; when this property is null the job replaces it
    /// with a new <see cref="ConfigSectionConnectionStrings"/> instance.
    /// </summary>
    public ConfigSectionConnectionStrings? ConnectionStrings { get; set; }

    /// <summary>
    /// Feature flag for pulling releases. Defaults to false.
    /// The job code in this file does not read this flag.
    /// </summary>
    public bool FeaturePullReleases { get; set; }

    /// <summary>
    /// Feature flag for pulling commits. Defaults to false.
    /// The job code in this file does not read this flag.
    /// </summary>
    public bool FeaturePullCommits { get; set; }
}
/// <summary>
/// Job that walks every source of type <see cref="SourceTypes.CodeProject"/>, downloads its
/// Atom feeds, stores entries that are not yet known as articles and queues each new article
/// for Discord.
/// </summary>
public class CodeProjectWatcherJob
{
    private const string JobName = "CodeProjectWatcher";

    // Not readonly: InitAndExecute rebuilds all of these from the options it is given.
    private IArticlesRepository _articles;
    private IAuthorTable _author;
    private ILogger _logger;
    private IDiscordQueueRepository _queue;
    private ISourcesRepository _source;

    /// <summary>
    /// Creates a job whose tables and logger are built with empty connection strings.
    /// </summary>
    public CodeProjectWatcherJob()
    {
        Configure("", "");
    }

    /// <summary>
    /// Creates a job wired to the connection strings found in <paramref name="options"/>.
    /// </summary>
    /// <param name="options">
    /// Job options. A null <c>ConnectionStrings</c> is replaced with a new instance (the
    /// options object is mutated), and null individual strings fall back to "".
    /// </param>
    public CodeProjectWatcherJob(CodeProjectWatcherJobOptions options)
    {
        Configure(options);
    }

    /// <summary>
    /// Re-wires the job from <paramref name="options"/> and then runs it once.
    /// </summary>
    /// <param name="options">Job options; handled exactly as in the options constructor.</param>
    public void InitAndExecute(CodeProjectWatcherJobOptions options)
    {
        Configure(options);
        Execute();
    }

    /// <summary>
    /// Looks for release entries of <paramref name="source"/> by trying
    /// <c>releases.atom</c> and then <c>tags.atom</c> under the source URL.
    /// </summary>
    /// <param name="source">Source whose <c>Url</c> is the base address of the project.</param>
    /// <returns>
    /// Articles built from the first feed that loads and processes without an exception
    /// (flagged as releases), or an empty list when neither feed could be used.
    /// </returns>
    public IEnumerable<ArticlesEntity> CheckForReleases(SourceEntity source)
    {
        // github converts tags as releases, so tags.atom is the fallback feed.
        // It is only consulted when releases.atom fails.
        return PullFirstAvailableFeed(source, new[] { "releases.atom", "tags.atom" }, true, false);
    }

    /// <summary>
    /// Looks for commit entries of <paramref name="source"/> by trying
    /// <c>commits/main.atom</c> and then <c>commits/master.atom</c> under the source URL.
    /// </summary>
    /// <param name="source">Source whose <c>Url</c> is the base address of the project.</param>
    /// <returns>
    /// Articles built from the first feed that loads and processes without an exception
    /// (flagged as commits), or an empty list when neither feed could be used.
    /// </returns>
    public IEnumerable<ArticlesEntity> CheckForCommits(SourceEntity source)
    {
        return PullFirstAvailableFeed(source, new[] { "commits/main.atom", "commits/master.atom" }, false, true);
    }

    /// <summary>
    /// Resolves the connection strings from <paramref name="options"/> and builds the
    /// tables and logger from them.
    /// </summary>
    [System.Diagnostics.CodeAnalysis.MemberNotNull(nameof(_articles), nameof(_author), nameof(_logger), nameof(_queue), nameof(_source))]
    private void Configure(CodeProjectWatcherJobOptions options)
    {
        options.ConnectionStrings ??= new ConfigSectionConnectionStrings();
        Configure(
            options.ConnectionStrings.Database ?? "",
            options.ConnectionStrings.OpenTelemetry ?? "");
    }

    /// <summary>
    /// Builds every table with <paramref name="database"/> and the logger with
    /// <paramref name="openTelemetry"/>.
    /// </summary>
    [System.Diagnostics.CodeAnalysis.MemberNotNull(nameof(_articles), nameof(_author), nameof(_logger), nameof(_queue), nameof(_source))]
    private void Configure(string database, string openTelemetry)
    {
        _articles = new ArticlesTable(database);
        _author = new AuthorsTable(database);
        _queue = new DiscordQueueTable(database);
        _source = new SourcesTable(database);
        _logger = JobLogger.GetLogger(openTelemetry, JobName);
    }

    /// <summary>
    /// Collects new release articles from up to 100 CodeProject sources, stores each one
    /// and adds it to the Discord queue.
    /// </summary>
    private void Execute()
    {
        // query sources for things to pull
        var sources = _source.ListByType(SourceTypes.CodeProject, 0, 100);

        // Only releases are pulled; CheckForCommits is not part of the run.
        var pending = new List<ArticlesEntity>();
        foreach (var source in sources)
        {
            pending.AddRange(CheckForReleases(source));
        }

        foreach (var article in pending)
        {
            // NOTE(review): the return value of New is ignored, so this relies on
            // article.Id already holding the stored id - confirm against ArticlesTable.
            _articles.New(article);
            _queue.New(new DiscordQueueEntity
            {
                ArticleId = article.Id
            });
        }
    }

    /// <summary>
    /// Tries each feed path under the source URL in order and returns the articles of the
    /// first feed that loads and processes without throwing.
    /// </summary>
    /// <param name="source">Source whose <c>Url</c> is the base address.</param>
    /// <param name="feedPaths">Feed paths relative to the source URL, in priority order.</param>
    /// <param name="isRelease">Value stored in <c>CodeIsRelease</c> of each article.</param>
    /// <param name="isCommit">Value stored in <c>CodeIsCommit</c> of each article.</param>
    /// <returns>The new articles, or an empty list when every feed failed.</returns>
    private IEnumerable<ArticlesEntity> PullFirstAvailableFeed(SourceEntity source, IEnumerable<string> feedPaths,
        bool isRelease, bool isCommit)
    {
        // Drop trailing slashes so "https://host/owner/repo/" does not turn into
        // "https://host/owner/repo//releases.atom".
        var baseUrl = new Uri(source.Url).AbsoluteUri.TrimEnd('/');

        foreach (var feedPath in feedPaths)
        {
            var link = $"{baseUrl}/{feedPath}";
            try
            {
                using var reader = XmlReader.Create(link);
                var feed = SyndicationFeed.Load(reader);
                return ProcessFeed(feed.Items, source, isRelease, isCommit);
            }
            catch (Exception ex)
            {
                // Best effort: any failure (download, parse or processing) moves on to the
                // next candidate feed. The exception and the exact link that failed are
                // logged so the cause is not lost.
                _logger.Debug(ex, "{JobName} - Does not respond to {UrlAbsoluteUri}. Might not have anything", JobName,
                    link);
            }
        }

        return new List<ArticlesEntity>();
    }

    /// <summary>
    /// Converts feed entries that are not yet stored into <see cref="ArticlesEntity"/> objects.
    /// </summary>
    /// <param name="feed">Entries of the downloaded feed.</param>
    /// <param name="source">Source the feed belongs to; supplies the source id and tags.</param>
    /// <param name="isRelease">Value stored in <c>CodeIsRelease</c>.</param>
    /// <param name="isCommit">Value stored in <c>CodeIsCommit</c>.</param>
    /// <returns>
    /// One article per entry whose URL is not already known. The articles are not saved here.
    /// </returns>
    private IEnumerable<ArticlesEntity> ProcessFeed(IEnumerable<SyndicationItem> feed, SourceEntity source,
        bool isRelease, bool isCommit)
    {
        var items = new List<ArticlesEntity>();
        foreach (var item in feed)
        {
            // An entry without a usable link cannot be de-duplicated or fetched; skip it
            // instead of letting an indexing error abort the rest of the feed.
            var firstLink = item.Links.Count > 0 ? item.Links[0] : null;
            if (firstLink?.Uri is not { IsAbsoluteUri: true } itemUri)
            {
                _logger.Warning("{JobName} - Feed entry {EntryId} has no absolute link and was skipped", JobName,
                    item.Id);
                continue;
            }

            var itemUrl = itemUri.AbsoluteUri;

            // GetByUrl hands back an entity with an empty id when the url is unknown.
            var existing = _articles.GetByUrl(itemUrl);
            if (existing.Id != Guid.Empty) continue;

            // Read the linked page; its header image becomes the article thumbnail.
            var parser = new HtmlPageReader(new HtmlPageReaderOptions
            {
                Url = itemUrl
            });
            parser.Parse();

            // An entry with no author at all is treated like one whose author has no name.
            var person = item.Authors.Count > 0 ? item.Authors[0] : new SyndicationPerson();
            if (person.Name is null)
            {
                _logger.Warning("Author was missing from the record and will continue with a missing author");
            }

            // Still a blocking call because this method is synchronous; GetResult() surfaces
            // the original exception instead of an AggregateException.
            var author = _author.CreateIfMissingAsync(new AuthorEntity
            {
                Name = person.Name,
                SourceId = source.Id,
                Image = "",
            }).GetAwaiter().GetResult();

            var title = item.Title?.Text ?? "";

            items.Add(new ArticlesEntity
            {
                SourceId = source.Id,
                AuthorId = author.Id,
                Tags = source.Tags,
                Title = title,
                Url = itemUrl,
                // UtcDateTime applies the feed's own offset. DateTime.ToUniversalTime() would
                // drop the offset and reinterpret the clock time in the machine's local zone.
                PubDate = item.LastUpdatedTime.UtcDateTime,
                Thumbnail = parser.Data.Header.Image,
                Description = title,
                CodeIsRelease = isRelease,
                CodeIsCommit = isCommit,
            });
        }

        return items;
    }
}