Newsbot.Collector/Newsbot.Collector.Services/Jobs/GithubWatcherJob.cs
James Tombleson 9be985da0a
Features/adding youtube (#13)
* Found the meta tags on youtube... in the body and updated the client to pull them out.

* Updated namespace on test

* I think formatting cleaned this up

* Seed migrations have been cleaned up to get my configs out and move them into a script.

* Updates to the ISourcesRepository.cs to allow for new calls to the db.

* formatter

* DB models updated. Icon can now track a source ID and Source can have a YouTube ID.

* Updated api logger to ignore otel if no connection string given.

* updated docker init so I can run migrations from the image

* seed was updated to reflect the new api changes

* Updated the SourcesController.cs to grab icon data.

* Added reddit const values

* Minor changes to HtmlPageReader.cs

* Jobs are now pulling in the config section to bundle values.

* Removed youtube api, not needed anymore.

* test updates
2023-03-31 22:49:39 -07:00

115 lines
3.5 KiB
C#

using System.ServiceModel.Syndication;
using System.Xml;
using Newsbot.Collector.Database.Repositories;
using Newsbot.Collector.Domain.Consts;
using Newsbot.Collector.Domain.Interfaces;
using Newsbot.Collector.Domain.Models;
using Newsbot.Collector.Domain.Models.Config;
using Newsbot.Collector.Services.HtmlParser;
namespace Newsbot.Collector.Services.Jobs;
/// <summary>
/// Settings object accepted by <c>GithubWatcherJob.InitAndExecute</c>.
/// </summary>
public class GithubWatcherJobOptions
{
    /// <summary>
    /// Connection-string configuration section. The job reads its <c>Database</c>
    /// value when it builds its repositories.
    /// </summary>
    public ConfigSectionConnectionStrings? ConnectionStrings { get; set; }

    /// <summary>
    /// Flag for pulling release feeds; <c>false</c> unless set.
    /// NOTE(review): nothing in this file reads this flag yet.
    /// </summary>
    public bool FeaturePullReleases { get; set; }

    /// <summary>
    /// Flag for pulling commit feeds; <c>false</c> unless set.
    /// NOTE(review): nothing in this file reads this flag yet.
    /// </summary>
    public bool FeaturePullCommits { get; set; }

    // TODO: a flag for pulling issues (PullIssues) was sketched here but is not implemented.
}
/// <summary>
/// Reads a GitHub repository's Atom feeds (releases and commits) and turns the
/// entries that are not already stored into <see cref="ArticlesModel"/> instances.
/// </summary>
public class GithubWatcherJob
{
    // Not readonly: InitAndExecute replaces these with instances built from the supplied options.
    private IArticlesRepository _articles;
    private IDiscordQueueRepository _queue;
    private ISourcesRepository _source;

    /// <summary>
    /// Creates the job with repositories built from an empty connection string.
    /// Call <see cref="InitAndExecute"/> to rebuild them from real configuration.
    /// </summary>
    public GithubWatcherJob()
    {
        _articles = new ArticlesTable("");
        _queue = new DiscordQueueTable("");
        _source = new SourcesTable("");
    }

    /// <summary>
    /// Rebuilds the repositories from <paramref name="options"/> and runs the job.
    /// </summary>
    /// <param name="options">
    /// Job settings. When <c>ConnectionStrings</c> is null it is replaced with a new,
    /// empty section; a null <c>Database</c> value falls back to an empty string.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="options"/> is null.</exception>
    public void InitAndExecute(GithubWatcherJobOptions options)
    {
        ArgumentNullException.ThrowIfNull(options);

        options.ConnectionStrings ??= new ConfigSectionConnectionStrings();
        var connectionString = options.ConnectionStrings.Database ?? "";

        _articles = new ArticlesTable(connectionString);
        _queue = new DiscordQueueTable(connectionString);
        _source = new SourcesTable(connectionString);

        Execute();
    }

    /// <summary>
    /// Runs one collection pass.
    /// </summary>
    private void Execute()
    {
        // NOTE(review): the result of this query is discarded; the repository to
        // pull is still the fixed URL below.
        _source.ListBySource(SourceTypes.GitHub, 25);

        // TODO: query sources for things to pull.
        // TODO: the collected articles are not stored or queued anywhere yet.
        _ = Collect(new Uri("https://github.com/jtom38/dvb"));

        // TODO: query */commits/main.atom as well (only master is requested today).
    }

    /// <summary>
    /// Collects new articles from the releases feed and the <c>master</c> commits
    /// feed of the repository at <paramref name="url"/>.
    /// </summary>
    /// <param name="url">Absolute repository URL, e.g. <c>https://github.com/owner/repo</c>.</param>
    /// <returns>The articles built from feed entries whose URL is not already stored.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="url"/> is null.</exception>
    public List<ArticlesModel> Collect(Uri url)
    {
        ArgumentNullException.ThrowIfNull(url);

        // Drop a trailing slash so the feed paths never contain "//".
        var repoUrl = url.AbsoluteUri.TrimEnd('/');
        var repoName = GetRepositoryName(url);

        // No real source record is looked up yet, so every article from this
        // call shares one freshly generated placeholder id.
        var placeHolderId = Guid.NewGuid();

        var items = new List<ArticlesModel>();
        items.AddRange(CollectItems($"{repoUrl}/releases.atom", placeHolderId, repoName));
        // Commits feeds live under /commits/<branch>.atom.
        items.AddRange(CollectItems($"{repoUrl}/commits/master.atom", placeHolderId, repoName));
        return items;
    }

    /// <summary>
    /// Returns the last path segment of <paramref name="url"/> without slashes,
    /// which for a repository URL is the repository name.
    /// </summary>
    private static string GetRepositoryName(Uri url)
    {
        var segments = url.Segments;
        return segments.Length > 0 ? segments[segments.Length - 1].Trim('/') : "";
    }

    /// <summary>
    /// Loads the Atom feed at <paramref name="baseUrl"/> and builds an article for
    /// every entry whose link is not already known to the articles repository.
    /// </summary>
    /// <param name="baseUrl">Full URL of the Atom feed.</param>
    /// <param name="sourceId">Value written to each article's <c>SourceID</c>.</param>
    /// <param name="repoName">Repository name used in each article's description.</param>
    /// <returns>The newly built articles; empty when every entry is already stored.</returns>
    /// <remarks>
    /// Failures while opening or parsing the feed itself are not caught here and
    /// propagate to the caller. Failures while building a single article are
    /// written to the console and that entry is skipped.
    /// </remarks>
    private List<ArticlesModel> CollectItems(string baseUrl, Guid sourceId, string repoName)
    {
        var items = new List<ArticlesModel>();

        using var reader = XmlReader.Create(baseUrl);
        var feed = SyndicationFeed.Load(reader);

        foreach (var item in feed.Items)
        {
            // An entry without a link has no URL to de-duplicate on or to store.
            if (item.Links.Count == 0)
            {
                continue;
            }

            var itemUrl = item.Links[0].Uri.AbsoluteUri;

            // Skip entries that are already stored.
            var existing = _articles.GetByUrl(itemUrl);
            if (existing.ID != Guid.Empty)
            {
                continue;
            }

            try
            {
                // Parsing happens inside the try so that one page that cannot be
                // read skips only this entry instead of aborting the whole feed.
                var parser = new HtmlPageReader(new HtmlPageReaderOptions
                {
                    Url = itemUrl
                });
                parser.Parse();

                // Entries without an author are kept, with empty author fields.
                var author = item.Authors.Count > 0 ? item.Authors[0] : null;

                items.Add(new ArticlesModel
                {
                    SourceID = sourceId,
                    Tags = "github",
                    Title = item.Title.Text,
                    URL = itemUrl,
                    // TODO: PubDate = item.LastUpdatedTime.DateTime (currently disabled).
                    Thumbnail = parser.Data.Header.Image,
                    // NOTE(review): the same "has released" wording is used for commit entries.
                    Description = $"'{repoName}' has released '{item.Title.Text}'!",
                    AuthorName = author?.Name ?? "",
                    AuthorImage = author?.Uri ?? ""
                });
            }
            catch (Exception e)
            {
                Console.WriteLine(e);
            }
        }

        return items;
    }
}