// Newsbot.Collector/Newsbot.Collector.Services/Jobs/RssWatcherJob.cs

using System.ServiceModel.Syndication;
using System.Xml;
using Newsbot.Collector.Domain.Interfaces;
using Newsbot.Collector.Domain.Models;
namespace Newsbot.Collector.Services.Jobs;
/// <summary>
/// Collector that loads a single syndication (RSS/Atom) feed and turns each
/// feed item into an <see cref="ArticlesModel"/>, enriching it with the
/// thumbnail and description read from the linked page via
/// <see cref="HtmlPageReader"/>.
/// </summary>
public class RssWatcherJob : ICollector
{
    /// <summary>
    /// Pause, in milliseconds, inserted between two article page fetches so
    /// the source site is not hit with back-to-back requests.
    /// </summary>
    private const int DelayBetweenPagesMs = 3000;

    private readonly string? _url;

    /// <summary>Creates a job for the feed located at <paramref name="url"/>.</summary>
    /// <param name="url">Location of the feed to load.</param>
    public RssWatcherJob(string url)
    {
        _url = url;
    }

    /// <summary>
    /// Loads the feed and builds one article per feed item that carries a
    /// usable (absolute) link. Items without such a link are skipped because
    /// there is no page to read the metadata from.
    /// </summary>
    /// <returns>The articles built from the feed, in feed order.</returns>
    public List<ArticlesModel> Collect()
    {
        var collectedPosts = new List<ArticlesModel>();

        // A missing url is handed to the reader as an empty string (same as
        // before), but without overwriting the field as a side effect.
        using var reader = XmlReader.Create(_url ?? "");
        var feed = SyndicationFeed.Load(reader);

        foreach (var post in feed.Items)
        {
            // A feed item is not guaranteed to have a link; indexing Links[0]
            // blindly would abort the whole run on one malformed item.
            var link = post.Links.FirstOrDefault()?.Uri;
            if (link is null || !link.IsAbsoluteUri)
            {
                continue;
            }

            // TODO: Check if we have seen the url before.
            // If we have, skip and save the site bandwidth.

            // Try to not be too greedy: wait between page fetches, but not
            // before the first one and not after the last one.
            if (collectedPosts.Count > 0)
            {
                Thread.Sleep(DelayBetweenPagesMs);
            }

            var meta = new HtmlPageReader(link.AbsoluteUri);
            var pageMeta = meta.Data.Header.Meta;

            collectedPosts.Add(new ArticlesModel
            {
                // Title is optional in a feed item; fall back to an empty title.
                Title = post.Title?.Text ?? "",
                Tags = FetchTags(post),
                URL = link.ToString(),
                PubDate = post.PublishDate.DateTime,
                Thumbnail = pageMeta.Image,
                Description = pageMeta.Description,
            });
        }

        return collectedPosts;
    }

    /// <summary>
    /// Flattens the categories of <paramref name="post"/> into one string.
    /// </summary>
    /// <param name="post">Feed item whose categories are read.</param>
    /// <returns>
    /// Every category name followed by a comma (so a non-empty result ends
    /// with a trailing comma), or an empty string when there are no categories.
    /// </returns>
    private static string FetchTags(SyndicationItem post)
    {
        // The trailing comma is kept on purpose so the produced format stays
        // identical to what was generated previously.
        return string.Concat(post.Categories.Select(tag => $"{tag.Name},"));
    }
}