using System.ServiceModel.Syndication;
using System.Xml;
using Newsbot.Collector.Domain.Interfaces;
using Newsbot.Collector.Domain.Models;

namespace Newsbot.Collector.Services.Jobs;

/// <summary>
/// Collector that loads a syndication feed from a URL and converts each feed
/// item into an <see cref="ArticlesModel"/>.
/// </summary>
public class RssWatcherJob : ICollector
{
    /// <summary>Pause, in milliseconds, taken after each processed item.</summary>
    private const int DelayBetweenPostsMs = 3000;

    private readonly string? _url;

    /// <summary>Creates a job bound to a single feed.</summary>
    /// <param name="url">Location of the feed to load.</param>
    public RssWatcherJob(string url)
    {
        _url = url;
    }

    /// <summary>
    /// Loads the feed and builds one article per feed item that has at least one link.
    /// </summary>
    /// <returns>The articles built from the feed, in feed order.</returns>
    public List<ArticlesModel> Collect()
    {
        var collectedPosts = new List<ArticlesModel>();

        // Fall back to an empty string so XmlReader.Create always receives a non-null argument.
        var feedUrl = _url ?? string.Empty;

        using var reader = XmlReader.Create(feedUrl);
        var feed = SyndicationFeed.Load(reader);

        foreach (var post in feed.Items)
        {
            // An item without any link cannot be resolved to a page; skip it
            // rather than letting Links[0] throw and abort the whole run.
            if (post.Links.Count == 0)
            {
                continue;
            }

            var link = post.Links[0].Uri;

            // TODO: Check if we have seen the url before.
            // If we have, skip and save the site bandwidth.

            // NOTE(review): HtmlPageReader presumably fetches the page to read its meta tags - confirm.
            var meta = new HtmlPageReader(link.AbsoluteUri);

            var article = new ArticlesModel
            {
                // Title is optional in a feed item; avoid a NullReferenceException.
                Title = post.Title?.Text ?? string.Empty,
                Tags = FetchTags(post),
                URL = link.ToString(),
                PubDate = post.PublishDate.DateTime,
                Thumbnail = meta.Data.Header.Meta.Image,
                Description = meta.Data.Header.Meta.Description,
            };
            collectedPosts.Add(article);

            // try to not be too greedy
            Thread.Sleep(DelayBetweenPostsMs);
        }

        return collectedPosts;
    }

    /// <summary>
    /// Flattens the categories of a feed item into a single string.
    /// </summary>
    /// <param name="post">Feed item whose categories are read.</param>
    /// <returns>
    /// Every category name followed by a comma (so a non-empty result ends
    /// with a trailing comma); an empty string when there are no categories.
    /// </returns>
    private string FetchTags(SyndicationItem post)
    {
        return string.Concat(post.Categories.Select(tag => $"{tag.Name},"));
    }
}