diff --git a/.vscode/settings.json b/.vscode/settings.json
index 1c6dc21..b285125 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,6 +1,8 @@
{
"files.exclude": {
"**/obj": true,
- "**/bin": true
- }
+ "**/bin": false
+ },
+ "csharp.inlayHints.types.enabled": true,
+ "omnisharp.enableImportCompletion": true
}
\ No newline at end of file
diff --git a/Newsbot.Collector.Api/Newsbot.Collector.Api.csproj b/Newsbot.Collector.Api/Newsbot.Collector.Api.csproj
index d2595e3..0c9a799 100644
--- a/Newsbot.Collector.Api/Newsbot.Collector.Api.csproj
+++ b/Newsbot.Collector.Api/Newsbot.Collector.Api.csproj
@@ -16,6 +16,7 @@
+
diff --git a/Newsbot.Collector.Api/Program.cs b/Newsbot.Collector.Api/Program.cs
index 7656abc..3f0af26 100644
--- a/Newsbot.Collector.Api/Program.cs
+++ b/Newsbot.Collector.Api/Program.cs
@@ -1,6 +1,6 @@
using Hangfire;
using Hangfire.MemoryStorage;
-using Newsbot.Collector.Services;
+using Newsbot.Collector.Services.Jobs;
using Newsbot.Collector.Domain.Models;
var builder = WebApplication.CreateBuilder(args);
@@ -35,7 +35,7 @@ if (app.Environment.IsDevelopment())
app.UseHttpsRedirection();
app.UseHangfireDashboard();
-//RecurringJob.AddOrUpdate()
+RecurringJob.AddOrUpdate<HelloWorldJob>("Example", x => x.Execute(), "0/2 * * * *"); // cron: every 2 minutes
app.UseAuthorization();
diff --git a/Newsbot.Collector.Database/Newsbot.Collector.Database.csproj b/Newsbot.Collector.Database/Newsbot.Collector.Database.csproj
index a58561a..876eb77 100644
--- a/Newsbot.Collector.Database/Newsbot.Collector.Database.csproj
+++ b/Newsbot.Collector.Database/Newsbot.Collector.Database.csproj
@@ -4,6 +4,11 @@
+
+
+
+
+
net7.0
enable
diff --git a/Newsbot.Collector.Database/Repositories/ArticlesTable.cs b/Newsbot.Collector.Database/Repositories/ArticlesTable.cs
new file mode 100644
index 0000000..a7a0344
--- /dev/null
+++ b/Newsbot.Collector.Database/Repositories/ArticlesTable.cs
@@ -0,0 +1,103 @@
+using System.Data;
+using Dapper;
+using Newsbot.Collector.Domain.Models;
+using Npgsql;
+
+namespace Newsbot.Collector.Database.Repositories;
+
+/// <summary>
+/// Dapper-based data access for the articles table.
+/// </summary>
+public class ArticlesTable
+{
+    // Development fallback used when no connection string is supplied.
+    // NOTE(review): credentials in source; move to configuration before deploying.
+    private const string DefaultConnectionString = "Host=localhost;Username=postgres;Password=postgres;Database=postgres;sslmode=disable";
+
+    private readonly string _connectionString;
+
+    /// <summary>
+    /// Creates a repository that connects with <paramref name="connectionString"/>.
+    /// </summary>
+    /// <param name="connectionString">Npgsql connection string; empty selects the development fallback.</param>
+    public ArticlesTable(string connectionString)
+    {
+        _connectionString = connectionString;
+    }
+
+    /// <summary>
+    /// Opens a new PostgreSQL connection; the caller is responsible for disposing it.
+    /// </summary>
+    /// <param name="connectionString">Connection string to use; null, empty or whitespace selects the development fallback.</param>
+    /// <returns>An open connection.</returns>
+    public static IDbConnection OpenConnection(string connectionString)
+    {
+        // Honour the argument instead of always dialling the hard-coded database, while
+        // keeping callers that pass "" working against the same local instance as before.
+        var cs = string.IsNullOrWhiteSpace(connectionString) ? DefaultConnectionString : connectionString;
+        var conn = new NpgsqlConnection(cs);
+        conn.Open();
+        return conn;
+    }
+
+    /// <summary>
+    /// Returns one page of articles, newest publication date first.
+    /// </summary>
+    /// <param name="Page">Zero-based page index.</param>
+    /// <param name="Count">Maximum number of rows per page.</param>
+    /// <returns>The rows of the requested page.</returns>
+    public List<ArticlesModel> List(int Page = 0, int Count = 25)
+    {
+        using var conn = OpenConnection(_connectionString);
+        var res = conn.Query<ArticlesModel>(@"select * from articles
+            Order By PubDate Desc
+            Offset @Page
+            Fetch Next @Count Rows Only", new { Page = Page * Count, Count = Count }).ToList();
+        return res;
+    }
+
+    /// <summary>
+    /// Loads the article with the given ID.
+    /// </summary>
+    /// <param name="ID">Primary key of the article.</param>
+    /// <returns>The matching article.</returns>
+    /// <exception cref="InvalidOperationException">No article has this ID.</exception>
+    public ArticlesModel GetById(Guid ID)
+    {
+        using var conn = OpenConnection(_connectionString);
+        var res = conn.Query<ArticlesModel>("select * from articles where ID = @ID", new { ID = ID });
+        return res.First();
+    }
+
+    /// <summary>
+    /// Loads the first article stored for the given URL.
+    /// </summary>
+    /// <param name="url">URL to look up.</param>
+    /// <returns>The matching article.</returns>
+    /// <exception cref="InvalidOperationException">No article has this URL.</exception>
+    public ArticlesModel GetByUrl(string url)
+    {
+        using var conn = OpenConnection(_connectionString);
+        var res = conn.Query<ArticlesModel>("select * from articles where Url = @Url Limit 1", new { Url = url });
+        return res.First();
+    }
+
+    /// <summary>
+    /// Inserts <paramref name="model"/> as a new row.
+    /// </summary>
+    /// <param name="model">Article to store; its ID is overwritten with a freshly generated one.</param>
+    public void New(ArticlesModel model)
+    {
+        model.ID = Guid.NewGuid();
+
+        using var conn = OpenConnection(_connectionString);
+        var q = @"INSERT INTO Articles
+            (ID, SourceId, Tags, Title, Url, PubDate, Video, VideoHeight, VideoWidth, Thumbnail, Description, AuthorName, AuthorImage)
+            Values
+            (@Id, @SourceId, @Tags, @Title, @Url, @PubDate, @Video, @VideoHeight, @VideoWidth, @Thumbnail, @Description, @AuthorName, @AuthorImage);
+            ";
+        var res = conn.Execute(q, model);
+        // Echo the number of rows Dapper reports as affected.
+        Console.WriteLine(res);
+    }
+}
\ No newline at end of file
diff --git a/Newsbot.Collector.Database/Repositories/SettingsTable.cs b/Newsbot.Collector.Database/Repositories/SettingsTable.cs
new file mode 100644
index 0000000..1cd784f
--- /dev/null
+++ b/Newsbot.Collector.Database/Repositories/SettingsTable.cs
@@ -0,0 +1,55 @@
+using System.Data;
+using Dapper;
+using Newsbot.Collector.Domain.Models;
+using Npgsql;
+
+namespace Newsbot.Collector.Database.Repositories;
+
+/// <summary>
+/// Dapper-based data access for the Settings table.
+/// </summary>
+public class SettingsTable
+{
+    // Development fallback used when no connection string is supplied.
+    // NOTE(review): credentials in source; move to configuration before deploying.
+    private const string DefaultConnectionString = "Host=localhost;Username=postgres;Password=postgres;Database=postgres;sslmode=disable";
+
+    private readonly string _connectionString;
+
+    /// <summary>
+    /// Creates a repository that connects with <paramref name="connectionString"/>.
+    /// </summary>
+    /// <param name="connectionString">Npgsql connection string; empty selects the development fallback.</param>
+    public SettingsTable(string connectionString)
+    {
+        _connectionString = connectionString;
+    }
+
+    /// <summary>
+    /// Opens a new PostgreSQL connection; the caller is responsible for disposing it.
+    /// </summary>
+    /// <param name="connectionString">Connection string to use; null, empty or whitespace selects the development fallback.</param>
+    /// <returns>An open connection.</returns>
+    public static IDbConnection OpenConnection(string connectionString)
+    {
+        // Honour the argument instead of always dialling the hard-coded database, while
+        // keeping callers that pass "" working against the same local instance as before.
+        var cs = string.IsNullOrWhiteSpace(connectionString) ? DefaultConnectionString : connectionString;
+        var conn = new NpgsqlConnection(cs);
+        conn.Open();
+        return conn;
+    }
+
+    /// <summary>
+    /// Inserts <paramref name="model"/> as a new row.
+    /// </summary>
+    /// <param name="model">Setting to store; its ID is overwritten with a freshly generated one.</param>
+    public void New(SettingModel model)
+    {
+        model.ID = Guid.NewGuid();
+
+        using var conn = OpenConnection(_connectionString);
+        var q = @"Insert Into Settings (ID, Key, Value, OPTIONS) Values (@ID,@Key,@Value,@Options)";
+        conn.Execute(q, model);
+    }
+}
\ No newline at end of file
diff --git a/Newsbot.Collector.Domain/Interfaces/ICollector.cs b/Newsbot.Collector.Domain/Interfaces/ICollector.cs
index 8a6dd43..bca8bfd 100644
--- a/Newsbot.Collector.Domain/Interfaces/ICollector.cs
+++ b/Newsbot.Collector.Domain/Interfaces/ICollector.cs
@@ -1,6 +1,8 @@
+using Newsbot.Collector.Domain.Models;
+
namespace Newsbot.Collector.Domain.Interfaces;
public interface ICollector
{
- void Collect();
+    List<ArticlesModel> Collect(); // returns the articles gathered by the collector
}
\ No newline at end of file
diff --git a/Newsbot.Collector.Domain/Models/DatabaseModel.cs b/Newsbot.Collector.Domain/Models/DatabaseModel.cs
index 1f32a86..c3af5cb 100644
--- a/Newsbot.Collector.Domain/Models/DatabaseModel.cs
+++ b/Newsbot.Collector.Domain/Models/DatabaseModel.cs
@@ -5,6 +5,7 @@ public class ArticlesModel
public Guid ID { get; set; }
public Guid SourceID { get; set; }
public string Tags { get; set; } = "";
+ public string Title { get; set; } = "";
public string URL { get; set; } = "";
public DateTime PubDate { get; set; }
public string Video { get; set; } = "";
@@ -72,5 +73,5 @@ public class SubscriptionModel
{
public Guid ID { get; set; }
public Guid DiscordWebHookID { get; set; }
- public Guid SourceID { get; set;}
+ public Guid SourceID { get; set; }
}
\ No newline at end of file
diff --git a/Newsbot.Collector.Services/EnvLoader.cs b/Newsbot.Collector.Services/EnvLoader.cs
deleted file mode 100644
index 2dd8c49..0000000
--- a/Newsbot.Collector.Services/EnvLoader.cs
+++ /dev/null
@@ -1,77 +0,0 @@
-using Newsbot.Collector.Domain.Models;
-
-namespace Newsbot.Collector.Services;
-
-public static class EnvLoader
-{
-
- public static ConfigModel Load()
- {
- var reddit = new RedditConfigModel
- {
- IsEnabled = Bool("FEATURE_ENABLE_REDDIT_BACKEND"),
- PullHot = Bool("REDDIT_PULL_HOT"),
- PullNsfw = Bool("REDDIT_PULL_NSFW"),
- PullTop = Bool("REDDIT_PULL_TOP")
- };
-
- return new ConfigModel
- {
- ServerAddress = String("SERVER_ADDRESS"),
- SqlConnectionString = String("SQL_CONNECTION_STRING"),
- Reddit = reddit,
- };
- }
-
- public static void LoadEnvFile()
- {
- var curDir = Directory.GetCurrentDirectory();
- var filePath = Path.Combine(curDir, ".env");
-
- if (!File.Exists(filePath))
- return;
-
- foreach (var line in File.ReadAllLines(filePath))
- {
- var parts = line.Split('=', StringSplitOptions.RemoveEmptyEntries);
-
- if (parts.Length != 2)
- continue;
-
- if (parts[1].Contains("'") == true ){
- parts[1] = parts[1].Replace("'", "");
- }
-
- Environment.SetEnvironmentVariable(parts[0], parts[1]);
- }
-}
-
-private static string String(string Key)
-{
- var result = Environment.GetEnvironmentVariable(Key);
- if (result is null)
- {
- return "";
- }
-
- return result;
-}
-
-private static bool Bool(string Key)
-{
- var result = String(Key);
- if (result == "")
- {
- return false;
- }
-
- if (result.ToLower() == "true")
- {
- return true;
- }
- else
- {
- return false;
- }
-}
-}
\ No newline at end of file
diff --git a/Newsbot.Collector.Services/HtmlMeta.cs b/Newsbot.Collector.Services/HtmlMeta.cs
new file mode 100644
index 0000000..79b3c8d
--- /dev/null
+++ b/Newsbot.Collector.Services/HtmlMeta.cs
@@ -0,0 +1,140 @@
+using System.Data;
+using System.Runtime.Serialization;
+using System.Xml;
+using HtmlAgilityPack;
+
+namespace Newsbot.Collector.Services;
+
+/// <summary>
+/// Root container for the values extracted from an HTML page.
+/// </summary>
+public class HtmlData
+{
+    public HtmlHeaderData Header { get; set; } = new HtmlHeaderData();
+}
+
+/// <summary>
+/// Values extracted from the page header.
+/// </summary>
+public class HtmlHeaderData
+{
+    public HtmlMetaData Meta { get; set; } = new HtmlMetaData();
+}
+
+/// <summary>
+/// Values read from the page's meta tags; each one is an empty string when not found.
+/// </summary>
+public class HtmlMetaData
+{
+    public string Title { get; set; } = "";
+    public string Description { get; set; } = "";
+    public string Image { get; set; } = "";
+    public string Url { get; set; } = "";
+    public string PageType { get; set; } = "";
+    //public string Color { get; set; }
+}
+
+/// <summary>
+/// Downloads a page and reads selected meta tag values from its head into <see cref="Data"/>.
+/// </summary>
+public class HtmlPageReader
+{
+    private const string XPathMetaTag = "//head/meta";
+
+    // One client for all readers; creating an HttpClient per page wastes sockets.
+    private static readonly HttpClient s_httpClient = new HttpClient();
+
+    private readonly string _siteContent;
+
+    // Meta nodes of the downloaded page, parsed once and reused by every lookup.
+    private readonly List<HtmlNode> _metaTags;
+
+    public HtmlData Data { get; set; }
+
+    /// <summary>
+    /// Downloads <paramref name="pageUrl"/> and fills <see cref="Data"/> from its meta tags.
+    /// </summary>
+    /// <param name="pageUrl">Address of the page to read.</param>
+    public HtmlPageReader(string pageUrl)
+    {
+        _siteContent = ReadSiteContent(pageUrl);
+        _metaTags = CollectMetaTags();
+
+        Data = new HtmlData();
+        Data.Header.Meta.Title = GetMetaTitle();
+        Data.Header.Meta.Description = GetDescription();
+        Data.Header.Meta.Image = GetImage();
+        Data.Header.Meta.Url = GetUrl();
+        Data.Header.Meta.PageType = GetPageType();
+    }
+
+    // Fetches the page body, blocking the calling thread until the download finishes.
+    private static string ReadSiteContent(string url)
+    {
+        return s_httpClient.GetStringAsync(url).Result;
+    }
+
+    // Parses the downloaded markup and returns the meta nodes under head.
+    private List<HtmlNode> CollectMetaTags()
+    {
+        var htmlDoc = new HtmlDocument();
+        htmlDoc.LoadHtml(_siteContent);
+
+        // SelectNodes yields null rather than an empty collection when nothing matches.
+        var nodes = htmlDoc.DocumentNode.SelectNodes(XPathMetaTag);
+        return nodes is null ? new List<HtmlNode>() : nodes.ToList();
+    }
+
+    /// <summary>
+    /// Returns the second attribute's value of the first meta tag whose first attribute's value contains <paramref name="Tag"/>.
+    /// </summary>
+    /// <param name="Tag">Text to look for, e.g. <c>og:title</c>.</param>
+    /// <returns>The matching value, or an empty string when no tag matches.</returns>
+    public string GetTagValue(string Tag)
+    {
+        foreach (var meta in _metaTags)
+        {
+            // Matching is positional (key first, value second), so tags with fewer
+            // than two attributes, such as a bare charset declaration, cannot match.
+            if (meta.Attributes.Count < 2)
+            {
+                continue;
+            }
+
+            if (meta.Attributes[0].Value.Contains(Tag))
+            {
+                return meta.Attributes[1].Value;
+            }
+        }
+        return "";
+    }
+
+    // Returns the first non-empty lookup result for the given names, in order.
+    private string FindFirstResult(string[] tags)
+    {
+        foreach (var tag in tags)
+        {
+            var res = GetTagValue(tag);
+            if (res != "")
+            {
+                return res;
+            }
+        }
+        return "";
+    }
+
+    /// <summary>Returns the first value found for og:title, twitter:title or title, else an empty string.</summary>
+    public string GetMetaTitle() => FindFirstResult(new[] { "og:title", "twitter:title", "title" });
+
+    /// <summary>Returns the first value found for description or og:description, else an empty string.</summary>
+    public string GetDescription() => FindFirstResult(new[] { "description", "og:description" });
+
+    /// <summary>Returns the first value found for image, og:image or twitter:image, else an empty string.</summary>
+    public string GetImage() => FindFirstResult(new[] { "image", "og:image", "twitter:image" });
+
+    /// <summary>Returns the first value found for url, og:url or twitter:url, else an empty string.</summary>
+    public string GetUrl() => FindFirstResult(new[] { "url", "og:url", "twitter:url" });
+
+    /// <summary>Returns the first value found for og:type or type, else an empty string.</summary>
+    public string GetPageType() => FindFirstResult(new[] { "og:type", "type" });
+}
\ No newline at end of file
diff --git a/Newsbot.Collector.Services/Jobs/Factory.cs b/Newsbot.Collector.Services/Jobs/Factory.cs
deleted file mode 100644
index e69de29..0000000
diff --git a/Newsbot.Collector.Services/Jobs/HelloWorldJob.cs b/Newsbot.Collector.Services/Jobs/HelloWorldJob.cs
index 8ff4d9d..e305568 100644
--- a/Newsbot.Collector.Services/Jobs/HelloWorldJob.cs
+++ b/Newsbot.Collector.Services/Jobs/HelloWorldJob.cs
@@ -4,13 +4,18 @@ namespace Newsbot.Collector.Services.Jobs;
public class HelloWorldJob
{
- public readonly string _message;
+ public string _message { get; set; }
public HelloWorldJob(string message)
{
_message = message;
}
+ public void SetMessage(string message)
+ {
+ _message = message;
+ }
+
public void Execute()
{
Console.WriteLine(_message);
diff --git a/Newsbot.Collector.Services/Jobs/RssWatcherJob.cs b/Newsbot.Collector.Services/Jobs/RssWatcherJob.cs
new file mode 100644
index 0000000..fcbbc74
--- /dev/null
+++ b/Newsbot.Collector.Services/Jobs/RssWatcherJob.cs
@@ -0,0 +1,75 @@
+using System.ServiceModel.Syndication;
+using System.Xml;
+using Newsbot.Collector.Domain.Interfaces;
+using Newsbot.Collector.Domain.Models;
+
+namespace Newsbot.Collector.Services.Jobs;
+
+/// <summary>
+/// Collects the posts of a single syndication feed as articles.
+/// </summary>
+public class RssWatcherJob : ICollector
+{
+    private string? _url;
+
+    /// <summary>
+    /// Creates a job that reads the feed at <paramref name="url"/>.
+    /// </summary>
+    /// <param name="url">Address of the feed.</param>
+    public RssWatcherJob(string url)
+    {
+        _url = url;
+    }
+
+    /// <summary>
+    /// Loads the feed and builds one article per post, reading thumbnail and description from the post's page.
+    /// </summary>
+    /// <returns>The articles built from the feed, in feed order.</returns>
+    public List<ArticlesModel> Collect()
+    {
+        var collectedPosts = new List<ArticlesModel>();
+
+        _url ??= "";
+
+        using var reader = XmlReader.Create(_url);
+        var feed = SyndicationFeed.Load(reader);
+
+        foreach (var post in feed.Items)
+        {
+            // Without a link there is no page to read and no URL to store.
+            if (post.Links.Count == 0)
+            {
+                continue;
+            }
+
+            var link = post.Links[0].Uri;
+
+            // TODO: Check if we have seen the url before
+            // If we have, skip and save the site bandwidth
+
+            var meta = new HtmlPageReader(link.AbsoluteUri);
+
+            var article = new ArticlesModel
+            {
+                // Title is optional in a feed item.
+                Title = post.Title?.Text ?? "",
+                Tags = FetchTags(post),
+                URL = link.ToString(),
+                PubDate = post.PublishDate.DateTime,
+                Thumbnail = meta.Data.Header.Meta.Image,
+                Description = meta.Data.Header.Meta.Description,
+            };
+            collectedPosts.Add(article);
+
+            // try to not be too greedy
+            Thread.Sleep(3000);
+        }
+        return collectedPosts;
+    }
+
+    // Joins the post's category names, each followed by a comma (so a non-empty result ends with one).
+    private string FetchTags(SyndicationItem post)
+    {
+        return string.Concat(post.Categories.Select(tag => $"{tag.Name},"));
+    }
+}
\ No newline at end of file
diff --git a/Newsbot.Collector.Services/Newsbot.Collector.Services.csproj b/Newsbot.Collector.Services/Newsbot.Collector.Services.csproj
index f793c4e..36f4513 100644
--- a/Newsbot.Collector.Services/Newsbot.Collector.Services.csproj
+++ b/Newsbot.Collector.Services/Newsbot.Collector.Services.csproj
@@ -2,10 +2,12 @@
+
-
+
+
diff --git a/Newsbot.Collector.Tests/Jobs/RssWatcherJobTest.cs b/Newsbot.Collector.Tests/Jobs/RssWatcherJobTest.cs
new file mode 100644
index 0000000..1f24394
--- /dev/null
+++ b/Newsbot.Collector.Tests/Jobs/RssWatcherJobTest.cs
@@ -0,0 +1,18 @@
+using Newsbot.Collector.Services.Jobs;
+
+namespace Newsbot.Collector.Tests.Jobs;
+
+public class RssWatcherJobTest
+{
+    // Integration test: downloads the live feed and every linked page.
+    [Fact]
+    public void CanFindItems()
+    {
+        var url = "https://www.engadget.com/rss.xml";
+        var client = new RssWatcherJob(url);
+
+        var items = client.Collect();
+
+        Assert.NotEmpty(items);
+    }
+}
\ No newline at end of file
diff --git a/Newsbot.Collector.Tests/Newsbot.Collector.Tests.csproj b/Newsbot.Collector.Tests/Newsbot.Collector.Tests.csproj
index 86a36ef..759ae6c 100644
--- a/Newsbot.Collector.Tests/Newsbot.Collector.Tests.csproj
+++ b/Newsbot.Collector.Tests/Newsbot.Collector.Tests.csproj
@@ -21,4 +21,9 @@
+
+
+
+
+
diff --git a/Newsbot.Collector.Tests/Tables/ArticlesTableTests.cs b/Newsbot.Collector.Tests/Tables/ArticlesTableTests.cs
new file mode 100644
index 0000000..80de049
--- /dev/null
+++ b/Newsbot.Collector.Tests/Tables/ArticlesTableTests.cs
@@ -0,0 +1,48 @@
+using Newsbot.Collector.Database.Repositories;
+using Newsbot.Collector.Domain.Models;
+
+namespace Newsbot.Collector.Tests.Tables;
+
+// Integration tests: ArticlesTable opens a real PostgreSQL connection.
+public class ArticlesTableTests
+{
+    [Fact]
+    public void ArticlesListTest()
+    {
+        var client = new ArticlesTable("");
+
+        var res = client.List();
+
+        // List() defaults to 25 rows per page.
+        Assert.True(res.Count <= 25);
+    }
+
+    [Fact]
+    public void GetByIDTest()
+    {
+        // Expects a row with this ID to already exist in the database.
+        var uid = Guid.Parse("4ac46772-253c-4c3d-8a2c-29239abd2ad4");
+        var client = new ArticlesTable("");
+
+        var res = client.GetById(uid);
+
+        Assert.Equal(uid, res.ID);
+    }
+
+    [Fact]
+    public void NewRecordTest()
+    {
+        var client = new ArticlesTable("");
+        var model = new ArticlesModel
+        {
+            Title = "Unit Testing!",
+            SourceID = Guid.NewGuid(),
+            PubDate = DateTime.Now
+        };
+
+        client.New(model);
+
+        // New() assigns a fresh ID to the model before inserting it.
+        Assert.NotEqual(Guid.Empty, model.ID);
+    }
+}
\ No newline at end of file
diff --git a/Newsbot.Collector.Tests/Tables/SettingsTableTests.cs b/Newsbot.Collector.Tests/Tables/SettingsTableTests.cs
new file mode 100644
index 0000000..1f49ee5
--- /dev/null
+++ b/Newsbot.Collector.Tests/Tables/SettingsTableTests.cs
@@ -0,0 +1,25 @@
+using Newsbot.Collector.Database.Repositories;
+using Newsbot.Collector.Domain.Models;
+
+namespace Newsbot.Collector.Tests.Tables;
+
+// Integration test: SettingsTable opens a real PostgreSQL connection.
+public class SettingsTableTests
+{
+    [Fact]
+    public void New()
+    {
+        var client = new SettingsTable("");
+        var model = new SettingModel
+        {
+            Key = "Unit Testing",
+            Value = "Unit",
+            Options = ""
+        };
+
+        client.New(model);
+
+        // New() assigns a fresh ID to the model before inserting it.
+        Assert.NotEqual(Guid.Empty, model.ID);
+    }
+}
\ No newline at end of file