Features/adding youtube (#13)

* Found the meta tags on youtube... in the body and updated the client to pull them out.

* Updated namespace on test

* I think formatting cleaned this up

* Seed migrations have been cleaned up: my personal configs were removed from them and moved to a script.

* Updates to the ISourcesRepository.cs to allow for new calls to the db.

* formatter

* Db models updated. Icon now can track sourceID and source can have a youtube id.

* Updated the API logger to skip the OpenTelemetry (otel) sink when no connection string is given.

* updated docker init so I can run migrations from the image

* seed was updated to reflect the new api changes

* Updated the SourcesController.cs to grab icon data.

* Added reddit const values

* Minor changes to HtmlPageReader.cs

* Jobs are now pulling in the config section to bundle values.

* Removed youtube api, not needed anymore.

* test updates
This commit is contained in:
James Tombleson 2023-03-31 22:49:39 -07:00 committed by GitHub
parent ac6bdaa184
commit 9be985da0a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
24 changed files with 471 additions and 351 deletions

View File

@ -31,4 +31,4 @@ COPY --from=publish /app/build /app
COPY --from=build ./app/Newsbot.Collector.Database/Migrations/ /app/migrations
COPY --from=goose /go/bin/goose /app
ENTRYPOINT [ "dotnet", "Newsbot.Collector.Api.dll" ]
CMD [ "dotnet", "Newsbot.Collector.Api.dll" ]

View File

@ -5,6 +5,7 @@ using Newsbot.Collector.Domain.Consts;
using Newsbot.Collector.Domain.Dto;
using Newsbot.Collector.Domain.Interfaces;
using Newsbot.Collector.Domain.Models;
using Newsbot.Collector.Services.HtmlParser;
namespace Newsbot.Collector.Api.Controllers;
@ -12,6 +13,7 @@ namespace Newsbot.Collector.Api.Controllers;
[Route("api/sources")]
public class SourcesController : ControllerBase
{
private readonly IIconsRepository _icons;
private readonly ILogger<SourcesController> _logger;
//private readonly ConnectionStrings _settings;
@ -22,6 +24,7 @@ public class SourcesController : ControllerBase
_logger = logger;
//_settings = settings.Value;
_sources = new SourcesTable(settings.Value.Database);
_icons = new IconsTable(settings.Value.Database);
}
[HttpGet(Name = "GetSources")]
@ -43,11 +46,19 @@ public class SourcesController : ControllerBase
}
[HttpPost("new/reddit")]
public SourceDto NewReddit(string name, string url)
public SourceDto NewReddit(string name)
{
var res = _sources.GetByNameAndType(name, SourceTypes.Reddit);
if (res.ID != Guid.Empty) return SourceDto.Convert(res);
var uri = new Uri($"https://reddit.com/r/{name}");
var pageReader = new HtmlPageReader(new HtmlPageReaderOptions
{
Url = uri.ToString()
});
pageReader.Parse();
var item = _sources.New(new SourceModel
{
Site = SourceTypes.Reddit,
@ -55,9 +66,18 @@ public class SourcesController : ControllerBase
Type = SourceTypes.Reddit,
Source = "feed",
Enabled = true,
Url = url,
Tags = $"{SourceTypes.Reddit}, {name}"
Url = uri.ToString(),
Tags = $"{SourceTypes.Reddit},{name}"
});
// Not all subreddits have an Icon, so we only want to add a record when it has one.
if (pageReader.Data.Header.Image != "")
_icons.New(new IconModel
{
Id = Guid.NewGuid(),
FileName = pageReader.Data.Header.Image,
SourceId = item.ID
});
return SourceDto.Convert(item);
}
@ -75,27 +95,41 @@ public class SourcesController : ControllerBase
Source = "feed",
Enabled = true,
Url = url,
Tags = $"{SourceTypes.Rss}, {name}"
Tags = $"{SourceTypes.Rss},{name}"
};
var item = _sources.New(m);
return SourceDto.Convert(item);
}
[HttpPost("new/youtube")]
public SourceDto NewYoutube(string name, string url)
public SourceDto NewYoutube(string url)
{
var res = _sources.GetByNameAndType(name, SourceTypes.YouTube);
var res = _sources.GetByUrl(url);
if (res.ID != Guid.Empty) return SourceDto.Convert(res);
var htmlClient = new HtmlPageReader(new HtmlPageReaderOptions
{
Url = url
});
htmlClient.Parse();
var item = _sources.New(new SourceModel
{
Site = SourceTypes.YouTube,
Type = SourceTypes.YouTube,
Name = name,
Name = htmlClient.Data.Header.Title,
Source = "feed",
Url = url,
Url = "feed",
Enabled = true,
Tags = $"{SourceTypes.YouTube}, {name}"
Tags = $"{SourceTypes.YouTube},{htmlClient.Data.Header.Title}",
YoutubeId = htmlClient.Data.Header.YoutubeChannelID ?? ""
});
_icons.New(new IconModel
{
Id = Guid.NewGuid(),
FileName = htmlClient.Data.Header.Image,
SourceId = item.ID
});
return SourceDto.Convert(item);
@ -115,11 +149,48 @@ public class SourcesController : ControllerBase
Url = $"https://twitch.tv/{name}",
Source = "api",
Enabled = true,
Tags = $"{SourceTypes.Twitch}, {name}"
Tags = $"{SourceTypes.Twitch},{name}"
});
return SourceDto.Convert(item);
}
/// <summary>
///     Registers a GitHub repository as a source. If a source with the same URL is already
///     stored, that record is returned instead of creating a duplicate.
/// </summary>
/// <param name="url">Repository URL, expected in the form https://github.com/{owner}/{repo}.</param>
/// <returns>
///     The stored source, or an empty <see cref="SourceDto" /> when the URL is missing, does not
///     mention github.com, or does not contain both an owner and a repository segment.
/// </returns>
[HttpPost("new/github")]
public SourceDto NewGithub(string url)
{
    if (string.IsNullOrWhiteSpace(url) || !url.Contains("github.com")) return new SourceDto();

    var res = _sources.GetByUrl(url);
    if (res.ID != Guid.Empty) return SourceDto.Convert(res);

    // "https://github.com/owner/repo" splits into ["https:", "", "github.com", "owner", "repo"],
    // so the owner and repository are expected at index 3 and 4. Reject anything shorter
    // instead of failing with an IndexOutOfRangeException.
    var slice = url.Split('/');
    if (slice.Length < 5 || slice[3] == "" || slice[4] == "") return new SourceDto();

    var owner = slice[3];
    var repository = slice[4];

    var pageReader = new HtmlPageReader(new HtmlPageReaderOptions
    {
        Url = url
    });
    pageReader.Parse();

    var item = _sources.New(new SourceModel
    {
        Site = SourceTypes.GitHub,
        Type = SourceTypes.GitHub,
        Name = $"{owner}/{repository}",
        Url = url,
        Source = "feed",
        Enabled = true,
        // Comma separated without spaces, matching the tag format of the other "new/*" endpoints.
        Tags = $"{SourceTypes.GitHub},{owner},{repository}"
    });

    // Only store an icon record when the page actually exposed an image, same as NewReddit.
    if (!string.IsNullOrEmpty(pageReader.Data.Header.Image))
        _icons.New(new IconModel
        {
            Id = Guid.NewGuid(),
            FileName = pageReader.Data.Header.Image,
            SourceId = item.ID
        });

    return SourceDto.Convert(item);
}
[HttpGet("{id}")]
public SourceDto GetById(Guid id)
{

View File

@ -2,10 +2,12 @@ using Hangfire;
using Hangfire.MemoryStorage;
using HealthChecks.UI.Client;
using Microsoft.AspNetCore.Diagnostics.HealthChecks;
using Newsbot.Collector.Api;
using Newsbot.Collector.Domain.Consts;
using Newsbot.Collector.Domain.Models;
using Newsbot.Collector.Services.Jobs;
using Newsbot.Collector.Domain.Models.Config;
using Serilog;
using ILogger = Serilog.ILogger;
var builder = WebApplication.CreateBuilder(args);
@ -17,15 +19,7 @@ builder.Host.UseSerilog();
var config = GetConfiguration();
builder.Configuration.AddConfiguration(config);
Log.Logger = new LoggerConfiguration()
.WriteTo.Console()
.WriteTo.OpenTelemetry(
config.GetValue<string>(ConfigConnectionStringConst.OpenTelemetry) ?? "",
resourceAttributes: new Dictionary<string, object>
{
{ "service.name", "newsbot-collector-api" }
})
.CreateLogger();
Log.Logger = GetLogger(config);
Log.Information("Starting up");
// Configure Hangfire
@ -43,6 +37,11 @@ builder.Services.AddSwaggerGen();
builder.Services.Configure<ConnectionStrings>(config.GetSection("ConnectionStrings"));
builder.Services.Configure<ConfigSectionConnectionStrings>(config.GetSection(ConfigSectionsConst.ConnectionStrings));
builder.Services.Configure<ConfigSectionRssModel>(config.GetSection(ConfigSectionsConst.Rss));
builder.Services.Configure<ConfigSectionYoutubeModel>(config.GetSection(ConfigSectionsConst.Youtube));
//builder.Services.Configure<
var app = builder.Build();
// Configure the HTTP request pipeline.
@ -55,7 +54,7 @@ if (config.GetValue<bool>("EnableSwagger"))
app.UseHttpsRedirection();
app.UseHangfireDashboard();
SetupRecurringJobs(config, Log.Logger);
BackgroundJobs.SetupRecurringJobs(config);
app.UseAuthorization();
@ -77,21 +76,22 @@ static IConfiguration GetConfiguration()
.Build();
}
static void SetupRecurringJobs(IConfiguration configuration, ILogger logger)
static ILogger GetLogger(IConfiguration configuration)
{
//RecurringJob.AddOrUpdate<HelloWorldJob>("Example", x => x.InitAndExecute(new HelloWorldJobOptions
//{
// Message = "Hello from the background!"
//}), "0/1 * * * *");
var otel = configuration.GetValue<string>(ConfigConnectionStringConst.OpenTelemetry) ?? "";
RecurringJob.AddOrUpdate<RssWatcherJob>("RSS", x => x.InitAndExecute(new RssWatcherJobOptions
{
ConnectionString = configuration.GetSection(ConfigConnectionStringConst.Database).Value ?? ""
}), "15 0-23 * * *");
if (otel == "")
return Log.Logger = new LoggerConfiguration()
.WriteTo.Console()
.CreateLogger();
RecurringJob.AddOrUpdate<DiscordNotificationJob>("Discord Alerts", x =>
x.InitAndExecute(new DiscordNotificationJobOptions
{
DatabaseConnectionString = configuration.GetSection(ConfigConnectionStringConst.Database).Value ?? ""
}), "5/10 * * * *");
return Log.Logger = new LoggerConfiguration()
.WriteTo.Console()
.WriteTo.OpenTelemetry(
otel,
resourceAttributes: new Dictionary<string, object>
{
{ "service.name", "newsbot-collector-api" }
})
.CreateLogger();
}

View File

@ -6,45 +6,27 @@ SELECT 'up SQL query';
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
-- Final Fantasy XIV Entries
INSERT INTO sources VALUES
(uuid_generate_v4(), 'ffxiv', 'Final Fantasy XIV - NA', 'scrape', 'ffxiv', 'a', TRUE, 'https://na.finalfantasyxiv.com/lodestone/', 'ffxiv, final, fantasy, xiv, na, lodestone');
INSERT INTO sources VALUES
(uuid_generate_v4(), 'ffxiv', 'Final Fantasy XIV - JP', 'scrape', 'ffxiv', 'a', FALSE, 'https://jp.finalfantasyxiv.com/lodestone/', 'ffxiv, final, fantasy, xiv, jp, lodestone');
INSERT INTO sources VALUES
(uuid_generate_v4(), 'ffxiv', 'Final Fantasy XIV - EU', 'scrape', 'ffxiv', 'a', FALSE, 'https://eu.finalfantasyxiv.com/lodestone/', 'ffxiv, final, fantasy, xiv, eu, lodestone');
INSERT INTO sources VALUES
(uuid_generate_v4(), 'ffxiv', 'Final Fantasy XIV - FR', 'scrape', 'ffxiv', 'a', FALSE, 'https://fr.finalfantasyxiv.com/lodestone/', 'ffxiv, final, fantasy, xiv, fr, lodestone');
INSERT INTO sources VALUES
(uuid_generate_v4(), 'ffxiv', 'Final Fantasy XIV - DE', 'scrape', 'ffxiv', 'a', FALSE, 'https://de.finalfantasyxiv.com/lodestone/', 'ffxiv, final, fantasy, xiv, de, lodestone');
-- Reddit Entries
INSERT INTO sources VALUES
(uuid_generate_v4(), 'reddit', 'dadjokes', 'feed', 'reddit', 'a', TRUE, 'https://reddit.com/r/dadjokes', 'reddit, dadjokes');
INSERT INTO sources VALUES
(uuid_generate_v4(), 'reddit', 'steamdeck', 'feed', 'reddit', 'a', TRUE, 'https://reddit.com/r/steamdeck', 'reddit, steam deck, steam, deck');
-- Youtube Entries
INSERT INTO sources VALUES
(uuid_generate_v4(), 'youtube', 'Game Grumps', 'feed', 'youtube', 'a', TRUE, 'https://www.youtube.com/user/GameGrumps', 'youtube, game grumps, game, grumps');
-- RSS Entries
INSERT INTO sources VALUES
(uuid_generate_v4(), 'steampowered', 'steam deck', 'feed', 'rss', 'a', TRUE, 'https://store.steampowered.com/feeds/news/app/1675200/?cc=US&l=english&snr=1_2108_9__2107', 'rss, steampowered, steam, deck, steam deck');
-- Twitch Entries
INSERT INTO sources VALUES
(uuid_generate_v4(), 'twitch', 'Nintendo', 'api', 'twitch', 'a', TRUE, 'https://twitch.tv/nintendo', 'twitch, nintendo');
INSERT INTO sources
VALUES (uuid_generate_v4(), 'ffxiv', 'Final Fantasy XIV - NA', 'scrape', 'ffxiv', 'a', TRUE,
'https://na.finalfantasyxiv.com/lodestone/', 'ffxiv, final, fantasy, xiv, na, lodestone');
INSERT INTO sources
VALUES (uuid_generate_v4(), 'ffxiv', 'Final Fantasy XIV - JP', 'scrape', 'ffxiv', 'a', FALSE,
'https://jp.finalfantasyxiv.com/lodestone/', 'ffxiv, final, fantasy, xiv, jp, lodestone');
INSERT INTO sources
VALUES (uuid_generate_v4(), 'ffxiv', 'Final Fantasy XIV - EU', 'scrape', 'ffxiv', 'a', FALSE,
'https://eu.finalfantasyxiv.com/lodestone/', 'ffxiv, final, fantasy, xiv, eu, lodestone');
INSERT INTO sources
VALUES (uuid_generate_v4(), 'ffxiv', 'Final Fantasy XIV - FR', 'scrape', 'ffxiv', 'a', FALSE,
'https://fr.finalfantasyxiv.com/lodestone/', 'ffxiv, final, fantasy, xiv, fr, lodestone');
INSERT INTO sources
VALUES (uuid_generate_v4(), 'ffxiv', 'Final Fantasy XIV - DE', 'scrape', 'ffxiv', 'a', FALSE,
'https://de.finalfantasyxiv.com/lodestone/', 'ffxiv, final, fantasy, xiv, de, lodestone');
-- +goose StatementEnd
-- +goose Down
-- +goose StatementBegin
--SELECT 'down SQL query';
DELETE FROM sources where source = 'reddit' and name = 'dadjokes';
DELETE FROM sources where source = 'reddit' and name = 'steamdeck';
DELETE FROM sources where source = 'ffxiv';
DELETE FROM sources WHERE source = 'twitch' and name = 'Nintendo';
DELETE FROM sources WHERE source = 'youtube' and name = 'Game Grumps';
DELETE FROM SOURCES WHERE source = 'rss' and name = 'steam deck';
DELETE
FROM sources
where source = 'ffxiv';
-- +goose StatementEnd

View File

@ -1,20 +0,0 @@
-- +goose Up
-- +goose StatementBegin
-- Seeds one row into the sources table for the "Let's Mosley" podcast RSS feed.
-- The INSERT has no column list, so the values are positional and must match the
-- table's column order (NOTE(review): column order is not visible here - confirm against the schema).
-- A doubled apostrophe ('') is the SQL escape for a literal single quote.
INSERT INTO sources VALUES (
uuid_generate_v4(),
'rss',
'Let''s Mosley',
'feed',
'rss',
'podcast',
TRUE,
'https://anchor.fm/s/6c7aa4c4/podcast/rss',
'rss,let''s mosley,fitnes,coach',
FALSE);
-- +goose StatementEnd
-- +goose Down
-- +goose StatementBegin
-- Rolls the seed back by deleting the row that matches the same type and name.
DELETE FROM sources Where type = 'rss' And Name = 'Let''s Mosley'
-- +goose StatementEnd

View File

@ -9,7 +9,7 @@ namespace Newsbot.Collector.Database.Repositories;
public class SourcesTable : ISourcesRepository
{
private string _connectionString;
private readonly string _connectionString;
public SourcesTable(string connectionString)
{
@ -19,25 +19,16 @@ public class SourcesTable : ISourcesRepository
public SourcesTable(IConfiguration configuration)
{
var connstr = configuration.GetConnectionString("database");
if (connstr is null)
{
connstr = "";
}
if (connstr is null) connstr = "";
_connectionString = connstr;
}
private IDbConnection OpenConnection(string connectionString)
{
var conn = new NpgsqlConnection(_connectionString);
conn.Open();
return conn;
}
public SourceModel New(SourceModel model)
{
model.ID = Guid.NewGuid();
using var conn = OpenConnection(_connectionString);
var query = "Insert Into Sources (ID, Site, Name, Source, Type, Value, Enabled, Url, Tags) Values (@id ,@site,@name,@source,@type,@value,@enabled,@url,@tags);";
var query =
"Insert Into Sources (ID, Site, Name, Source, Type, Value, Enabled, Url, Tags, YoutubeId) Values (@id ,@site,@name,@source,@type,@value,@enabled,@url,@tags,@youtubeid);";
conn.Execute(query, new
{
id = model.ID,
@ -48,7 +39,8 @@ public class SourcesTable : ISourcesRepository
model.Value,
model.Enabled,
model.Url,
model.Tags
model.Tags,
model.YoutubeId
});
return model;
}
@ -61,10 +53,7 @@ public class SourcesTable : ISourcesRepository
{
id = ID
});
if (res.Count() == 0)
{
return new SourceModel();
}
if (res.Count() == 0) return new SourceModel();
return res.First();
}
@ -83,10 +72,7 @@ public class SourcesTable : ISourcesRepository
name = Name
});
if (res.Count() == 0)
{
return new SourceModel();
}
if (res.Count() == 0) return new SourceModel();
return res.First();
}
@ -96,14 +82,24 @@ public class SourcesTable : ISourcesRepository
var query = "Select * from Sources WHERE name = @name and type = @type;";
var res = conn.Query<SourceModel>(query, new
{
name = name,
type = type
name, type
});
if (res.Count() == 0)
if (res.Count() == 0) return new SourceModel();
return res.First();
}
public SourceModel GetByUrl(string url)
{
using var conn = OpenConnection(_connectionString);
var query = "Select * from Sources WHERE url = @url;";
var res = conn.Query<SourceModel>(query, new
{
return new SourceModel();
}
url
});
if (res.ToList().Count == 0) return new SourceModel();
return res.First();
}
@ -115,8 +111,7 @@ public class SourcesTable : ISourcesRepository
Fetch Next @count Rows Only;";
return conn.Query<SourceModel>(query, new
{
page = page * count,
count = count
page = page * count, count
}).ToList();
}
@ -126,8 +121,7 @@ public class SourcesTable : ISourcesRepository
var query = "Select * From Sources where Source = @source Limit @limit;";
return conn.Query<SourceModel>(query, new
{
source = source,
limit = limit
source, limit
}).ToList();
}
@ -137,28 +131,44 @@ public class SourcesTable : ISourcesRepository
var query = "Select * From Sources where Type = @type Limit @limit;";
return conn.Query<SourceModel>(query, new
{
type = type,
limit = limit
type, limit
}).ToList();
}
public int Disable(Guid ID)
public int Disable(Guid id)
{
using var conn = OpenConnection(_connectionString);
var query = "Update Sources Set Enabled = FALSE where ID = @id;";
return conn.Execute(query, new
{
id = ID
id
});
}
public int Enable(Guid ID)
public int Enable(Guid id)
{
using var conn = OpenConnection(_connectionString);
var query = "Update Sources Set Enabled = TRUE where ID = @id;";
return conn.Execute(query, new
{
id = ID
id
});
}
/// <summary>
///     Sets the youtubeid column of the source row with the given ID.
/// </summary>
/// <param name="id">ID of the source row to update.</param>
/// <param name="youtubeId">Value to write into the youtubeid column.</param>
/// <returns>The value returned by Execute for the update statement.</returns>
public int UpdateYoutubeId(Guid id, string youtubeId)
{
    const string sql = "Update Sources Set youtubeid = @youtubeId where ID = @id;";
    var parameters = new { id, youtubeId };

    using var connection = OpenConnection(_connectionString);
    return connection.Execute(sql, parameters);
}
/// <summary>
///     Creates and opens a PostgreSQL connection. The caller owns the returned connection
///     and is responsible for disposing it.
/// </summary>
/// <param name="connectionString">Connection string used to create the connection.</param>
/// <returns>An opened connection.</returns>
private IDbConnection OpenConnection(string connectionString)
{
    // Use the argument that was passed in; previously it was ignored in favour of the field.
    var conn = new NpgsqlConnection(connectionString);
    try
    {
        conn.Open();
        return conn;
    }
    catch
    {
        // Do not leak the connection object when opening fails.
        conn.Dispose();
        throw;
    }
}
}

View File

@ -1,9 +0,0 @@
namespace Newsbot.Collector.Domain.Consts;
/// <summary>
///     String constants for the "Reddit:*" configuration keys.
/// </summary>
public class ConfigRedditConst
{
    /// <summary>Key "Reddit:IsEnabled".</summary>
    public const string IsEnabled = "Reddit:IsEnabled";

    /// <summary>Key "Reddit:PullTop".</summary>
    public const string PullTop = "Reddit:PullTop";

    /// <summary>Key "Reddit:PullHot".</summary>
    public const string PullHot = "Reddit:PullHot";

    /// <summary>Key "Reddit:PullNsfw".</summary>
    public const string PullNsfw = "Reddit:PullNsfw";
}

View File

@ -1,4 +1,3 @@
using System.Globalization;
using Newsbot.Collector.Domain.Models;
namespace Newsbot.Collector.Domain.Interfaces;
@ -10,9 +9,11 @@ public interface ISourcesRepository
public SourceModel GetByID(string ID);
public SourceModel GetByName(string name);
public SourceModel GetByNameAndType(string name, string type);
SourceModel GetByUrl(string url);
public List<SourceModel> List(int page, int count);
public List<SourceModel> ListBySource(string source, int limit);
public List<SourceModel> ListByType(string type, int limit = 25);
public int Disable(Guid ID);
public int Enable(Guid ID);
public int Disable(Guid id);
public int Enable(Guid id);
public int UpdateYoutubeId(Guid id, string youtubeId);
}

View File

@ -42,9 +42,10 @@ public class DiscordWebHookModel
public class IconModel
{
public Guid ID { get; set; }
public Guid Id { get; set; }
public string FileName { get; set; } = "";
public string Site { get; set; } = "";
public Guid SourceId { get; set; }
}
public class SettingModel
@ -61,7 +62,7 @@ public class SourceModel
public string Site { get; set; } = "";
public string Name { get; set; } = "";
// Source use to deinfe the worker to query with but moving to Type as it was not used really.
// Source use to define the worker to query with but moving to Type as it was not used really.
public string Source { get; set; } = "";
public string Type { get; set; } = "";
public string Value { get; set; } = "";
@ -69,6 +70,7 @@ public class SourceModel
public string Url { get; set; } = "";
public string Tags { get; set; } = "";
public bool Deleted { get; set; }
public string YoutubeId { get; set; } = "";
}
public class SubscriptionModel

View File

@ -5,12 +5,11 @@ namespace Newsbot.Collector.Services.HtmlParser;
public class HeadParserClient
{
private const string XPathMetaTag = "//head/meta";
private const string XPathHeadMetaTag = "//head/meta";
private const string XPathBodyMetaTag = "//body/meta";
private const string XPathLinkTag = "//head/link";
public HeadParserModel Data { get; set; }
private string _htmlContent;
private readonly string _htmlContent;
public HeadParserClient(string htmlContent, bool useBrowser = false)
{
@ -18,6 +17,8 @@ public class HeadParserClient
Data = new HeadParserModel();
}
public HeadParserModel Data { get; set; }
public void Parse()
{
Data.Title = GetMetaTitle();
@ -36,9 +37,23 @@ public class HeadParserClient
var htmlDoc = new HtmlDocument();
htmlDoc.LoadHtml(_htmlContent);
var tags = htmlDoc.DocumentNode.SelectNodes(XPathMetaTag).ToList();
var allTags = new List<HtmlNode>();
return tags;
var headerTags = htmlDoc.DocumentNode.SelectNodes(XPathHeadMetaTag).ToList();
allTags.AddRange(headerTags);
try
{
var bodyTags = htmlDoc.DocumentNode.SelectNodes(XPathBodyMetaTag).ToList();
allTags.AddRange(bodyTags);
}
catch
{
// no tags found in the body and that's ok.
// we check the body thanks to Youtube.
}
return allTags;
}
private List<HtmlNode> CollectLinkTags()
@ -53,13 +68,13 @@ public class HeadParserClient
{
foreach (var meta in html)
{
if (meta.Attributes.Count == 0) continue;
;
//Console.WriteLine($"Name={meta.Attributes[0].Name} & Value={meta.Attributes[0].Value}");
if (meta.Attributes[0].Value.Contains(Tag) == false)
{
continue;
}
if (meta.Attributes[0].Value.Contains(Tag) == false) continue;
return meta.Attributes[1].Value;
}
return "";
}
@ -68,91 +83,86 @@ public class HeadParserClient
foreach (var tag in tags)
{
var res = GetTagValue(tag, htmlTags);
if (res == "")
{
continue;
}
if (res == "") continue;
return res;
}
return "";
}
public string GetMetaTitle()
{
var htmlTags = CollectMetaTags();
string[] tags = new string[] { "twitter:title", "og:title", "title" };
string[] tags = { "twitter:title", "og:title", "title" };
return FindFirstResult(tags, htmlTags);
}
public string GetMetaDescription()
{
var htmlTags = CollectMetaTags();
string[] tags = new string[] { "twitter:description", "og:description", "description" };
string[] tags = { "twitter:description", "og:description", "description" };
return FindFirstResult(tags, htmlTags);
}
public string GetMetaImage()
{
var htmlTags = CollectMetaTags();
string[] tags = new string[] { "twitter:image", "og:image", "image" };
string[] tags = { "twitter:image", "og:image", "image" };
return FindFirstResult(tags, htmlTags);
}
public string GetMetaUrl()
{
var htmlTags = CollectMetaTags();
string[] tags = new string[] { "twitter:url", "og:url", "url" };
string[] tags = { "twitter:url", "og:url", "url" };
return FindFirstResult(tags, htmlTags);
}
public string GetMetaPageType()
{
var htmlTags = CollectMetaTags();
string[] tags = new string[] { "og:type", "type" };
string[] tags = { "og:type", "type" };
return FindFirstResult(tags, htmlTags);
}
public string GetMetaColorTheme()
{
var htmlTags = CollectMetaTags();
string[] tags = new string[] { "theme-color" };
string[] tags = { "theme-color" };
return FindFirstResult(tags, htmlTags);
}
public string GetYouTubeChannelId()
{
var htmlTags = CollectMetaTags();
string[] tags = new string[] { "channelId" };
return FindFirstResult(tags, htmlTags);
string[] tags = { "og:url", "channelId" };
var results = FindFirstResult(tags, htmlTags);
var id = results.Replace("https://www.youtube.com/channel/", "");
return id;
}
/// <summary>
/// This will parse the headers looking for known keys that will contain a RSS feed link.
/// If the feed is not found, this will throw an exception (MissingHeaderValueException).
/// This will parse the headers looking for known keys that will contain a RSS feed link.
/// If the feed is not found, this will throw an exception (MissingHeaderValueException).
/// </summary>
/// <returns></returns>
public string GetSiteFeed()
{
var htmlTags = CollectLinkTags();
var tags = new string[] { "alternate" };
var tags = new[] { "alternate" };
try
{
var attr = FindFirstAttribute(tags, htmlTags);
foreach (var item in attr)
{
if (item.Name != "href")
{
continue;
}
if (item.Name != "href") continue;
var uri = item.Value;
if (uri.StartsWith("//"))
{
uri = uri.Replace("//", "https://");
}
if (uri.StartsWith("//")) uri = uri.Replace("//", "https://");
return uri;
}
return "";
}
catch
@ -165,7 +175,6 @@ public class HeadParserClient
private HtmlAttributeCollection FindFirstAttribute(string[] tags, List<HtmlNode> htmlTags)
{
foreach (var tag in tags)
{
try
{
var res = GetValidAttribute(tag, htmlTags);
@ -175,7 +184,7 @@ public class HeadParserClient
{
// Nothing was found in the given tag but we will keep looking till we finish all the entries.
}
}
throw new MissingHeaderValueException("Unable to find the requested value");
}
@ -183,12 +192,10 @@ public class HeadParserClient
{
foreach (var meta in html)
{
if (meta.Attributes[0].Value.Contains(Tag) == false)
{
continue;
}
if (meta.Attributes[0].Value.Contains(Tag) == false) continue;
return meta.Attributes;
}
throw new MissingHeaderValueException("Site does not expose requested tag.");
}
}

View File

@ -1,25 +1,32 @@
using HtmlAgilityPack;
using Newsbot.Collector.Domain.Exceptions;
namespace Newsbot.Collector.Services.HtmlParser;
/// <summary>
///     Input for <see cref="HtmlPageReader" />. Supply either the page address or the
///     HTML itself; the reader throws when both are left null.
/// </summary>
public class HtmlPageReaderOptions
{
    /// <summary>
    ///     HTML to parse directly. Used as the page content when set.
    /// </summary>
    public string? SourceCode { get; init; }

    /// <summary>
    ///     Address of the page to download. When set, the downloaded content replaces
    ///     anything given through <see cref="SourceCode" />.
    /// </summary>
    public string? Url { get; init; }
}
public class HtmlPageReader
{
private readonly HeadParserClient _headClient;
private readonly string _siteContent;
public HtmlData Data { get; set; }
private HeadParserClient _headClient;
private string _siteContent;
public HtmlPageReader(string pageUrl)
public HtmlPageReader(HtmlPageReaderOptions options)
{
_siteContent = ReadSiteContent(pageUrl);
_headClient = new HeadParserClient(_siteContent);
if (options.SourceCode is not null) _siteContent = options.SourceCode;
if (options.Url is not null) _siteContent = ReadSiteContent(options.Url);
if (_siteContent is null) throw new Exception("SiteContent was not filled and expected.");
_headClient = new HeadParserClient(_siteContent);
Data = new HtmlData();
}
public HtmlData Data { get; set; }
public void Parse()
{
_headClient.Parse();
@ -32,7 +39,7 @@ public class HtmlPageReader
var html = client.GetStringAsync(url);
html.Wait();
var content = html.Result.ToString();
var content = html.Result;
return content;
}
@ -47,24 +54,14 @@ public class HtmlPageReader
htmlDoc.LoadHtml(_siteContent);
var links = htmlDoc.DocumentNode.SelectNodes("//div[contains(@class, 'article-text')]").ToList();
if (links.Count == 0)
{
throw new Exception("Unable to parse body. Tag is unkown.");
}
if (links.Count == 0) throw new Exception("Unable to parse body. Tag is unknown.");
if (links.Count >= 2)
{
throw new Exception("Too many results back for the body");
}
if (links.Count >= 2) throw new Exception("Too many results back for the body");
var content = new List<string>();
foreach (var item in links[0].ChildNodes)
{
if (item.Name == "p")
{
content.Add(item.InnerText);
}
}
//var content = new List<string>();
//foreach (var item in links[0].ChildNodes)
// if (item.Name == "p")
// content.Add(item.InnerText);
return links;
}

View File

@ -1,6 +1,7 @@
using Newsbot.Collector.Database.Repositories;
using Newsbot.Collector.Domain.Interfaces;
using Newsbot.Collector.Domain.Models;
using Newsbot.Collector.Domain.Models.Config;
using Newsbot.Collector.Services.Notifications.Discord;
using Serilog;
@ -8,7 +9,9 @@ namespace Newsbot.Collector.Services.Jobs;
public class DiscordNotificationJobOptions
{
public string? DatabaseConnectionString { get; set; }
public ConfigSectionConnectionStrings? ConnectionStrings { get; set; }
public ConfigSectionNotificationsDiscord? Config { get; set; }
}
public class DiscordNotificationJob
@ -19,6 +22,7 @@ public class DiscordNotificationJob
private IDiscordWebHooksRepository _webhook;
private ISourcesRepository _sources;
private ISubscriptionRepository _subs;
private IIconsRepository _icons;
public DiscordNotificationJob()
{
@ -27,16 +31,22 @@ public class DiscordNotificationJob
_webhook = new DiscordWebhooksTable("");
_sources = new SourcesTable("");
_subs = new SubscriptionsTable("");
_icons = new IconsTable("");
}
public void InitAndExecute(DiscordNotificationJobOptions options)
{
_queue = new DiscordQueueTable(options.DatabaseConnectionString ?? "");
_article = new ArticlesTable(options.DatabaseConnectionString ?? "");
_webhook = new DiscordWebhooksTable(options.DatabaseConnectionString ?? "");
_sources = new SourcesTable(options.DatabaseConnectionString ?? "");
_subs = new SubscriptionsTable(options.DatabaseConnectionString ?? "");
options.ConnectionStrings ??= new ConfigSectionConnectionStrings();
options.Config ??= new ConfigSectionNotificationsDiscord();
_queue = new DiscordQueueTable(options.ConnectionStrings.Database ?? "");
_article = new ArticlesTable(options.ConnectionStrings.Database ?? "");
_webhook = new DiscordWebhooksTable(options.ConnectionStrings.Database ?? "");
_sources = new SourcesTable(options.ConnectionStrings.Database ?? "");
_subs = new SubscriptionsTable(options.ConnectionStrings.Database ?? "");
_icons = new IconsTable(options.ConnectionStrings.Database ?? "");
Execute();
}
@ -59,6 +69,8 @@ public class DiscordNotificationJob
continue;
}
var sourceIcon = _icons.GetBySourceId(sourceDetails.ID);
// Find all the subscriptions for that source
var allSubscriptions = _subs.ListBySourceID(sourceDetails.ID);
@ -74,7 +86,7 @@ public class DiscordNotificationJob
var client = new DiscordWebhookClient(discordDetails.Url);
try
{
client.SendMessage(GenerateDiscordMessage(sourceDetails, articleDetails));
client.SendMessage(GenerateDiscordMessage(sourceDetails, articleDetails, sourceIcon));
}
catch (Exception e)
{
@ -89,7 +101,7 @@ public class DiscordNotificationJob
}
}
public DiscordMessage GenerateDiscordMessage(SourceModel source, ArticlesModel article)
public DiscordMessage GenerateDiscordMessage(SourceModel source, ArticlesModel article, IconModel icon)
{
var embed = new DiscordMessageEmbed
{
@ -99,6 +111,7 @@ public class DiscordNotificationJob
Author = new DiscordMessageEmbedAuthor
{
Name = article.AuthorName,
IconUrl = icon.FileName
},
Footer = new DiscordMessageEmbedFooter
{

View File

@ -1,18 +1,23 @@
using System.ServiceModel.Syndication;
using System.Xml;
using Newsbot.Collector.Database.Repositories;
using Newsbot.Collector.Domain.Consts;
using Newsbot.Collector.Domain.Interfaces;
using Newsbot.Collector.Domain.Models;
using Newsbot.Collector.Domain.Models.Config;
using Newsbot.Collector.Services.HtmlParser;
namespace Newsbot.Collector.Services.Jobs;
public class GithubWatcherJobOptions
{
public string ConnectionString { get; set; } = "";
public ConfigSectionConnectionStrings? ConnectionStrings { get; set; }
//public string ConnectionString { get; set; } = "";
public bool FeaturePullReleases { get; set; } = false;
public bool FeaturePullCommits { get; set; } = false;
public bool PullIssues { get; set; } = false;
//public bool PullIssues { get; set; } = false;
}
public class GithubWatcherJob
@ -28,24 +33,24 @@ public class GithubWatcherJob
_source = new SourcesTable("");
}
public void Init(GithubWatcherJobOptions options)
{
_articles = new ArticlesTable(options.ConnectionString);
_queue = new DiscordQueueTable(options.ConnectionString);
_source = new SourcesTable(options.ConnectionString);
}
public void InitAndExecute(GithubWatcherJobOptions options)
{
Init(options);
options.ConnectionStrings ??= new ConfigSectionConnectionStrings();
_articles = new ArticlesTable(options.ConnectionStrings.Database ?? "");
_queue = new DiscordQueueTable(options.ConnectionStrings.Database ?? "");
_source = new SourcesTable(options.ConnectionStrings.Database ?? "");
Execute();
}
private void Execute()
{
_source.ListBySource(SourceTypes.GitHub, 25);
// query sources for things to pull
var items = new List<ArticlesModel>();
items.AddRange(Collect(new Uri("https://github.com/jtom38/dvb")));
// query */commits/master.atom
@ -56,7 +61,7 @@ public class GithubWatcherJob
{
var items = new List<ArticlesModel>();
Guid placeHolderId = Guid.NewGuid();
var placeHolderId = Guid.NewGuid();
// query */release.atom
items.AddRange(CollectItems($"{url.AbsoluteUri}/releases.atom", placeHolderId));
items.AddRange(CollectItems($"{url.AbsoluteUri}/master.atom", placeHolderId));
@ -75,12 +80,12 @@ public class GithubWatcherJob
{
var itemUrl = item.Links[0].Uri.AbsoluteUri;
var exits = _articles.GetByUrl(itemUrl);
if (exits.ID != Guid.Empty)
{
continue;
}
if (exits.ID != Guid.Empty) continue;
var parser = new HtmlPageReader(itemUrl);
var parser = new HtmlPageReader(new HtmlPageReaderOptions
{
Url = itemUrl
});
parser.Parse();
try
@ -104,6 +109,7 @@ public class GithubWatcherJob
Console.WriteLine(e);
}
}
return items;
}
}

View File

@ -4,6 +4,7 @@ using Newsbot.Collector.Database.Repositories;
using Newsbot.Collector.Domain.Consts;
using Newsbot.Collector.Domain.Interfaces;
using Newsbot.Collector.Domain.Models;
using Newsbot.Collector.Domain.Models.Config;
using Newsbot.Collector.Services.HtmlParser;
using Serilog;
@ -11,13 +12,18 @@ namespace Newsbot.Collector.Services.Jobs;
public class RssWatcherJobOptions
{
public string? ConnectionString { get; init; }
public string? OpenTelemetry { get; init; }
//public string? ConnectionString { get; init; }
//public string? OpenTelemetry { get; init; }
public ConfigSectionConnectionStrings? ConnectionStrings { get; set; }
public ConfigSectionRssModel? Config { get; set; }
}
// This class was made to work with Hangfire and it does not support constructors.
public class RssWatcherJob
{
private const string JobName = "RssWatcherJob";
private IArticlesRepository _articles;
private ILogger _logger;
private IDiscordQueueRepository _queue;
@ -28,62 +34,53 @@ public class RssWatcherJob
_articles = new ArticlesTable("");
_queue = new DiscordQueueTable("");
_source = new SourcesTable("");
_logger = GetLogger("");
_logger = JobLogger.GetLogger("", JobName);
}
// Wires the repositories and the logger from the supplied options, then runs Execute
// unless options.Config.IsEnabled is false.
public void InitAndExecute(RssWatcherJobOptions options)
{
Init(options);
// Missing config sections are replaced with fresh instances so the member reads below cannot hit null.
options.ConnectionStrings ??= new ConfigSectionConnectionStrings();
options.Config ??= new ConfigSectionRssModel();
_logger.Information("RssWatcherJob - Job was triggered");
_logger.Information("RssWatcherJob - Setting up the job");
// An unset Database or OpenTelemetry value is passed on as an empty string.
_articles = new ArticlesTable(options.ConnectionStrings.Database ?? "");
_queue = new DiscordQueueTable(options.ConnectionStrings.Database ?? "");
_source = new SourcesTable(options.ConnectionStrings.Database ?? "");
_logger = JobLogger.GetLogger(options.ConnectionStrings.OpenTelemetry ?? "", JobName);
_logger.Information($"{JobName} - Job was triggered");
// Feature flag: log the reason and leave without calling Execute.
if (!options.Config.IsEnabled)
{
_logger.Information($"{JobName} - Going to exit because feature flag is off.");
return;
}
_logger.Information($"{JobName} - Setting up the job");
Execute();
}
// Builds a Serilog logger that writes to the console and to the OpenTelemetry sink,
// installs it as the global Log.Logger and returns that same instance.
private ILogger GetLogger(string connectionString)
{
    // Attributes attached to everything sent through the OpenTelemetry sink.
    var attributes = new Dictionary<string, object>
    {
        { "service.name", "newsbot-collector-api" },
        { "Job", "RssWatcherJob" }
    };

    var configuration = new LoggerConfiguration()
        .WriteTo.Console()
        .WriteTo.OpenTelemetry(connectionString, resourceAttributes: attributes);

    ILogger logger = configuration.CreateLogger();
    Log.Logger = logger;
    return logger;
}
// Rebuilds the three table classes and the logger from the supplied options.
// An unset ConnectionString or OpenTelemetry value is passed on as an empty string.
public void Init(RssWatcherJobOptions options)
{
    var database = options.ConnectionString ?? "";
    var telemetry = options.OpenTelemetry ?? "";

    _articles = new ArticlesTable(database);
    _queue = new DiscordQueueTable(database);
    _source = new SourcesTable(database);
    _logger = GetLogger(telemetry);
}
// Lists every source of type SourceTypes.Rss, collects the posts of each one and
// hands the combined list to UpdateDatabase. Progress is logged at each step.
public void Execute()
{
// Accumulates the posts of all sources so the database is updated once at the end.
var articles = new List<ArticlesModel>();
_logger.Information("RssWatcherJob - Requesting sources");
_logger.Information($"{JobName} - Requesting sources");
var sources = _source.ListByType(SourceTypes.Rss);
_logger.Information($"RssWatcherJob - Got {sources.Count} back");
_logger.Information($"{JobName} - Got {sources.Count} back");
foreach (var source in sources)
{
_logger.Information($"RssWatcherJob - Starting to process '{source.Name}'");
_logger.Information("RssWatcherJob - Starting to request feed to be processed");
_logger.Information($"{JobName} - Starting to process '{source.Name}'");
_logger.Information($"{JobName} - Starting to request feed to be processed");
// Collect is called without a sleep argument, so its default (3000) applies.
var results = Collect(source.Url, source.ID);
_logger.Information($"RssWatcherJob - Collected {results.Count} posts");
_logger.Information($"{JobName} - Collected {results.Count} posts");
articles.AddRange(results);
}
_logger.Information("RssWatcherJob - Sending posts over to the database");
_logger.Information($"{JobName} - Sending posts over to the database");
UpdateDatabase(articles);
_logger.Information("RssWatcherJob - Done!");
_logger.Information($"{JobName} - Done!");
}
public List<ArticlesModel> Collect(string url, Guid sourceId, int sleep = 3000)
@ -101,7 +98,10 @@ public class RssWatcherJob
// If we have, skip and save the site bandwidth
if (IsThisUrlKnown(articleUrl)) continue;
var meta = new HtmlPageReader(articleUrl);
var meta = new HtmlPageReader(new HtmlPageReaderOptions
{
Url = articleUrl
});
meta.Parse();
var article = new ArticlesModel

View File

@ -1,19 +1,18 @@
<Project Sdk="Microsoft.NET.Sdk">
<ItemGroup>
<ProjectReference Include="..\Newsbot.Collector.Domain\Newsbot.Collector.Domain.csproj"/>
<ProjectReference Include="..\Newsbot.Collector.Database\Newsbot.Collector.Database.csproj"/>
<ProjectReference Include="..\Newsbot.Collector.Domain\Newsbot.Collector.Domain.csproj" />
<ProjectReference Include="..\Newsbot.Collector.Database\Newsbot.Collector.Database.csproj" />
</ItemGroup>
<ItemGroup>
<PackageReference Include="Google.Apis.YouTube.v3" Version="1.60.0.2945"/>
<PackageReference Include="HtmlAgilityPack" Version="1.11.46"/>
<PackageReference Include="Selenium.WebDriver" Version="4.8.1"/>
<PackageReference Include="Selenium.WebDriver.GeckoDriver" Version="0.32.2"/>
<PackageReference Include="Serilog" Version="2.12.0"/>
<PackageReference Include="Serilog.Sinks.Console" Version="4.1.0"/>
<PackageReference Include="Serilog.Sinks.OpenTelemetry" Version="1.0.0-dev-00113"/>
<PackageReference Include="System.ServiceModel.Syndication" Version="7.0.0"/>
<PackageReference Include="HtmlAgilityPack" Version="1.11.46" />
<PackageReference Include="Selenium.WebDriver" Version="4.8.1" />
<PackageReference Include="Selenium.WebDriver.GeckoDriver" Version="0.32.2" />
<PackageReference Include="Serilog" Version="2.12.0" />
<PackageReference Include="Serilog.Sinks.Console" Version="4.1.0" />
<PackageReference Include="Serilog.Sinks.OpenTelemetry" Version="1.0.0-dev-00113" />
<PackageReference Include="System.ServiceModel.Syndication" Version="7.0.0" />
</ItemGroup>
<PropertyGroup>

View File

@ -8,12 +8,11 @@ namespace Newsbot.Collector.Services.Notifications.Discord;
public class DiscordWebhookClient : IDiscordNotificatioClient
{
private string[] _webhooks;
private readonly string[] _webhooks;
// Creates a client that posts to a single webhook.
public DiscordWebhookClient(string webhook)
{
// Stored as a one-element array so SendMessage can iterate _webhooks in every case.
_webhooks = new string[] { webhook };
_webhooks = new[] { webhook };
}
public DiscordWebhookClient(string[] webhooks)
@ -23,25 +22,22 @@ public class DiscordWebhookClient : IDiscordNotificatioClient
// Serializes the payload to JSON and POSTs it to every webhook in _webhooks.
// Throws an Exception as soon as one response is not 204 No Content.
public void SendMessage(DiscordMessage payload)
{
// Embeds, when present, are handed to MessageValidation.IsEmbedFooterValid before anything is sent.
if (payload.Embeds is not null)
{
MessageValidation.IsEmbedFooterValid(payload.Embeds);
}
if (payload.Embeds is not null) MessageValidation.IsEmbedFooterValid(payload.Embeds);
foreach (var webhook in _webhooks)
{
// NullValueHandling.Ignore leaves null-valued properties out of the JSON body.
var jsonRaw = JsonConvert.SerializeObject(payload, Newtonsoft.Json.Formatting.None, new JsonSerializerSettings { NullValueHandling = NullValueHandling.Ignore });
var jsonRaw = JsonConvert.SerializeObject(payload, Formatting.None,
new JsonSerializerSettings { NullValueHandling = NullValueHandling.Ignore });
using StringContent jsonContent = new(jsonRaw, Encoding.UTF8, "application/json");
// NOTE(review): a new HttpClient is created for every webhook — consider sharing one instance.
using var client = new HttpClient();
var resp = client.PostAsync(webhook, jsonContent);
// Blocks until the request has finished; this method is synchronous.
resp.Wait();
// can be 204 or a message, might be 200
Console.WriteLine(resp.Result.StatusCode);
// NOTE(review): the comment above mentions 200, but only 204 is accepted here — confirm.
// NOTE(review): "sever" in the message below looks like a typo for "server".
if (resp.Result.StatusCode != HttpStatusCode.NoContent)
{
throw new Exception("Message was not accepted by the sever.");
}
}
}
}

View File

@ -34,6 +34,11 @@ public class DiscordNotificationJobTest
Thumbnail = "https://cdn.arstechnica.net/wp-content/uploads/2023/03/GettyImages-944827400-800x534.jpg",
Description = "Please work",
AuthorName = "No one knows"
},
new IconModel
{
Id = Guid.NewGuid(),
FileName = "https://www.redditstatic.com/desktop2x/img/favicon/android-icon-192x192.png"
});
webhookClient.SendMessage(msg);
}

View File

@ -1,4 +1,6 @@
using Microsoft.Extensions.Configuration;
using Newsbot.Collector.Domain.Models;
using Newsbot.Collector.Domain.Models.Config;
using Newsbot.Collector.Services.Jobs;
namespace Newsbot.Collector.Tests.Jobs;
@ -7,8 +9,12 @@ public class GithubWatcherJobTests
{
private IConfiguration GetConfiguration()
{
var inMemorySettings = new Dictionary<string, string> {
{"ConnectionStrings:database", "Host=localhost;Username=postgres;Password=postgres;Database=postgres;sslmode=disable"}
var inMemorySettings = new Dictionary<string, string>
{
{
"ConnectionStrings:database",
"Host=localhost;Username=postgres;Password=postgres;Database=postgres;sslmode=disable"
}
};
IConfiguration configuration = new ConfigurationBuilder()
@ -26,9 +32,12 @@ public class GithubWatcherJobTests
public void CanPullAFeed()
{
var client = new GithubWatcherJob();
client.Init(new GithubWatcherJobOptions
client.InitAndExecute(new GithubWatcherJobOptions
{
ConnectionString = ConnectionString(),
ConnectionStrings = new ConfigSectionConnectionStrings
{
Database = ConnectionString()
},
FeaturePullCommits = true,
FeaturePullReleases = true
});

View File

@ -1,14 +1,27 @@
using Microsoft.Extensions.Configuration;
using Newsbot.Collector.Domain.Models.Config;
using Newsbot.Collector.Services.Jobs;
using Xunit.Abstractions;
namespace Newsbot.Collector.Tests.Jobs;
public class RssWatcherJobTest
{
private readonly ITestOutputHelper _testOutputHelper;
// Keeps the output helper supplied to the test class for later use.
public RssWatcherJobTest(ITestOutputHelper testOutputHelper) =>
    _testOutputHelper = testOutputHelper;
private IConfiguration GetConfiguration()
{
var inMemorySettings = new Dictionary<string, string> {
{"ConnectionStrings:database", "Host=localhost;Username=postgres;Password=postgres;Database=postgres;sslmode=disable"}
var inMemorySettings = new Dictionary<string, string>
{
{
"ConnectionStrings:database",
"Host=localhost;Username=postgres;Password=postgres;Database=postgres;sslmode=disable"
}
};
IConfiguration configuration = new ConfigurationBuilder()
@ -25,9 +38,9 @@ public class RssWatcherJobTest
// Calls Collect on a freshly constructed RssWatcherJob (no options are supplied)
// against the Engadget feed. Nothing is asserted on the result.
[Fact]
public void CanFindItemsNoDb()
{
var url = "https://www.engadget.com/rss.xml";
const string url = "https://www.engadget.com/rss.xml";
var client = new RssWatcherJob();
// The third argument is Collect's sleep parameter (default 3000), set to 0 here.
var items = client.Collect(url, Guid.NewGuid(), 0);
client.Collect(url, Guid.NewGuid(), 0);
}
[Fact]
@ -35,7 +48,13 @@ public class RssWatcherJobTest
{
var url = "https://www.engadget.com/rss.xml";
var client = new RssWatcherJob();
client.Init(ConnectionString());
client.InitAndExecute(new RssWatcherJobOptions
{
ConnectionStrings = new ConfigSectionConnectionStrings
{
Database = ConnectionString()
}
});
var items = client.Collect(url, Guid.NewGuid(), 0);
client.UpdateDatabase(items);
}
@ -44,20 +63,13 @@ public class RssWatcherJobTest
// Builds an RssWatcherJob for the howtogeek feed URL and drives it with the
// connection string returned by ConnectionString().
public void CanReadHtmlDrivenFeedPage()
{
var url = "https://www.howtogeek.com/feed/";
var client = new RssWatcherJob();
client.Init(ConnectionString());
// The third argument is Collect's sleep parameter, set to 0 here.
var items = client.Collect(url, Guid.NewGuid(), 0);
Console.WriteLine('k');
}
[Fact]
public void InitAndExecuteTest()
{
var client = new RssWatcherJob();
// No Config is set on the options, so InitAndExecute falls back to a new ConfigSectionRssModel.
client.InitAndExecute(new RssWatcherJobOptions
{
ConnectionString = ConnectionString()
ConnectionStrings = new ConfigSectionConnectionStrings
{
Database = ConnectionString()
}
});
}
}

View File

@ -27,4 +27,12 @@
<ProjectReference Include="..\Newsbot.Collector.Database\Newsbot.Collector.Database.csproj" />
</ItemGroup>
<ItemGroup>
<Content Update="appsettings.json">
<ExcludeFromSingleFile>true</ExcludeFromSingleFile>
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<CopyToPublishDirectory>PreserveNewest</CopyToPublishDirectory>
</Content>
</ItemGroup>
</Project>

View File

@ -7,32 +7,74 @@ public class HtmlPageReaderTests
// Reads https://dotnettutorials.net/ and fails when HeadParserClient.GetSiteFeed
// returns an empty string for its content.
[Fact]
public void BaseSiteContainsRssFeed()
{
var client = new HtmlPageReader("https://dotnettutorials.net/");
var client = new HtmlPageReader(new HtmlPageReaderOptions
{
Url = "https://dotnettutorials.net/"
});
// The head parser works on the content returned by the page reader.
var headClient = new HeadParserClient(client.GetSiteContent());
var feedUri = headClient.GetSiteFeed();
if (feedUri == "")
{
Assert.Fail("Failed to find the RSS feed");
}
if (feedUri == "") Assert.Fail("Failed to find the RSS feed");
}
// Reads https://www.engadget.com/ and fails when HeadParserClient.GetSiteFeed returns an empty string.
// NOTE(review): despite the name, this test expects a feed to be found — confirm the intent.
[Fact]
public void SiteDoesNotReturnRssFeed()
{
var client = new HtmlPageReader("https://www.engadget.com/");
var client = new HtmlPageReader(new HtmlPageReaderOptions
{
Url = "https://www.engadget.com/"
});
var headClient = new HeadParserClient(client.GetSiteContent());
var feedUri = headClient.GetSiteFeed();
// The failure is raised with an empty message.
if (feedUri == "")
{
Assert.Fail("");
}
if (feedUri == "") Assert.Fail("");
}
// Points the page reader at one Engadget article and prints what CollectPostContent returns.
// Nothing is asserted; the test only fails if one of the calls throws.
[Fact]
public void CanFindBodyOfTheArticle()
{
var client = new HtmlPageReader("https://www.engadget.com/apple-is-convinced-my-dog-is-stalking-me-143100932.html");
var client = new HtmlPageReader(new HtmlPageReaderOptions
{
Url = "https://www.engadget.com/apple-is-convinced-my-dog-is-stalking-me-143100932.html"
});
var c = client.CollectPostContent();
Console.WriteLine(c);
}
[Fact]
public void FindYoutubeChannelId()
{
    // Arrange: a channel page addressed by its @handle.
    const string channelUrl = "https://www.youtube.com/@CityPlannerPlays";
    var options = new HtmlPageReaderOptions { Url = channelUrl };
    var reader = new HtmlPageReader(options);

    // Act
    reader.Parse();

    // Assert: the parsed header must carry a channel id.
    var channelId = reader.Data.Header.YoutubeChannelID;
    if (channelId is null)
    {
        Assert.Fail("missing youtube id");
    }
}
[Fact]
public void CanExtractHeadersFromReddit()
{
    // Arrange: the Reddit front page.
    const string pageUrl = "https://www.reddit.com/";
    var options = new HtmlPageReaderOptions { Url = pageUrl };
    var reader = new HtmlPageReader(options);

    // Act
    reader.Parse();

    // Assert: the parsed header must expose a non-empty image value.
    var image = reader.Data.Header.Image;
    if (image == "")
    {
        Assert.Fail("missing an expected image from the reddit header.");
    }
}
[Fact]
public void CanExtractHeadersFromSubreddit()
{
    // Arrange: a single subreddit page.
    const string pageUrl = "https://www.reddit.com/r/ffxiv";
    var options = new HtmlPageReaderOptions { Url = pageUrl };
    var reader = new HtmlPageReader(options);

    // Act
    reader.Parse();

    // Assert: the parsed header must expose a non-empty image value.
    var image = reader.Data.Header.Image;
    if (image == "")
    {
        Assert.Fail("missing an expected image from the reddit header.");
    }
}
}

View File

@ -1,8 +1,8 @@
using Newsbot.Collector.Database.Repositories;
using Newsbot.Collector.Domain.Models;
namespace Newsbot.Collector.Tests.Tables;
public class SourcesTableTests
{
[Fact]

View File

@ -1,10 +0,0 @@
namespace Newsbot.Collector.Tests;
// Placeholder test class; Test1 has an empty body and asserts nothing.
public class UnitTest1
{
[Fact]
public void Test1()
{
}
}

View File

@ -6,20 +6,19 @@ param (
$ErrorActionPreference = 'Stop'
# Registers a Reddit source by POSTing to "$ApiServer/api/sources/new/reddit" and returns the response.
# $ApiServer is not defined in this function; it comes from the enclosing script.
function NewRedditSource {
function New-RedditSource {
param (
[string] $Name,
[string] $Url
)
# Escape the URL so it can travel as a query-string value.
$urlEncoded = [uri]::EscapeDataString($Url)
$param = "name=$Name&url=$urlEncoded"
# NOTE(review): this form sends only url, so a call that supplies just -Name posts an empty url — confirm.
$param = "url=$urlEncoded"
$uri = "$ApiServer/api/sources/new/reddit?$param"
$res = Invoke-RestMethod -Method Post -Uri $uri
return $res
}
function NewRssSource {
function New-RssSource {
param (
[string] $Name,
[string] $Url
@ -31,19 +30,18 @@ function NewRssSource {
return $res
}
# Registers a YouTube source by POSTing to "$ApiServer/api/sources/new/youtube" and returns the response.
# $ApiServer is not defined in this function; it comes from the enclosing script.
function NewYoutubeSource {
function New-YoutubeSource {
param (
[string] $Name,
[string] $Url
# NOTE(review): the Parameter attribute has no 'Required' property; 'Mandatory' is probably what was meant — confirm.
[Parameter(Required)][string] $Url
)
# Escape the URL so it can travel as a query-string value.
$urlEncoded = [uri]::EscapeDataString($Url)
[string] $param = "name=$Name&url=$urlEncoded"
[string] $param = "url=$urlEncoded"
[string] $uri = "$ApiServer/api/sources/new/youtube?$param"
$res = Invoke-RestMethod -Method Post -Uri $uri
return $res
}
function NewTwitchSource {
function New-TwitchSource {
param (
[string] $Name
)
@ -81,18 +79,18 @@ function New-Subscription {
# Load Secrets file
$secrets = Get-Content $JsonSecrets -Raw | ConvertFrom-Json
# Reddit sources.
$redditDadJokes = NewRedditSource -Name "dadjokes" -Url "https://reddit.com/r/dadjokes"
$redditSteamDeck = NewRedditSource -Name "steamdeck" -Url "https://reddit.com/r/steamdeck"
# NOTE(review): these two calls pass only -Name, while New-RedditSource builds its request from $Url — confirm the url is not sent empty.
$redditDadJokes = New-RedditSource -Name "dadjokes"
$redditSteamDeck = New-RedditSource -Name "steamdeck"
# RSS sources.
$rssSteamDeck = NewRssSource -Name "Steampowered - Steam Deck" -Url "https://store.steampowered.com/feeds/news/app/1675200/?cc=US&l=english&snr=1_2108_9__2107"
$rssFaysHaremporium = NewRssSource -Name "Fay's Haremporium" -Url "https://blog.nyxstudios.moe/rss/"
$rssPodcastLetsMosley = NewRssSource -Name "Let's Mosley" -Url "https://anchor.fm/s/6c7aa4c4/podcast/rss"
$rssSteamDeck = New-RssSource -Name "Steampowered - Steam Deck" -Url "https://store.steampowered.com/feeds/news/app/1675200/?cc=US&l=english&snr=1_2108_9__2107"
$rssFaysHaremporium = New-RssSource -Name "Fay's Haremporium" -Url "https://blog.nyxstudios.moe/rss/"
$rssPodcastLetsMosley = New-RssSource -Name "Let's Mosley" -Url "https://anchor.fm/s/6c7aa4c4/podcast/rss"
# YouTube sources.
$youtubeGameGrumps = NewYoutubeSource -Name "Game Grumps" -Url "https://www.youtube.com/user/GameGrumps"
$youtubeCityPlannerPlays = NewYoutubeSource -Name "City Planner Plays" -Url "https://www.youtube.com/c/cityplannerplays"
$youtubeGameGrumps = New-YoutubeSource -Url "https://www.youtube.com/user/GameGrumps"
$youtubeCityPlannerPlays = New-YoutubeSource -Url "https://www.youtube.com/c/cityplannerplays"
$youtubeLinusTechTips = New-YoutubeSource -Url "https://www.youtube.com/@LinusTechTips"
# NOTE(review): this call uses the name NewTwitchSource, while the other helpers are invoked as New-* and
# a New-TwitchSource header is declared earlier in the script — confirm the call resolves.
$twitchNintendo = NewTwitchSource -Name "Nintendo"
# Emits the id of the created Twitch source.
$twitchNintendo.id
# Discord webhook for the "dev" channel; its URL is read from the secrets file.
$miharuMonitor = New-DiscordWebhook -Server "Miharu Monitor" -Channel "dev" -Url $secrets.MiharuMonitor.dev01
@ -103,4 +101,5 @@ New-Subscription -SourceId $rssFaysHaremporium.id -DiscordWebhookId $miharuMonit
# Subscribe the $miharuMonitor webhook to each of the sources created above.
New-Subscription -SourceId $rssPodcastLetsMosley.id -DiscordWebhookId $miharuMonitor.id
New-Subscription -SourceId $youtubeGameGrumps.id -DiscordWebhookId $miharuMonitor.id
New-Subscription -SourceId $youtubeCityPlannerPlays.id -DiscordWebhookId $miharuMonitor.id
New-Subscription -SourceId $youtubeLinusTechTips.id -DiscordWebhookId $miharuMonitor.id
New-Subscription -SourceId $twitchNintendo.id -DiscordWebhookId $miharuMonitor.id