Features/adding youtube (#13)
* Found the meta tags on youtube... in the body and updated the client to pull them out.
* Updated namespace on test.
* I think formatting cleaned this up.
* Seed migrations have been cleaned up to get my configs out and move them to a script.
* Updates to ISourcesRepository.cs to allow new calls to the db.
* Formatter.
* Db models updated. Icon can now track a SourceID and a source can have a YouTube id.
* Updated the API logger to ignore OTel if no connection string is given.
* Updated docker init so I can run migrations from the image.
* Seed was updated to reflect the new API changes.
* Updated SourcesController.cs to grab icon data.
* Added reddit const values.
* Minor changes to HtmlPageReader.cs.
* Jobs are now pulling in the config section to bundle values.
* Removed the YouTube API; not needed anymore.
* Test updates.
This commit is contained in: parent ac6bdaa184, commit 9be985da0a
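For context on how the reworked endpoints are meant to be called after this change, here is a minimal C# sketch. The routes and query parameters come from SourcesController.cs and seed.ps1 in the diff below; the host, port, and the SeedExample wrapper are assumptions for illustration only, not part of this commit.

```csharp
// A minimal sketch of calling the reworked source endpoints.
// Assumption: the API is reachable at http://localhost:5000 (hypothetical host/port).
using System;
using System.Net.Http;
using System.Threading.Tasks;

public static class SeedExample
{
    public static async Task Main()
    {
        using var client = new HttpClient { BaseAddress = new Uri("http://localhost:5000") };

        // Reddit sources now only need the subreddit name; the controller builds the URL
        // and scrapes the subreddit page for an icon.
        await client.PostAsync("/api/sources/new/reddit?name=dadjokes", null);

        // YouTube sources now only need the channel URL; the controller pulls the title
        // and channel id from the page meta tags instead of calling the YouTube API.
        var url = Uri.EscapeDataString("https://www.youtube.com/user/GameGrumps");
        await client.PostAsync($"/api/sources/new/youtube?url={url}", null);
    }
}
```

seed.ps1 in this commit wraps the same calls in PowerShell functions (New-RedditSource, New-YoutubeSource).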
@@ -31,4 +31,4 @@ COPY --from=publish /app/build /app
COPY --from=build ./app/Newsbot.Collector.Database/Migrations/ /app/migrations
COPY --from=goose /go/bin/goose /app

ENTRYPOINT [ "dotnet", "Newsbot.Collector.Api.dll" ]
CMD [ "dotnet", "Newsbot.Collector.Api.dll" ]
@@ -5,6 +5,7 @@ using Newsbot.Collector.Domain.Consts;
using Newsbot.Collector.Domain.Dto;
using Newsbot.Collector.Domain.Interfaces;
using Newsbot.Collector.Domain.Models;
using Newsbot.Collector.Services.HtmlParser;

namespace Newsbot.Collector.Api.Controllers;

@@ -12,6 +13,7 @@ namespace Newsbot.Collector.Api.Controllers;
[Route("api/sources")]
public class SourcesController : ControllerBase
{
private readonly IIconsRepository _icons;
private readonly ILogger<SourcesController> _logger;

//private readonly ConnectionStrings _settings;
@@ -22,6 +24,7 @@ public class SourcesController : ControllerBase
_logger = logger;
//_settings = settings.Value;
_sources = new SourcesTable(settings.Value.Database);
_icons = new IconsTable(settings.Value.Database);
}

[HttpGet(Name = "GetSources")]
@@ -43,11 +46,19 @@ public class SourcesController : ControllerBase
}

[HttpPost("new/reddit")]
public SourceDto NewReddit(string name, string url)
public SourceDto NewReddit(string name)
{
var res = _sources.GetByNameAndType(name, SourceTypes.Reddit);
if (res.ID != Guid.Empty) return SourceDto.Convert(res);

var uri = new Uri($"https://reddit.com/r/{name}");

var pageReader = new HtmlPageReader(new HtmlPageReaderOptions
{
Url = uri.ToString()
});
pageReader.Parse();

var item = _sources.New(new SourceModel
{
Site = SourceTypes.Reddit,
@@ -55,8 +66,17 @@ public class SourcesController : ControllerBase
Type = SourceTypes.Reddit,
Source = "feed",
Enabled = true,
Url = url,
Tags = $"{SourceTypes.Reddit}, {name}"
Url = uri.ToString(),
Tags = $"{SourceTypes.Reddit},{name}"
});

// Not all subreddits have an Icon, so we only want to add a record when it has one.
if (pageReader.Data.Header.Image != "")
_icons.New(new IconModel
{
Id = Guid.NewGuid(),
FileName = pageReader.Data.Header.Image,
SourceId = item.ID
});
return SourceDto.Convert(item);
}
@@ -75,27 +95,41 @@ public class SourcesController : ControllerBase
Source = "feed",
Enabled = true,
Url = url,
Tags = $"{SourceTypes.Rss}, {name}"
Tags = $"{SourceTypes.Rss},{name}"
};
var item = _sources.New(m);
return SourceDto.Convert(item);
}

[HttpPost("new/youtube")]
public SourceDto NewYoutube(string name, string url)
public SourceDto NewYoutube(string url)
{
var res = _sources.GetByNameAndType(name, SourceTypes.YouTube);
var res = _sources.GetByUrl(url);
if (res.ID != Guid.Empty) return SourceDto.Convert(res);

var htmlClient = new HtmlPageReader(new HtmlPageReaderOptions
{
Url = url
});
htmlClient.Parse();

var item = _sources.New(new SourceModel
{
Site = SourceTypes.YouTube,
Type = SourceTypes.YouTube,
Name = name,
Name = htmlClient.Data.Header.Title,
Source = "feed",
Url = url,
Url = "feed",
Enabled = true,
Tags = $"{SourceTypes.YouTube}, {name}"
Tags = $"{SourceTypes.YouTube},{htmlClient.Data.Header.Title}",
YoutubeId = htmlClient.Data.Header.YoutubeChannelID ?? ""
});

_icons.New(new IconModel
{
Id = Guid.NewGuid(),
FileName = htmlClient.Data.Header.Image,
SourceId = item.ID
});

return SourceDto.Convert(item);
@@ -115,11 +149,48 @@ public class SourcesController : ControllerBase
Url = $"https://twitch.tv/{name}",
Source = "api",
Enabled = true,
Tags = $"{SourceTypes.Twitch}, {name}"
Tags = $"{SourceTypes.Twitch},{name}"
});
return SourceDto.Convert(item);
}

[HttpPost("new/github")]
public SourceDto NewGithub(string url)
{
if (!url.Contains("github.com")) return new SourceDto();

var res = _sources.GetByUrl(url);
if (res.ID != Guid.Empty) return SourceDto.Convert(res);

var slice = url.Split('/');

var pageReader = new HtmlPageReader(new HtmlPageReaderOptions
{
Url = url
});
pageReader.Parse();

var item = _sources.New(new SourceModel
{
Site = SourceTypes.GitHub,
Type = SourceTypes.GitHub,
Name = $"{slice[3]}/{slice[4]}",
Url = url,
Source = "feed",
Enabled = true,
Tags = $"{SourceTypes.GitHub}, {slice[3]}, {slice[4]}"
});

_icons.New(new IconModel
{
Id = Guid.NewGuid(),
FileName = pageReader.Data.Header.Image,
SourceId = item.ID
});

return SourceDto.Convert(item);
}

[HttpGet("{id}")]
public SourceDto GetById(Guid id)
{
@@ -2,10 +2,12 @@ using Hangfire;
using Hangfire.MemoryStorage;
using HealthChecks.UI.Client;
using Microsoft.AspNetCore.Diagnostics.HealthChecks;
using Newsbot.Collector.Api;
using Newsbot.Collector.Domain.Consts;
using Newsbot.Collector.Domain.Models;
using Newsbot.Collector.Services.Jobs;
using Newsbot.Collector.Domain.Models.Config;
using Serilog;

using ILogger = Serilog.ILogger;

var builder = WebApplication.CreateBuilder(args);
@@ -17,15 +19,7 @@ builder.Host.UseSerilog();
var config = GetConfiguration();
builder.Configuration.AddConfiguration(config);

Log.Logger = new LoggerConfiguration()
.WriteTo.Console()
.WriteTo.OpenTelemetry(
config.GetValue<string>(ConfigConnectionStringConst.OpenTelemetry) ?? "",
resourceAttributes: new Dictionary<string, object>
{
{ "service.name", "newsbot-collector-api" }
})
.CreateLogger();
Log.Logger = GetLogger(config);

Log.Information("Starting up");
// Configure Hangfire
@@ -43,6 +37,11 @@ builder.Services.AddSwaggerGen();

builder.Services.Configure<ConnectionStrings>(config.GetSection("ConnectionStrings"));

builder.Services.Configure<ConfigSectionConnectionStrings>(config.GetSection(ConfigSectionsConst.ConnectionStrings));
builder.Services.Configure<ConfigSectionRssModel>(config.GetSection(ConfigSectionsConst.Rss));
builder.Services.Configure<ConfigSectionYoutubeModel>(config.GetSection(ConfigSectionsConst.Youtube));
//builder.Services.Configure<

var app = builder.Build();

// Configure the HTTP request pipeline.
@@ -55,7 +54,7 @@ if (config.GetValue<bool>("EnableSwagger"))
app.UseHttpsRedirection();

app.UseHangfireDashboard();
SetupRecurringJobs(config, Log.Logger);
BackgroundJobs.SetupRecurringJobs(config);

app.UseAuthorization();

@@ -77,21 +76,22 @@ static IConfiguration GetConfiguration()
.Build();
}

static void SetupRecurringJobs(IConfiguration configuration, ILogger logger)
static ILogger GetLogger(IConfiguration configuration)
{
//RecurringJob.AddOrUpdate<HelloWorldJob>("Example", x => x.InitAndExecute(new HelloWorldJobOptions
//{
//    Message = "Hello from the background!"
//}), "0/1 * * * *");
var otel = configuration.GetValue<string>(ConfigConnectionStringConst.OpenTelemetry) ?? "";

RecurringJob.AddOrUpdate<RssWatcherJob>("RSS", x => x.InitAndExecute(new RssWatcherJobOptions
{
ConnectionString = configuration.GetSection(ConfigConnectionStringConst.Database).Value ?? ""
}), "15 0-23 * * *");
if (otel == "")
return Log.Logger = new LoggerConfiguration()
.WriteTo.Console()
.CreateLogger();

RecurringJob.AddOrUpdate<DiscordNotificationJob>("Discord Alerts", x =>
x.InitAndExecute(new DiscordNotificationJobOptions
return Log.Logger = new LoggerConfiguration()
.WriteTo.Console()
.WriteTo.OpenTelemetry(
otel,
resourceAttributes: new Dictionary<string, object>
{
DatabaseConnectionString = configuration.GetSection(ConfigConnectionStringConst.Database).Value ?? ""
}), "5/10 * * * *");
{ "service.name", "newsbot-collector-api" }
})
.CreateLogger();
}
@@ -6,45 +6,27 @@ SELECT 'up SQL query';
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";

-- Final Fantasy XIV Entries
INSERT INTO sources VALUES
(uuid_generate_v4(), 'ffxiv', 'Final Fantasy XIV - NA', 'scrape', 'ffxiv', 'a', TRUE, 'https://na.finalfantasyxiv.com/lodestone/', 'ffxiv, final, fantasy, xiv, na, lodestone');
INSERT INTO sources VALUES
(uuid_generate_v4(), 'ffxiv', 'Final Fantasy XIV - JP', 'scrape', 'ffxiv', 'a', FALSE, 'https://jp.finalfantasyxiv.com/lodestone/', 'ffxiv, final, fantasy, xiv, jp, lodestone');
INSERT INTO sources VALUES
(uuid_generate_v4(), 'ffxiv', 'Final Fantasy XIV - EU', 'scrape', 'ffxiv', 'a', FALSE, 'https://eu.finalfantasyxiv.com/lodestone/', 'ffxiv, final, fantasy, xiv, eu, lodestone');
INSERT INTO sources VALUES
(uuid_generate_v4(), 'ffxiv', 'Final Fantasy XIV - FR', 'scrape', 'ffxiv', 'a', FALSE, 'https://fr.finalfantasyxiv.com/lodestone/', 'ffxiv, final, fantasy, xiv, fr, lodestone');
INSERT INTO sources VALUES
(uuid_generate_v4(), 'ffxiv', 'Final Fantasy XIV - DE', 'scrape', 'ffxiv', 'a', FALSE, 'https://de.finalfantasyxiv.com/lodestone/', 'ffxiv, final, fantasy, xiv, de, lodestone');

-- Reddit Entries
INSERT INTO sources VALUES
(uuid_generate_v4(), 'reddit', 'dadjokes', 'feed', 'reddit', 'a', TRUE, 'https://reddit.com/r/dadjokes', 'reddit, dadjokes');
INSERT INTO sources VALUES
(uuid_generate_v4(), 'reddit', 'steamdeck', 'feed', 'reddit', 'a', TRUE, 'https://reddit.com/r/steamdeck', 'reddit, steam deck, steam, deck');

-- Youtube Entries
INSERT INTO sources VALUES
(uuid_generate_v4(), 'youtube', 'Game Grumps', 'feed', 'youtube', 'a', TRUE, 'https://www.youtube.com/user/GameGrumps', 'youtube, game grumps, game, grumps');

-- RSS Entries
INSERT INTO sources VALUES
(uuid_generate_v4(), 'steampowered', 'steam deck', 'feed', 'rss', 'a', TRUE, 'https://store.steampowered.com/feeds/news/app/1675200/?cc=US&l=english&snr=1_2108_9__2107', 'rss, steampowered, steam, deck, steam deck');

-- Twitch Entries
INSERT INTO sources VALUES
(uuid_generate_v4(), 'twitch', 'Nintendo', 'api', 'twitch', 'a', TRUE, 'https://twitch.tv/nintendo', 'twitch, nintendo');

INSERT INTO sources
VALUES (uuid_generate_v4(), 'ffxiv', 'Final Fantasy XIV - NA', 'scrape', 'ffxiv', 'a', TRUE,
'https://na.finalfantasyxiv.com/lodestone/', 'ffxiv, final, fantasy, xiv, na, lodestone');
INSERT INTO sources
VALUES (uuid_generate_v4(), 'ffxiv', 'Final Fantasy XIV - JP', 'scrape', 'ffxiv', 'a', FALSE,
'https://jp.finalfantasyxiv.com/lodestone/', 'ffxiv, final, fantasy, xiv, jp, lodestone');
INSERT INTO sources
VALUES (uuid_generate_v4(), 'ffxiv', 'Final Fantasy XIV - EU', 'scrape', 'ffxiv', 'a', FALSE,
'https://eu.finalfantasyxiv.com/lodestone/', 'ffxiv, final, fantasy, xiv, eu, lodestone');
INSERT INTO sources
VALUES (uuid_generate_v4(), 'ffxiv', 'Final Fantasy XIV - FR', 'scrape', 'ffxiv', 'a', FALSE,
'https://fr.finalfantasyxiv.com/lodestone/', 'ffxiv, final, fantasy, xiv, fr, lodestone');
INSERT INTO sources
VALUES (uuid_generate_v4(), 'ffxiv', 'Final Fantasy XIV - DE', 'scrape', 'ffxiv', 'a', FALSE,
'https://de.finalfantasyxiv.com/lodestone/', 'ffxiv, final, fantasy, xiv, de, lodestone');
-- +goose StatementEnd

-- +goose Down
-- +goose StatementBegin
--SELECT 'down SQL query';

DELETE FROM sources where source = 'reddit' and name = 'dadjokes';
DELETE FROM sources where source = 'reddit' and name = 'steamdeck';
DELETE FROM sources where source = 'ffxiv';
DELETE FROM sources WHERE source = 'twitch' and name = 'Nintendo';
DELETE FROM sources WHERE source = 'youtube' and name = 'Game Grumps';
DELETE FROM SOURCES WHERE source = 'rss' and name = 'steam deck';
DELETE
FROM sources
where source = 'ffxiv';
-- +goose StatementEnd
@@ -1,20 +0,0 @@
-- +goose Up
-- +goose StatementBegin
INSERT INTO sources VALUES (
uuid_generate_v4(),
'rss',
'Let''s Mosley',
'feed',
'rss',
'podcast',
TRUE,
'https://anchor.fm/s/6c7aa4c4/podcast/rss',
'rss,let''s mosley,fitnes,coach',
FALSE);

-- +goose StatementEnd

-- +goose Down
-- +goose StatementBegin
DELETE FROM sources Where type = 'rss' And Name = 'Let''s Mosley'
-- +goose StatementEnd
@@ -9,7 +9,7 @@ namespace Newsbot.Collector.Database.Repositories;

public class SourcesTable : ISourcesRepository
{
private string _connectionString;
private readonly string _connectionString;

public SourcesTable(string connectionString)
{
@@ -19,25 +19,16 @@ public class SourcesTable : ISourcesRepository
public SourcesTable(IConfiguration configuration)
{
var connstr = configuration.GetConnectionString("database");
if (connstr is null)
{
connstr = "";
}
if (connstr is null) connstr = "";
_connectionString = connstr;
}

private IDbConnection OpenConnection(string connectionString)
{
var conn = new NpgsqlConnection(_connectionString);
conn.Open();
return conn;
}

public SourceModel New(SourceModel model)
{
model.ID = Guid.NewGuid();
using var conn = OpenConnection(_connectionString);
var query = "Insert Into Sources (ID, Site, Name, Source, Type, Value, Enabled, Url, Tags) Values (@id ,@site,@name,@source,@type,@value,@enabled,@url,@tags);";
var query =
"Insert Into Sources (ID, Site, Name, Source, Type, Value, Enabled, Url, Tags, YoutubeId) Values (@id ,@site,@name,@source,@type,@value,@enabled,@url,@tags,@youtubeid);";
conn.Execute(query, new
{
id = model.ID,
@@ -48,7 +39,8 @@ public class SourcesTable : ISourcesRepository
model.Value,
model.Enabled,
model.Url,
model.Tags
model.Tags,
model.YoutubeId
});
return model;
}
@@ -61,10 +53,7 @@ public class SourcesTable : ISourcesRepository
{
id = ID
});
if (res.Count() == 0)
{
return new SourceModel();
}
if (res.Count() == 0) return new SourceModel();
return res.First();
}

@@ -83,10 +72,7 @@ public class SourcesTable : ISourcesRepository
name = Name
});

if (res.Count() == 0)
{
return new SourceModel();
}
if (res.Count() == 0) return new SourceModel();
return res.First();
}

@@ -96,14 +82,24 @@ public class SourcesTable : ISourcesRepository
var query = "Select * from Sources WHERE name = @name and type = @type;";
var res = conn.Query<SourceModel>(query, new
{
name = name,
type = type
name, type
});

if (res.Count() == 0)
{
return new SourceModel();
if (res.Count() == 0) return new SourceModel();
return res.First();
}

public SourceModel GetByUrl(string url)
{
using var conn = OpenConnection(_connectionString);
var query = "Select * from Sources WHERE url = @url;";
var res = conn.Query<SourceModel>(query, new
{
url
});

if (res.ToList().Count == 0) return new SourceModel();

return res.First();
}

@@ -115,8 +111,7 @@ public class SourcesTable : ISourcesRepository
Fetch Next @count Rows Only;";
return conn.Query<SourceModel>(query, new
{
page = page * count,
count = count
page = page * count, count
}).ToList();
}

@@ -126,8 +121,7 @@ public class SourcesTable : ISourcesRepository
var query = "Select * From Sources where Source = @source Limit @limit;";
return conn.Query<SourceModel>(query, new
{
source = source,
limit = limit
source, limit
}).ToList();
}

@@ -137,28 +131,44 @@ public class SourcesTable : ISourcesRepository
var query = "Select * From Sources where Type = @type Limit @limit;";
return conn.Query<SourceModel>(query, new
{
type = type,
limit = limit
type, limit
}).ToList();
}
public int Disable(Guid ID)

public int Disable(Guid id)
{
using var conn = OpenConnection(_connectionString);
var query = "Update Sources Set Enabled = FALSE where ID = @id;";
return conn.Execute(query, new
{
id = ID
id
});
}

public int Enable(Guid ID)
public int Enable(Guid id)
{
using var conn = OpenConnection(_connectionString);
var query = "Update Sources Set Enabled = TRUE where ID = @id;";
return conn.Execute(query, new
{
id = ID
id
});
}

public int UpdateYoutubeId(Guid id, string youtubeId)
{
using var conn = OpenConnection(_connectionString);
var query = "Update Sources Set youtubeid = @youtubeId where ID = @id;";
return conn.Execute(query, new
{
id, youtubeId
});
}

private IDbConnection OpenConnection(string connectionString)
{
var conn = new NpgsqlConnection(_connectionString);
conn.Open();
return conn;
}
}
@@ -1,9 +0,0 @@
namespace Newsbot.Collector.Domain.Consts;

public class ConfigRedditConst
{
public const string IsEnabled = "Reddit:IsEnabled";
public const string PullHot = "Reddit:PullHot";
public const string PullNsfw = "Reddit:PullNsfw";
public const string PullTop = "Reddit:PullTop";
}
@@ -1,4 +1,3 @@
using System.Globalization;
using Newsbot.Collector.Domain.Models;

namespace Newsbot.Collector.Domain.Interfaces;
@@ -10,9 +9,11 @@ public interface ISourcesRepository
public SourceModel GetByID(string ID);
public SourceModel GetByName(string name);
public SourceModel GetByNameAndType(string name, string type);
SourceModel GetByUrl(string url);
public List<SourceModel> List(int page, int count);
public List<SourceModel> ListBySource(string source, int limit);
public List<SourceModel> ListByType(string type, int limit = 25);
public int Disable(Guid ID);
public int Enable(Guid ID);
public int Disable(Guid id);
public int Enable(Guid id);
public int UpdateYoutubeId(Guid id, string youtubeId);
}
@@ -42,9 +42,10 @@ public class DiscordWebHookModel

public class IconModel
{
public Guid ID { get; set; }
public Guid Id { get; set; }
public string FileName { get; set; } = "";
public string Site { get; set; } = "";
public Guid SourceId { get; set; }
}

public class SettingModel
@@ -61,7 +62,7 @@ public class SourceModel
public string Site { get; set; } = "";
public string Name { get; set; } = "";

// Source use to deinfe the worker to query with but moving to Type as it was not used really.
// Source use to define the worker to query with but moving to Type as it was not used really.
public string Source { get; set; } = "";
public string Type { get; set; } = "";
public string Value { get; set; } = "";
@@ -69,6 +70,7 @@ public class SourceModel
public string Url { get; set; } = "";
public string Tags { get; set; } = "";
public bool Deleted { get; set; }
public string YoutubeId { get; set; } = "";
}

public class SubscriptionModel
@@ -5,12 +5,11 @@ namespace Newsbot.Collector.Services.HtmlParser;

public class HeadParserClient
{
private const string XPathMetaTag = "//head/meta";
private const string XPathHeadMetaTag = "//head/meta";
private const string XPathBodyMetaTag = "//body/meta";
private const string XPathLinkTag = "//head/link";

public HeadParserModel Data { get; set; }

private string _htmlContent;
private readonly string _htmlContent;

public HeadParserClient(string htmlContent, bool useBrowser = false)
{
@@ -18,6 +17,8 @@ public class HeadParserClient
Data = new HeadParserModel();
}

public HeadParserModel Data { get; set; }

public void Parse()
{
Data.Title = GetMetaTitle();
@@ -36,9 +37,23 @@ public class HeadParserClient
var htmlDoc = new HtmlDocument();
htmlDoc.LoadHtml(_htmlContent);

var tags = htmlDoc.DocumentNode.SelectNodes(XPathMetaTag).ToList();
var allTags = new List<HtmlNode>();

return tags;
var headerTags = htmlDoc.DocumentNode.SelectNodes(XPathHeadMetaTag).ToList();
allTags.AddRange(headerTags);

try
{
var bodyTags = htmlDoc.DocumentNode.SelectNodes(XPathBodyMetaTag).ToList();
allTags.AddRange(bodyTags);
}
catch
{
// no tags found in the body and that's ok.
// we check the body thanks to Youtube.
}

return allTags;
}

private List<HtmlNode> CollectLinkTags()
@@ -53,13 +68,13 @@ public class HeadParserClient
{
foreach (var meta in html)
{
if (meta.Attributes.Count == 0) continue;
;
//Console.WriteLine($"Name={meta.Attributes[0].Name} & Value={meta.Attributes[0].Value}");
if (meta.Attributes[0].Value.Contains(Tag) == false)
{
continue;
}
if (meta.Attributes[0].Value.Contains(Tag) == false) continue;
return meta.Attributes[1].Value;
}

return "";
}

@@ -68,62 +83,62 @@ public class HeadParserClient
foreach (var tag in tags)
{
var res = GetTagValue(tag, htmlTags);
if (res == "")
{
continue;
}
if (res == "") continue;
return res;
}

return "";
}

public string GetMetaTitle()
{
var htmlTags = CollectMetaTags();
string[] tags = new string[] { "twitter:title", "og:title", "title" };
string[] tags = { "twitter:title", "og:title", "title" };
return FindFirstResult(tags, htmlTags);
}

public string GetMetaDescription()
{
var htmlTags = CollectMetaTags();
string[] tags = new string[] { "twitter:description", "og:description", "description" };
string[] tags = { "twitter:description", "og:description", "description" };
return FindFirstResult(tags, htmlTags);
}

public string GetMetaImage()
{
var htmlTags = CollectMetaTags();
string[] tags = new string[] { "twitter:image", "og:image", "image" };
string[] tags = { "twitter:image", "og:image", "image" };
return FindFirstResult(tags, htmlTags);
}

public string GetMetaUrl()
{
var htmlTags = CollectMetaTags();
string[] tags = new string[] { "twitter:url", "og:url", "url" };
string[] tags = { "twitter:url", "og:url", "url" };
return FindFirstResult(tags, htmlTags);
}

public string GetMetaPageType()
{
var htmlTags = CollectMetaTags();
string[] tags = new string[] { "og:type", "type" };
string[] tags = { "og:type", "type" };
return FindFirstResult(tags, htmlTags);
}

public string GetMetaColorTheme()
{
var htmlTags = CollectMetaTags();
string[] tags = new string[] { "theme-color" };
string[] tags = { "theme-color" };
return FindFirstResult(tags, htmlTags);
}

public string GetYouTubeChannelId()
{
var htmlTags = CollectMetaTags();
string[] tags = new string[] { "channelId" };
return FindFirstResult(tags, htmlTags);
string[] tags = { "og:url", "channelId" };
var results = FindFirstResult(tags, htmlTags);
var id = results.Replace("https://www.youtube.com/channel/", "");
return id;
}

/// <summary>
@@ -134,25 +149,20 @@ public class HeadParserClient
public string GetSiteFeed()
{
var htmlTags = CollectLinkTags();
var tags = new string[] { "alternate" };
var tags = new[] { "alternate" };
try
{
var attr = FindFirstAttribute(tags, htmlTags);
foreach (var item in attr)
{
if (item.Name != "href")
{
continue;
}
if (item.Name != "href") continue;

var uri = item.Value;
if (uri.StartsWith("//"))
{
uri = uri.Replace("//", "https://");
}
if (uri.StartsWith("//")) uri = uri.Replace("//", "https://");

return uri;
}

return "";
}
catch
@@ -165,7 +175,6 @@ public class HeadParserClient
private HtmlAttributeCollection FindFirstAttribute(string[] tags, List<HtmlNode> htmlTags)
{
foreach (var tag in tags)
{
try
{
var res = GetValidAttribute(tag, htmlTags);
@@ -175,7 +184,7 @@ public class HeadParserClient
{
// Nothing was found in the given tag but we will keep looking till we finish all the entries.
}
}

throw new MissingHeaderValueException("Unable to find the requested value");
}

@@ -183,12 +192,10 @@ public class HeadParserClient
{
foreach (var meta in html)
{
if (meta.Attributes[0].Value.Contains(Tag) == false)
{
continue;
}
if (meta.Attributes[0].Value.Contains(Tag) == false) continue;
return meta.Attributes;
}

throw new MissingHeaderValueException("Site does not expose requested tag.");
}
}
@@ -1,25 +1,32 @@
using HtmlAgilityPack;
using Newsbot.Collector.Domain.Exceptions;

namespace Newsbot.Collector.Services.HtmlParser;

public class HtmlPageReaderOptions
{
public string? Url { get; init; }
public string? SourceCode { get; init; }
}

public class HtmlPageReader
{
private readonly HeadParserClient _headClient;
private readonly string _siteContent;

public HtmlData Data { get; set; }

private HeadParserClient _headClient;

private string _siteContent;

public HtmlPageReader(string pageUrl)
public HtmlPageReader(HtmlPageReaderOptions options)
{
_siteContent = ReadSiteContent(pageUrl);
_headClient = new HeadParserClient(_siteContent);
if (options.SourceCode is not null) _siteContent = options.SourceCode;

if (options.Url is not null) _siteContent = ReadSiteContent(options.Url);

if (_siteContent is null) throw new Exception("SiteContent was not filled and expected.");

_headClient = new HeadParserClient(_siteContent);
Data = new HtmlData();
}

public HtmlData Data { get; set; }

public void Parse()
{
_headClient.Parse();
@@ -32,7 +39,7 @@ public class HtmlPageReader
var html = client.GetStringAsync(url);
html.Wait();

var content = html.Result.ToString();
var content = html.Result;
return content;
}

@@ -47,24 +54,14 @@ public class HtmlPageReader
htmlDoc.LoadHtml(_siteContent);
var links = htmlDoc.DocumentNode.SelectNodes("//div[contains(@class, 'article-text')]").ToList();

if (links.Count == 0)
{
throw new Exception("Unable to parse body. Tag is unkown.");
}
if (links.Count == 0) throw new Exception("Unable to parse body. Tag is unknown.");

if (links.Count >= 2)
{
throw new Exception("Too many results back for the body");
}
if (links.Count >= 2) throw new Exception("Too many results back for the body");

var content = new List<string>();
foreach (var item in links[0].ChildNodes)
{
if (item.Name == "p")
{
content.Add(item.InnerText);
}
}
//var content = new List<string>();
//foreach (var item in links[0].ChildNodes)
//    if (item.Name == "p")
//        content.Add(item.InnerText);

return links;
}
@@ -1,6 +1,7 @@
using Newsbot.Collector.Database.Repositories;
using Newsbot.Collector.Domain.Interfaces;
using Newsbot.Collector.Domain.Models;
using Newsbot.Collector.Domain.Models.Config;
using Newsbot.Collector.Services.Notifications.Discord;
using Serilog;

@@ -8,7 +9,9 @@ namespace Newsbot.Collector.Services.Jobs;

public class DiscordNotificationJobOptions
{
public string? DatabaseConnectionString { get; set; }
public ConfigSectionConnectionStrings? ConnectionStrings { get; set; }
public ConfigSectionNotificationsDiscord? Config { get; set; }

}

public class DiscordNotificationJob
@@ -19,6 +22,7 @@ public class DiscordNotificationJob
private IDiscordWebHooksRepository _webhook;
private ISourcesRepository _sources;
private ISubscriptionRepository _subs;
private IIconsRepository _icons;

public DiscordNotificationJob()
{
@@ -27,15 +31,21 @@ public class DiscordNotificationJob
_webhook = new DiscordWebhooksTable("");
_sources = new SourcesTable("");
_subs = new SubscriptionsTable("");
_icons = new IconsTable("");
}

public void InitAndExecute(DiscordNotificationJobOptions options)
{
_queue = new DiscordQueueTable(options.DatabaseConnectionString ?? "");
_article = new ArticlesTable(options.DatabaseConnectionString ?? "");
_webhook = new DiscordWebhooksTable(options.DatabaseConnectionString ?? "");
_sources = new SourcesTable(options.DatabaseConnectionString ?? "");
_subs = new SubscriptionsTable(options.DatabaseConnectionString ?? "");
options.ConnectionStrings ??= new ConfigSectionConnectionStrings();
options.Config ??= new ConfigSectionNotificationsDiscord();

_queue = new DiscordQueueTable(options.ConnectionStrings.Database ?? "");
_article = new ArticlesTable(options.ConnectionStrings.Database ?? "");
_webhook = new DiscordWebhooksTable(options.ConnectionStrings.Database ?? "");
_sources = new SourcesTable(options.ConnectionStrings.Database ?? "");
_subs = new SubscriptionsTable(options.ConnectionStrings.Database ?? "");
_icons = new IconsTable(options.ConnectionStrings.Database ?? "");


Execute();
}
@@ -59,6 +69,8 @@ public class DiscordNotificationJob
continue;
}

var sourceIcon = _icons.GetBySourceId(sourceDetails.ID);

// Find all the subscriptions for that source
var allSubscriptions = _subs.ListBySourceID(sourceDetails.ID);

@@ -74,7 +86,7 @@ public class DiscordNotificationJob
var client = new DiscordWebhookClient(discordDetails.Url);
try
{
client.SendMessage(GenerateDiscordMessage(sourceDetails, articleDetails));
client.SendMessage(GenerateDiscordMessage(sourceDetails, articleDetails, sourceIcon));
}
catch (Exception e)
{
@@ -89,7 +101,7 @@ public class DiscordNotificationJob
}
}

public DiscordMessage GenerateDiscordMessage(SourceModel source, ArticlesModel article)
public DiscordMessage GenerateDiscordMessage(SourceModel source, ArticlesModel article, IconModel icon)
{
var embed = new DiscordMessageEmbed
{
@@ -99,6 +111,7 @@ public class DiscordNotificationJob
Author = new DiscordMessageEmbedAuthor
{
Name = article.AuthorName,
IconUrl = icon.FileName
},
Footer = new DiscordMessageEmbedFooter
{
@@ -1,18 +1,23 @@
using System.ServiceModel.Syndication;
using System.Xml;
using Newsbot.Collector.Database.Repositories;
using Newsbot.Collector.Domain.Consts;
using Newsbot.Collector.Domain.Interfaces;
using Newsbot.Collector.Domain.Models;
using Newsbot.Collector.Domain.Models.Config;
using Newsbot.Collector.Services.HtmlParser;

namespace Newsbot.Collector.Services.Jobs;

public class GithubWatcherJobOptions
{
public string ConnectionString { get; set; } = "";
public ConfigSectionConnectionStrings? ConnectionStrings { get; set; }

//public string ConnectionString { get; set; } = "";
public bool FeaturePullReleases { get; set; } = false;

public bool FeaturePullCommits { get; set; } = false;
public bool PullIssues { get; set; } = false;
//public bool PullIssues { get; set; } = false;
}

public class GithubWatcherJob
@@ -28,24 +33,24 @@ public class GithubWatcherJob
_source = new SourcesTable("");
}

public void Init(GithubWatcherJobOptions options)
{
_articles = new ArticlesTable(options.ConnectionString);
_queue = new DiscordQueueTable(options.ConnectionString);
_source = new SourcesTable(options.ConnectionString);
}

public void InitAndExecute(GithubWatcherJobOptions options)
{
Init(options);
options.ConnectionStrings ??= new ConfigSectionConnectionStrings();
_articles = new ArticlesTable(options.ConnectionStrings.Database ?? "");
_queue = new DiscordQueueTable(options.ConnectionStrings.Database ?? "");
_source = new SourcesTable(options.ConnectionStrings.Database ?? "");

Execute();
}

private void Execute()
{
_source.ListBySource(SourceTypes.GitHub, 25);

// query sources for things to pull
var items = new List<ArticlesModel>();


items.AddRange(Collect(new Uri("https://github.com/jtom38/dvb")));

// query */commits/master.atom
@@ -56,7 +61,7 @@ public class GithubWatcherJob
{
var items = new List<ArticlesModel>();

Guid placeHolderId = Guid.NewGuid();
var placeHolderId = Guid.NewGuid();
// query */release.atom
items.AddRange(CollectItems($"{url.AbsoluteUri}/releases.atom", placeHolderId));
items.AddRange(CollectItems($"{url.AbsoluteUri}/master.atom", placeHolderId));
@@ -75,12 +80,12 @@ public class GithubWatcherJob
{
var itemUrl = item.Links[0].Uri.AbsoluteUri;
var exits = _articles.GetByUrl(itemUrl);
if (exits.ID != Guid.Empty)
{
continue;
}
if (exits.ID != Guid.Empty) continue;

var parser = new HtmlPageReader(itemUrl);
var parser = new HtmlPageReader(new HtmlPageReaderOptions
{
Url = itemUrl
});
parser.Parse();

try
@@ -104,6 +109,7 @@ public class GithubWatcherJob
Console.WriteLine(e);
}
}

return items;
}
}
@@ -4,6 +4,7 @@ using Newsbot.Collector.Database.Repositories;
using Newsbot.Collector.Domain.Consts;
using Newsbot.Collector.Domain.Interfaces;
using Newsbot.Collector.Domain.Models;
using Newsbot.Collector.Domain.Models.Config;
using Newsbot.Collector.Services.HtmlParser;
using Serilog;

@@ -11,13 +12,18 @@ namespace Newsbot.Collector.Services.Jobs;

public class RssWatcherJobOptions
{
public string? ConnectionString { get; init; }
public string? OpenTelemetry { get; init; }
//public string? ConnectionString { get; init; }
//public string? OpenTelemetry { get; init; }

public ConfigSectionConnectionStrings? ConnectionStrings { get; set; }
public ConfigSectionRssModel? Config { get; set; }
}

// This class was made to work with Hangfire and it does not support constructors.
public class RssWatcherJob
{
private const string JobName = "RssWatcherJob";

private IArticlesRepository _articles;
private ILogger _logger;
private IDiscordQueueRepository _queue;
@@ -28,62 +34,53 @@ public class RssWatcherJob
_articles = new ArticlesTable("");
_queue = new DiscordQueueTable("");
_source = new SourcesTable("");
_logger = GetLogger("");
_logger = JobLogger.GetLogger("", JobName);
}

public void InitAndExecute(RssWatcherJobOptions options)
{
Init(options);
options.ConnectionStrings ??= new ConfigSectionConnectionStrings();
options.Config ??= new ConfigSectionRssModel();

_logger.Information("RssWatcherJob - Job was triggered");
_logger.Information("RssWatcherJob - Setting up the job");
_articles = new ArticlesTable(options.ConnectionStrings.Database ?? "");
_queue = new DiscordQueueTable(options.ConnectionStrings.Database ?? "");
_source = new SourcesTable(options.ConnectionStrings.Database ?? "");
_logger = JobLogger.GetLogger(options.ConnectionStrings.OpenTelemetry ?? "", JobName);

_logger.Information($"{JobName} - Job was triggered");
if (!options.Config.IsEnabled)
{
_logger.Information($"{JobName} - Going to exit because feature flag is off.");
return;
}

_logger.Information($"{JobName} - Setting up the job");

Execute();
}

private ILogger GetLogger(string connectionString)
{
return Log.Logger = new LoggerConfiguration()
.WriteTo.Console()
.WriteTo.OpenTelemetry(
connectionString,
resourceAttributes: new Dictionary<string, object>
{
{ "service.name", "newsbot-collector-api" },
{ "Job", "RssWatcherJob" }
})
.CreateLogger();
}

public void Init(RssWatcherJobOptions options)
{
_articles = new ArticlesTable(options.ConnectionString ?? "");
_queue = new DiscordQueueTable(options.ConnectionString ?? "");
_source = new SourcesTable(options.ConnectionString ?? "");
_logger = GetLogger(options.OpenTelemetry ?? "");
}

public void Execute()
{
var articles = new List<ArticlesModel>();

_logger.Information("RssWatcherJob - Requesting sources");
_logger.Information($"{JobName} - Requesting sources");
var sources = _source.ListByType(SourceTypes.Rss);
_logger.Information($"RssWatcherJob - Got {sources.Count} back");
_logger.Information($"{JobName} - Got {sources.Count} back");

foreach (var source in sources)
{
_logger.Information($"RssWatcherJob - Starting to process '{source.Name}'");
_logger.Information("RssWatcherJob - Starting to request feed to be processed");
_logger.Information($"{JobName} - Starting to process '{source.Name}'");
_logger.Information($"{JobName} - Starting to request feed to be processed");
var results = Collect(source.Url, source.ID);

_logger.Information($"RssWatcherJob - Collected {results.Count} posts");
_logger.Information($"{JobName} - Collected {results.Count} posts");
articles.AddRange(results);
}

_logger.Information("RssWatcherJob - Sending posts over to the database");
_logger.Information($"{JobName} - Sending posts over to the database");
UpdateDatabase(articles);

_logger.Information("RssWatcherJob - Done!");
_logger.Information($"{JobName} - Done!");
}

public List<ArticlesModel> Collect(string url, Guid sourceId, int sleep = 3000)
@@ -101,7 +98,10 @@ public class RssWatcherJob
// If we have, skip and save the site bandwidth
if (IsThisUrlKnown(articleUrl)) continue;

var meta = new HtmlPageReader(articleUrl);
var meta = new HtmlPageReader(new HtmlPageReaderOptions
{
Url = articleUrl
});
meta.Parse();

var article = new ArticlesModel
@@ -1,19 +1,18 @@
<Project Sdk="Microsoft.NET.Sdk">

<ItemGroup>
<ProjectReference Include="..\Newsbot.Collector.Domain\Newsbot.Collector.Domain.csproj"/>
<ProjectReference Include="..\Newsbot.Collector.Database\Newsbot.Collector.Database.csproj"/>
<ProjectReference Include="..\Newsbot.Collector.Domain\Newsbot.Collector.Domain.csproj" />
<ProjectReference Include="..\Newsbot.Collector.Database\Newsbot.Collector.Database.csproj" />
</ItemGroup>

<ItemGroup>
<PackageReference Include="Google.Apis.YouTube.v3" Version="1.60.0.2945"/>
<PackageReference Include="HtmlAgilityPack" Version="1.11.46"/>
<PackageReference Include="Selenium.WebDriver" Version="4.8.1"/>
<PackageReference Include="Selenium.WebDriver.GeckoDriver" Version="0.32.2"/>
<PackageReference Include="Serilog" Version="2.12.0"/>
<PackageReference Include="Serilog.Sinks.Console" Version="4.1.0"/>
<PackageReference Include="Serilog.Sinks.OpenTelemetry" Version="1.0.0-dev-00113"/>
<PackageReference Include="System.ServiceModel.Syndication" Version="7.0.0"/>
<PackageReference Include="HtmlAgilityPack" Version="1.11.46" />
<PackageReference Include="Selenium.WebDriver" Version="4.8.1" />
<PackageReference Include="Selenium.WebDriver.GeckoDriver" Version="0.32.2" />
<PackageReference Include="Serilog" Version="2.12.0" />
<PackageReference Include="Serilog.Sinks.Console" Version="4.1.0" />
<PackageReference Include="Serilog.Sinks.OpenTelemetry" Version="1.0.0-dev-00113" />
<PackageReference Include="System.ServiceModel.Syndication" Version="7.0.0" />
</ItemGroup>

<PropertyGroup>
@@ -8,12 +8,11 @@ namespace Newsbot.Collector.Services.Notifications.Discord;

public class DiscordWebhookClient : IDiscordNotificatioClient
{

private string[] _webhooks;
private readonly string[] _webhooks;

public DiscordWebhookClient(string webhook)
{
_webhooks = new string[] { webhook };
_webhooks = new[] { webhook };
}

public DiscordWebhookClient(string[] webhooks)
@@ -23,25 +22,22 @@ public class DiscordWebhookClient : IDiscordNotificatioClient

public void SendMessage(DiscordMessage payload)
{
if (payload.Embeds is not null)
{
MessageValidation.IsEmbedFooterValid(payload.Embeds);
}
if (payload.Embeds is not null) MessageValidation.IsEmbedFooterValid(payload.Embeds);

foreach (var webhook in _webhooks)
{
var jsonRaw = JsonConvert.SerializeObject(payload, Newtonsoft.Json.Formatting.None, new JsonSerializerSettings { NullValueHandling = NullValueHandling.Ignore });
var jsonRaw = JsonConvert.SerializeObject(payload, Formatting.None,
new JsonSerializerSettings { NullValueHandling = NullValueHandling.Ignore });
using StringContent jsonContent = new(jsonRaw, Encoding.UTF8, "application/json");

using var client = new HttpClient();
var resp = client.PostAsync(webhook, jsonContent);
resp.Wait();

// can be 204 or a message, might be 200
Console.WriteLine(resp.Result.StatusCode);
if (resp.Result.StatusCode != HttpStatusCode.NoContent)
{
throw new Exception("Message was not accepted by the sever.");
}
}
}

}
@@ -34,6 +34,11 @@ public class DiscordNotificationJobTest
Thumbnail = "https://cdn.arstechnica.net/wp-content/uploads/2023/03/GettyImages-944827400-800x534.jpg",
Description = "Please work",
AuthorName = "No one knows"
},
new IconModel
{
Id = Guid.NewGuid(),
FileName = "https://www.redditstatic.com/desktop2x/img/favicon/android-icon-192x192.png"
});
webhookClient.SendMessage(msg);
}
@@ -1,4 +1,6 @@
using Microsoft.Extensions.Configuration;
using Newsbot.Collector.Domain.Models;
using Newsbot.Collector.Domain.Models.Config;
using Newsbot.Collector.Services.Jobs;

namespace Newsbot.Collector.Tests.Jobs;
@@ -7,8 +9,12 @@ public class GithubWatcherJobTests
{
private IConfiguration GetConfiguration()
{
var inMemorySettings = new Dictionary<string, string> {
{"ConnectionStrings:database", "Host=localhost;Username=postgres;Password=postgres;Database=postgres;sslmode=disable"}
var inMemorySettings = new Dictionary<string, string>
{
{
"ConnectionStrings:database",
"Host=localhost;Username=postgres;Password=postgres;Database=postgres;sslmode=disable"
}
};

IConfiguration configuration = new ConfigurationBuilder()
@@ -26,9 +32,12 @@ public class GithubWatcherJobTests
public void CanPullAFeed()
{
var client = new GithubWatcherJob();
client.Init(new GithubWatcherJobOptions
client.InitAndExecute(new GithubWatcherJobOptions
{
ConnectionString = ConnectionString(),
ConnectionStrings = new ConfigSectionConnectionStrings
{
Database = ConnectionString()
},
FeaturePullCommits = true,
FeaturePullReleases = true
});
@@ -1,14 +1,27 @@
using Microsoft.Extensions.Configuration;
using Newsbot.Collector.Domain.Models.Config;
using Newsbot.Collector.Services.Jobs;
using Xunit.Abstractions;

namespace Newsbot.Collector.Tests.Jobs;

public class RssWatcherJobTest
{
private readonly ITestOutputHelper _testOutputHelper;

public RssWatcherJobTest(ITestOutputHelper testOutputHelper)
{
_testOutputHelper = testOutputHelper;
}

private IConfiguration GetConfiguration()
{
var inMemorySettings = new Dictionary<string, string> {
{"ConnectionStrings:database", "Host=localhost;Username=postgres;Password=postgres;Database=postgres;sslmode=disable"}
var inMemorySettings = new Dictionary<string, string>
{
{
"ConnectionStrings:database",
"Host=localhost;Username=postgres;Password=postgres;Database=postgres;sslmode=disable"
}
};

IConfiguration configuration = new ConfigurationBuilder()
@@ -25,9 +38,9 @@ public class RssWatcherJobTest
[Fact]
public void CanFindItemsNoDb()
{
var url = "https://www.engadget.com/rss.xml";
const string url = "https://www.engadget.com/rss.xml";
var client = new RssWatcherJob();
var items = client.Collect(url, Guid.NewGuid(), 0);
client.Collect(url, Guid.NewGuid(), 0);
}

[Fact]
@@ -35,7 +48,13 @@ public class RssWatcherJobTest
{
var url = "https://www.engadget.com/rss.xml";
var client = new RssWatcherJob();
client.Init(ConnectionString());
client.InitAndExecute(new RssWatcherJobOptions
{
ConnectionStrings = new ConfigSectionConnectionStrings
{
Database = ConnectionString()
}
});
var items = client.Collect(url, Guid.NewGuid(), 0);
client.UpdateDatabase(items);
}
@@ -44,20 +63,13 @@ public class RssWatcherJobTest
public void CanReadHtmlDrivenFeedPage()
{
var url = "https://www.howtogeek.com/feed/";
var client = new RssWatcherJob();
client.Init(ConnectionString());
var items = client.Collect(url, Guid.NewGuid(), 0);
Console.WriteLine('k');
}

[Fact]
public void InitAndExecuteTest()
{
var client = new RssWatcherJob();
client.InitAndExecute(new RssWatcherJobOptions
{
ConnectionString = ConnectionString()
ConnectionStrings = new ConfigSectionConnectionStrings
{
Database = ConnectionString()
}
});

}
}
@@ -27,4 +27,12 @@
<ProjectReference Include="..\Newsbot.Collector.Database\Newsbot.Collector.Database.csproj" />
</ItemGroup>

<ItemGroup>
<Content Update="appsettings.json">
<ExcludeFromSingleFile>true</ExcludeFromSingleFile>
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<CopyToPublishDirectory>PreserveNewest</CopyToPublishDirectory>
</Content>
</ItemGroup>

</Project>
@@ -7,32 +7,74 @@ public class HtmlPageReaderTests
[Fact]
public void BaseSiteContainsRssFeed()
{
var client = new HtmlPageReader("https://dotnettutorials.net/");
var client = new HtmlPageReader(new HtmlPageReaderOptions
{
Url = "https://dotnettutorials.net/"
});
var headClient = new HeadParserClient(client.GetSiteContent());
var feedUri = headClient.GetSiteFeed();
if (feedUri == "")
{
Assert.Fail("Failed to find the RSS feed");
}
if (feedUri == "") Assert.Fail("Failed to find the RSS feed");
}

[Fact]
public void SiteDoesNotReturnRssFeed()
{
var client = new HtmlPageReader("https://www.engadget.com/");
var client = new HtmlPageReader(new HtmlPageReaderOptions
{
Url = "https://www.engadget.com/"
});
var headClient = new HeadParserClient(client.GetSiteContent());
var feedUri = headClient.GetSiteFeed();
if (feedUri == "")
{
Assert.Fail("");
}
if (feedUri == "") Assert.Fail("");
}

[Fact]
public void CanFindBodyOfTheArticle()
{
var client = new HtmlPageReader("https://www.engadget.com/apple-is-convinced-my-dog-is-stalking-me-143100932.html");
var client = new HtmlPageReader(new HtmlPageReaderOptions
{
Url = "https://www.engadget.com/apple-is-convinced-my-dog-is-stalking-me-143100932.html"
});
var c = client.CollectPostContent();
Console.WriteLine(c);
}

[Fact]
public void FindYoutubeChannelId()
{
var url = "https://www.youtube.com/@CityPlannerPlays";
//var b = new BrowserClient();
//var pageSource = b.GetPageSource(url);

var client = new HtmlPageReader(new HtmlPageReaderOptions
{
Url = url
});
client.Parse();
if (client.Data.Header.YoutubeChannelID is null) Assert.Fail("missing youtube id");
}

[Fact]
public void CanExtractHeadersFromReddit()
{
var url = "https://www.reddit.com/";
var client = new HtmlPageReader(new HtmlPageReaderOptions
{
Url = url
});
client.Parse();
if (client.Data.Header.Image == "") Assert.Fail("missing an expected image from the reddit header.");
}

[Fact]
public void CanExtractHeadersFromSubreddit()
{
var url = "https://www.reddit.com/r/ffxiv";
var client = new HtmlPageReader(new HtmlPageReaderOptions
{
Url = url
});
client.Parse();
if (client.Data.Header.Image == "") Assert.Fail("missing an expected image from the reddit header.");
}
}
@@ -1,8 +1,8 @@

using Newsbot.Collector.Database.Repositories;
using Newsbot.Collector.Domain.Models;

namespace Newsbot.Collector.Tests.Tables;

public class SourcesTableTests
{
[Fact]
@@ -1,10 +0,0 @@
namespace Newsbot.Collector.Tests;

public class UnitTest1
{
[Fact]
public void Test1()
{

}
}
seed.ps1
@@ -6,20 +6,19 @@ param (

$ErrorActionPreference = 'Stop'

function NewRedditSource {
function New-RedditSource {
param (
[string] $Name,
[string] $Url
)
$urlEncoded = [uri]::EscapeDataString($Url)

$param = "name=$Name&url=$urlEncoded"
$param = "url=$urlEncoded"
$uri = "$ApiServer/api/sources/new/reddit?$param"
$res = Invoke-RestMethod -Method Post -Uri $uri
return $res
}

function NewRssSource {
function New-RssSource {
param (
[string] $Name,
[string] $Url
@@ -31,19 +30,18 @@ function NewRssSource {
return $res
}

function NewYoutubeSource {
function New-YoutubeSource {
param (
[string] $Name,
[string] $Url
[Parameter(Required)][string] $Url
)
$urlEncoded = [uri]::EscapeDataString($Url)
[string] $param = "name=$Name&url=$urlEncoded"
[string] $param = "url=$urlEncoded"
[string] $uri = "$ApiServer/api/sources/new/youtube?$param"
$res = Invoke-RestMethod -Method Post -Uri $uri
return $res
}

function NewTwitchSource {
function New-TwitchSource {
param (
[string] $Name
)
@@ -81,18 +79,18 @@ function New-Subscription {
# Load Secrets file
$secrets = Get-Content $JsonSecrets -Raw | ConvertFrom-Json

$redditDadJokes = NewRedditSource -Name "dadjokes" -Url "https://reddit.com/r/dadjokes"
$redditSteamDeck = NewRedditSource -Name "steamdeck" -Url "https://reddit.com/r/steamdeck"
$redditDadJokes = New-RedditSource -Name "dadjokes"
$redditSteamDeck = New-RedditSource -Name "steamdeck"

$rssSteamDeck = NewRssSource -Name "Steampowered - Steam Deck" -Url "https://store.steampowered.com/feeds/news/app/1675200/?cc=US&l=english&snr=1_2108_9__2107"
$rssFaysHaremporium = NewRssSource -Name "Fay's Haremporium" -Url "https://blog.nyxstudios.moe/rss/"
$rssPodcastLetsMosley = NewRssSource -Name "Let's Mosley" -Url "https://anchor.fm/s/6c7aa4c4/podcast/rss"
$rssSteamDeck = New-RssSource -Name "Steampowered - Steam Deck" -Url "https://store.steampowered.com/feeds/news/app/1675200/?cc=US&l=english&snr=1_2108_9__2107"
$rssFaysHaremporium = New-RssSource -Name "Fay's Haremporium" -Url "https://blog.nyxstudios.moe/rss/"
$rssPodcastLetsMosley = New-RssSource -Name "Let's Mosley" -Url "https://anchor.fm/s/6c7aa4c4/podcast/rss"

$youtubeGameGrumps = NewYoutubeSource -Name "Game Grumps" -Url "https://www.youtube.com/user/GameGrumps"
$youtubeCityPlannerPlays = NewYoutubeSource -Name "City Planner Plays" -Url "https://www.youtube.com/c/cityplannerplays"
$youtubeGameGrumps = New-YoutubeSource -Url "https://www.youtube.com/user/GameGrumps"
$youtubeCityPlannerPlays = New-YoutubeSource -Url "https://www.youtube.com/c/cityplannerplays"
$youtubeLinusTechTips = New-YoutubeSource -Url "https://www.youtube.com/@LinusTechTips"

$twitchNintendo = NewTwitchSource -Name "Nintendo"
$twitchNintendo.id

$miharuMonitor = New-DiscordWebhook -Server "Miharu Monitor" -Channel "dev" -Url $secrets.MiharuMonitor.dev01

@@ -103,4 +101,5 @@ New-Subscription -SourceId $rssFaysHaremporium.id -DiscordWebhookId $miharuMonit
New-Subscription -SourceId $rssPodcastLetsMosley.id -DiscordWebhookId $miharuMonitor.id
New-Subscription -SourceId $youtubeGameGrumps.id -DiscordWebhookId $miharuMonitor.id
New-Subscription -SourceId $youtubeCityPlannerPlays.id -DiscordWebhookId $miharuMonitor.id
New-Subscription -SourceId $youtubeLinusTechTips.id -DiscordWebhookId $miharuMonitor.id
New-Subscription -SourceId $twitchNintendo.id -DiscordWebhookId $miharuMonitor.id