James Tombleson
9be985da0a
* Found the meta tags on youtube... in the body and updated the client to pull them out. * Updated namespace on test * I think formatting cleaned this up * Seed migrations have been cleaned up to get my configs out and moving them to a script. * Updates to the ISourcesRepository.cs to allow for new calls to the db. * formatter * Db models updated. Icon now can track sourceID and source can have a youtube id. * Updated api logger to ignore otel if no connection string given. * updated docker init so I can run migrations from the image * seed was updated to reflect the new api changes * Updated the SourcesController.cs to grab icon data. * Added reddit const values * Minor changes to HtmlPageReader.cs * Jobs are now pulling in the config section to bundle values. * Removed youtube api, not needed anymore. * test updates
80 lines
2.3 KiB
C#
80 lines
2.3 KiB
C#
using Newsbot.Collector.Services.HtmlParser;
|
|
|
|
namespace Newsbot.Collector.Tests.Services;
|
|
|
|
/// <summary>
/// Tests for <see cref="HtmlPageReader"/> and <see cref="HeadParserClient"/>
/// that point the reader at real public URLs.
/// </summary>
/// <remarks>
/// NOTE(review): every test targets a live third-party site, so these
/// presumably need network access and may break when the remote markup
/// changes - confirm whether they should be tagged as integration tests.
/// </remarks>
public class HtmlPageReaderTests
{
    /// <summary>
    /// Builds an <see cref="HtmlPageReader"/> whose options point at <paramref name="url"/>.
    /// </summary>
    /// <param name="url">Address of the page the reader should work against.</param>
    /// <returns>A new reader configured with the given URL.</returns>
    private static HtmlPageReader CreateReader(string url)
    {
        return new HtmlPageReader(new HtmlPageReaderOptions
        {
            Url = url
        });
    }

    /// <summary>
    /// The head of the dotnettutorials.net landing page should yield a non-empty feed value.
    /// </summary>
    [Fact]
    public void BaseSiteContainsRssFeed()
    {
        var reader = CreateReader("https://dotnettutorials.net/");
        var headClient = new HeadParserClient(reader.GetSiteContent());

        var feedUri = headClient.GetSiteFeed();

        if (feedUri == "")
        {
            Assert.Fail("Failed to find the RSS feed");
        }
    }

    /// <summary>
    /// Runs the feed lookup against the engadget.com landing page and fails
    /// when the returned feed value is empty.
    /// </summary>
    [Fact]
    public void SiteDoesNotReturnRssFeed()
    {
        var reader = CreateReader("https://www.engadget.com/");
        var headClient = new HeadParserClient(reader.GetSiteContent());

        var feedUri = headClient.GetSiteFeed();

        // NOTE(review): the test name says the site does NOT return a feed, yet
        // the check fails when the feed is empty. The condition is kept as-is;
        // confirm which expectation is intended and rename or invert accordingly.
        if (feedUri == "")
        {
            Assert.Fail("GetSiteFeed() returned an empty value for https://www.engadget.com/");
        }
    }

    /// <summary>
    /// Collects the post content of a single engadget article and writes it to the console.
    /// </summary>
    [Fact]
    public void CanFindBodyOfTheArticle()
    {
        var reader = CreateReader("https://www.engadget.com/apple-is-convinced-my-dog-is-stalking-me-143100932.html");

        var content = reader.CollectPostContent();

        // NOTE(review): nothing is asserted here, so the test only fails if
        // CollectPostContent() throws - consider asserting on the content.
        Console.WriteLine(content);
    }

    /// <summary>
    /// Parsing a YouTube channel page should populate the header's channel id.
    /// </summary>
    [Fact]
    public void FindYoutubeChannelId()
    {
        var reader = CreateReader("https://www.youtube.com/@CityPlannerPlays");

        reader.Parse();

        if (reader.Data.Header.YoutubeChannelID is null)
        {
            Assert.Fail("missing youtube id");
        }
    }

    /// <summary>
    /// Parsing the reddit front page should populate a non-empty header image.
    /// </summary>
    [Fact]
    public void CanExtractHeadersFromReddit()
    {
        var reader = CreateReader("https://www.reddit.com/");

        reader.Parse();

        if (reader.Data.Header.Image == "")
        {
            Assert.Fail("missing an expected image from the reddit header.");
        }
    }

    /// <summary>
    /// Parsing a subreddit page should populate a non-empty header image.
    /// </summary>
    [Fact]
    public void CanExtractHeadersFromSubreddit()
    {
        var reader = CreateReader("https://www.reddit.com/r/ffxiv");

        reader.Parse();

        if (reader.Data.Header.Image == "")
        {
            Assert.Fail("missing an expected image from the reddit header.");
        }
    }
}