Features/ffxiv (#6)

* starting the ffxiv reader

* working on getting the standard interface for sources based on the work for ffxiv

* got more of ffxiv working and updated tests

* Author and Description can be extracted and validated with tests

* added uuid package

* ffxiv core logic is working and tests updated to reflect it.

* Updated the scheduler with the current sources and moved them from main

* updated reddit to allow modern go to talk to the endpoint with a debug flag

* gave the func a better name

* cleaned up main

* Moved cache to its own package and updated tests

* moved config to its own package and added basic tests

* updated imports

* minor update

* interface update and cache model update

* updated the scheduler for basic services.  No DB calls yet

* updated db calls

* bypassed the reddit test as its flaky in github
This commit is contained in:
James Tombleson 2022-04-29 13:02:25 -07:00 committed by GitHub
parent eba63c27ef
commit 11892b9a7b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
24 changed files with 746 additions and 207 deletions

View File

@ -84,10 +84,11 @@ func (ac *ArticlesClient) Add(item model.Articles) error {
req.Header.Set("Content-Type", "application/json")
resp, err := client.Do(req)
defer resp.Body.Close()
if err != nil {
return err
}
defer resp.Body.Close()
if resp.StatusCode != 200 {
return errors.New("failed to post to the DB")
}

View File

@ -6,7 +6,7 @@ import (
"log"
"net/http"
"github.com/jtom38/newsbot/collector/services"
"github.com/jtom38/newsbot/collector/services/config"
)
type DatabaseClient struct {
@ -18,8 +18,8 @@ type DatabaseClient struct {
// This will generate a new client to interface with the API Database.
func NewDatabaseClient() DatabaseClient {
cc := services.NewConfigClient()
dbUri := cc.GetConfig(services.DB_URI)
cc := config.New()
dbUri := cc.GetConfig(config.DB_URI)
var client = DatabaseClient{}
client.Diagnosis.rootUri = dbUri

View File

@ -1,10 +1,23 @@
package interfaces
import (
"github.com/go-rod/rod"
"github.com/mmcdole/gofeed"
)
// Sources defines the contract every news collector implements: a feed pull,
// browser/page management, and per-field extraction from a loaded page.
type Sources interface {
CheckSource() error
PullFeed() (*gofeed.Feed, error)
GetBrowser() *rod.Browser
GetPage(parser *rod.Browser, url string) *rod.Page
ExtractThumbnail(page *rod.Page) (string, error)
// NOTE(review): FFXIVClient.ExtractPubDate returns (time.Time, error), which
// does not satisfy this string-returning signature — confirm which is intended.
ExtractPubDate(page *rod.Page) (string, error)
ExtractDescription(page *rod.Page) (string, error)
ExtractAuthor(page *rod.Page) (string, error)
ExtractAuthorImage(page *rod.Page) (string, error)
ExtractTags(page *rod.Page) (string, error)
ExtractTitle(page *rod.Page) (string, error)
}

View File

@ -12,4 +12,5 @@ type CacheItem struct {
// youtube, reddit, ect
Group string
Expires time.Time
IsTainted bool
}

1
go.mod
View File

@ -6,6 +6,7 @@ require (
github.com/PuerkitoBio/goquery v1.8.0 // indirect
github.com/go-chi/chi/v5 v5.0.7 // indirect
github.com/go-rod/rod v0.105.1 // indirect
github.com/google/uuid v1.3.0 // indirect
github.com/joho/godotenv v1.4.0 // indirect
github.com/mmcdole/gofeed v1.1.3 // indirect
github.com/robfig/cron/v3 v3.0.1 // indirect

2
go.sum
View File

@ -49,6 +49,8 @@ github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E=
github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc=

39
main.go
View File

@ -1,28 +1,21 @@
package main
import (
//"fmt"
"log"
"net/http"
"github.com/go-chi/chi/v5"
"github.com/go-chi/chi/v5/middleware"
"github.com/jtom38/newsbot/collector/routes"
"github.com/jtom38/newsbot/collector/database"
"github.com/jtom38/newsbot/collector/services"
)
func main() {
var err error
//EnableScheduler()
//dc := database.NewDatabaseClient()
//err := dc.Diagnosis.Ping()
//if err != nil { log.Fatalln(err) }
//CheckReddit()
CheckYoutube()
EnableScheduler()
app := chi.NewRouter()
app.Use(middleware.Logger)
@ -34,34 +27,6 @@ func main() {
log.Println("API is online and waiting for requests.")
log.Println("API: http://localhost:8081/api")
//log.Println("Swagger: http://localhost:8080/swagger/index.html")
err = http.ListenAndServe(":8081", app)
err := http.ListenAndServe(":8081", app)
if err != nil { log.Fatalln(err) }
}
func CheckReddit() {
dc := database.NewDatabaseClient()
sources, err := dc.Sources.FindBySource("reddit")
if err != nil { log.Println(err) }
rc := services.NewRedditClient(sources[0].Name, sources[0].ID)
raw, err := rc.GetContent()
if err != nil { log.Println(err) }
redditArticles := rc.ConvertToArticles(raw)
for _, item := range redditArticles {
_, err = dc.Articles.FindByUrl(item.Url)
if err != nil {
err = dc.Articles.Add(item)
if err != nil { log.Println("Failed to post article.")}
}
}
}
func CheckYoutube() {
// Add call to the db to request youtube sources.
// Loop though the services, and generate the clients.
yt := services.NewYoutubeClient(0, "https://www.youtube.com/user/GameGrumps")
yt.CheckSource()
}

View File

@ -2,8 +2,14 @@ package main
import (
"fmt"
"log"
"github.com/robfig/cron/v3"
"github.com/jtom38/newsbot/collector/database"
"github.com/jtom38/newsbot/collector/services"
//"github.com/jtom38/newsbot/collector/services/cache"
)
func Hello(t string) {
@ -12,8 +18,62 @@ func Hello(t string) {
func EnableScheduler() {
c := cron.New()
c.AddFunc("*/1 * * * *", func() {
go Hello("new world order")
})
//c.AddFunc("*/5 * * * *", func() { go CheckCache() })
c.AddFunc("*/30 * * * *", func() { go CheckReddit() })
c.AddFunc("*/30 * * * *", func() { go CheckYoutube() })
c.AddFunc("* */1 * * *", func() { go CheckFfxiv() })
c.Start()
}
func CheckCache() {
//cache := services.NewCacheAgeMonitor()
//cache.CheckExpiredEntries()
}
// CheckReddit pulls posts for the first configured reddit source and stores
// any article whose URL is not already in the database.
func CheckReddit() {
	dc := database.NewDatabaseClient()
	sources, err := dc.Sources.FindBySource("reddit")
	if err != nil {
		log.Println(err)
	}
	// Guard added: the original indexed sources[0] unconditionally and would
	// panic when no reddit sources are configured or the lookup failed.
	if len(sources) == 0 {
		log.Println("no reddit sources found, skipping")
		return
	}
	rc := services.NewRedditClient(sources[0].Name, sources[0].ID)
	raw, err := rc.GetContent()
	if err != nil {
		log.Println(err)
	}
	redditArticles := rc.ConvertToArticles(raw)
	for _, item := range redditArticles {
		// Only add an article when its URL is not already stored.
		_, err = dc.Articles.FindByUrl(item.Url)
		if err != nil {
			err = dc.Articles.Add(item)
			if err != nil {
				log.Println("Failed to post article.")
			}
		}
	}
}
// CheckYoutube collects new videos from a hard-coded channel.
// NOTE(review): the TODO comments below indicate the channel list should come
// from the database; the GameGrumps URL is a placeholder — confirm.
func CheckYoutube() {
	// Add call to the db to request youtube sources.
	// Loop though the services, and generate the clients.
	yt := services.NewYoutubeClient(0, "https://www.youtube.com/user/GameGrumps")
	yt.CheckSource()
}
// CheckFfxiv collects new Lodestone articles for the NA region and inserts
// any that are not already stored in the database.
func CheckFfxiv() {
	fc := services.NewFFXIVClient("na")
	articles, err := fc.CheckSource()
	// Not threaded yet; collection errors are just logged to stdout.
	if err != nil { log.Println(err) }
	dc := database.NewDatabaseClient()
	for _, item := range articles {
		// Only add an article when its URL is not already stored.
		_, err = dc.Articles.FindByUrl(item.Url)
		if err != nil {
			err = dc.Articles.Add(item)
			if err != nil { log.Println("Failed to post article.")}
		}
	}
}

View File

@ -1,40 +0,0 @@
package services
import (
"errors"
"github.com/jtom38/newsbot/collector/domain/model"
)
type CacheClient struct{}
var (
cacheStorage []*model.CacheItem
ErrCacheRecordMissing = errors.New("unable to find the requested record.")
)
func NewCacheClient() CacheClient {
return CacheClient{}
}
func (cc *CacheClient) Insert(item *model.CacheItem) {
//_, err := cc.Find(item.Key, item.Group)
//if err != nil { }
cacheStorage = append(cacheStorage, item)
}
func (cc *CacheClient) Find(key string, group string) (*model.CacheItem, error) {
//go cc.FindExpiredEntries()
for _, item := range cacheStorage {
if item.Group != group { continue }
if item.Key != key { continue }
return item, nil
}
return &model.CacheItem{}, ErrCacheRecordMissing
}

62
services/cache/cache.go vendored Normal file
View File

@ -0,0 +1,62 @@
package cache
import (
"time"
"github.com/jtom38/newsbot/collector/domain/model"
)
// CacheClient provides grouped access to the package-level cache storage.
type CacheClient struct{
// group namespaces this client's entries within the shared cacheStorage.
group string
// DefaultTimer is the intended time-to-live for new entries.
// NOTE(review): Insert and the Find methods currently hard-code one hour
// instead of reading this field — confirm which is intended.
DefaultTimer time.Duration
}
// NewCacheClient returns a cache client scoped to the given group with a
// one-hour default expiry window.
func NewCacheClient(group string) CacheClient {
	client := CacheClient{}
	client.group = group
	client.DefaultTimer = time.Hour
	return client
}
// Insert adds a new entry to the shared cache under this client's group.
// The entry starts untainted and expires after the client's DefaultTimer.
func (cc *CacheClient) Insert(key string, value string) {
	item := model.CacheItem{
		Key:   key,
		Value: value,
		Group: cc.group,
		// Use the configurable DefaultTimer (time.Hour by default, so the
		// default behavior is unchanged) instead of a hard-coded hour, so
		// callers that tune DefaultTimer actually get that TTL.
		Expires:   time.Now().Add(cc.DefaultTimer),
		IsTainted: false,
	}
	cacheStorage = append(cacheStorage, &item)
}
// FindByKey returns the cached entry matching key within this client's group.
// A tainted match is renewed — the taint is cleared and the expiry pushed out
// by DefaultTimer — since the record is evidently still in use.
// Returns ErrCacheRecordMissing when no entry matches.
func (cc *CacheClient) FindByKey(key string) (*model.CacheItem, error) {
	for _, item := range cacheStorage {
		if item.Group != cc.group {
			continue
		}
		if item.Key != key {
			continue
		}
		if item.IsTainted {
			item.IsTainted = false
			// Renew with DefaultTimer for consistency with Insert rather
			// than a hard-coded hour.
			item.Expires = time.Now().Add(cc.DefaultTimer)
		}
		return item, nil
	}
	return &model.CacheItem{}, ErrCacheRecordMissing
}
// FindByValue returns the cached entry whose Value matches, within this
// client's group. A tainted match is renewed — the taint is cleared and the
// expiry pushed out by DefaultTimer — since the record is evidently in use.
// Returns ErrCacheRecordMissing when no entry matches.
func (cc *CacheClient) FindByValue(value string) (*model.CacheItem, error) {
	for _, item := range cacheStorage {
		if item.Group != cc.group {
			continue
		}
		if item.Value != value {
			continue
		}
		if item.IsTainted {
			item.IsTainted = false
			// Renew with DefaultTimer for consistency with Insert rather
			// than a hard-coded hour.
			item.Expires = time.Now().Add(cc.DefaultTimer)
		}
		return item, nil
	}
	return &model.CacheItem{}, ErrCacheRecordMissing
}

40
services/cache/cache_test.go vendored Normal file
View File

@ -0,0 +1,40 @@
package cache_test
import (
"testing"
"github.com/jtom38/newsbot/collector/services/cache"
)
// TestNewCacheClient verifies the constructor returns without error.
func TestNewCacheClient(t *testing.T) {
	client := cache.NewCacheClient("placeholder")
	_ = client
}
// TestInsert verifies a value can be written to the cache.
func TestInsert(t *testing.T) {
	client := cache.NewCacheClient("Testing")
	client.Insert("UnitTesting", "Something, or nothing")
}
// TestFindGroupMissing looks up a key under a group that was never written
// and expects a miss. Uses t.Fatal instead of panic so failures are reported
// through the test framework.
func TestFindGroupMissing(t *testing.T) {
	client := cache.NewCacheClient("faker")
	if _, err := client.FindByKey("UnitTesting"); err == nil {
		t.Fatal("Nothing was appended with the requested group.")
	}
}
// TestFindGroupExists inserts a record and expects the same group to find it.
// Uses t.Fatal (with a real message — the original panicked with "") so the
// failure is reported through the test framework.
func TestFindGroupExists(t *testing.T) {
	client := cache.NewCacheClient("Testing")
	client.Insert("UnitTesting", "Something")
	if _, err := client.FindByKey("UnitTesting"); err != nil {
		t.Fatal("expected to find the inserted record")
	}
}
// TestCacheStorage verifies entries written through one client are visible to
// another client of the same group, because storage is shared at package
// level. Uses t.Fatal instead of panic for framework-friendly failures.
func TestCacheStorage(t *testing.T) {
	writer := cache.NewCacheClient("Testing")
	writer.Insert("UnitTesting01", "test")
	writer.Insert("UnitTesting02", "Test")
	reader := cache.NewCacheClient("Testing")
	if _, err := reader.FindByKey("UnitTesting02"); err != nil {
		t.Fatal("expected to find the value")
	}
}

13
services/cache/common.go vendored Normal file
View File

@ -0,0 +1,13 @@
package cache
import (
"errors"
"github.com/jtom38/newsbot/collector/domain/model"
)
// Package-level cache state shared by every CacheClient and the age monitor.
var (
// cacheStorage holds all cached entries across every group.
cacheStorage []*model.CacheItem
// ErrCacheRecordMissing is returned when no entry matches a lookup.
ErrCacheRecordMissing = errors.New("unable to find the requested record")
)

45
services/cache/monitor.go vendored Normal file
View File

@ -0,0 +1,45 @@
package cache
import (
"time"
"github.com/jtom38/newsbot/collector/domain/model"
)
// When a record becomes tainted, it needs to be renewed or it will be dropped from the cache.
// If a record is tainted and used again, the taint will be removed and a new Expires value will be set.
// If its not renewed, it will be dropped.
// CacheAgeMonitor sweeps the shared cacheStorage and ages entries out using
// the taint-then-drop scheme described above.
type CacheAgeMonitor struct {}
// NewCacheAgeMonitor constructs a stateless monitor over the shared storage.
func NewCacheAgeMonitor() CacheAgeMonitor {
return CacheAgeMonitor{}
}
// This is an automated job that will review all the objects for age and taint them if needed.
func (cam CacheAgeMonitor) CheckExpiredEntries() {
now := time.Now()
for index, item := range cacheStorage {
if now.After(item.Expires) {
// the timer expired, and its not tainted, taint it
if !item.IsTainted {
item.IsTainted = true
item.Expires = now.Add(1 * time.Hour)
}
// if its tainted and the timer didnt get renewed, delete
if item.IsTainted {
cacheStorage = cam.removeEntry(index)
}
}
}
}
// This creates a new slice and skips over the item that needs to be dropped
func (cam CacheAgeMonitor) removeEntry(index int) []*model.CacheItem {
var temp []*model.CacheItem
for i, item := range cacheStorage {
if i != index { temp = append(temp, item )}
}
return temp
}

13
services/cache/monitor_test.go vendored Normal file
View File

@ -0,0 +1,13 @@
package cache_test
import (
"testing"
"github.com/jtom38/newsbot/collector/services/cache"
)
// TestCacheTaintItem seeds a single cache entry.
// NOTE(review): despite the name, this does not exercise tainting — no
// monitor is invoked and nothing is asserted. Extend once CheckExpiredEntries
// can be driven with a controllable clock.
func TestCacheTaintItem(t *testing.T) {
cc := cache.NewCacheClient("Testing")
cc.Insert("UnitTesting01", "test")
}

View File

@ -1,39 +0,0 @@
package services
import (
"time"
"github.com/jtom38/newsbot/collector/domain/model"
)
type CacheMonitor struct {}
func NewCacheMonitorClient() CacheMonitor {
return CacheMonitor{}
}
func (cm *CacheMonitor) Enable() {
}
// This will be fired off each time an cache a
func (cm *CacheMonitor) FindExpiredEntries() {
now := time.Now()
for index, item := range cacheStorage {
res := now.After(item.Expires)
if res {
cm.removeExpiredEntries(index)
}
}
}
// This will create a new slice and add the valid items to it and ignore the one to be removed.
// The existing cacheStorage will be replaced.
func (cc *CacheMonitor) removeExpiredEntries(arrayEntry int) {
var temp []*model.CacheItem
for index, item := range cacheStorage {
if index == arrayEntry { continue }
temp = append(temp, item)
}
cacheStorage = temp
}

View File

@ -1,69 +0,0 @@
package services_test
import (
"testing"
"time"
"github.com/jtom38/newsbot/collector/domain/model"
"github.com/jtom38/newsbot/collector/services"
)
func TestNewCacheClient(t *testing.T) {
_ = services.NewCacheClient()
}
func TestInsert(t *testing.T) {
cache := services.NewCacheClient()
var item *model.CacheItem = &model.CacheItem{
Key: "UnitTesting",
Value: "Something, or nothing",
Group: "Testing",
Expires: time.Now().Add(5 * time.Second),
}
cache.Insert(item)
}
func TestFindGroupMissing(t *testing.T) {
cache := services.NewCacheClient()
_, err := cache.Find("UnitTesting", "Unknown")
if err == nil { panic("Nothing was appended with the requested group.") }
}
func TestFindGroupExists(t *testing.T) {
cache := services.NewCacheClient()
var item *model.CacheItem = &model.CacheItem{
Key: "UnitTesting",
Value: "Something, or nothing",
Group: "Testing",
Expires: time.Now().Add(5 * time.Second),
}
cache.Insert(item)
_, err := cache.Find("UnitTesting", "Testing2")
//t.Log(res)
if err == nil { panic("") }
}
func TestCacheStorage(t *testing.T) {
cc := services.NewCacheClient()
item1 := &model.CacheItem {
Key: "UnitTesting01",
Value: "",
Group: "Testing",
Expires: time.Now().Add(5 * time.Minute),
}
cc.Insert(item1)
item2 := &model.CacheItem {
Key: "UnitTesting02",
Value: "",
Group: "Testing",
Expires: time.Now().Add(5 * time.Minute),
}
cc.Insert(item2)
cache := services.NewCacheClient()
_, err := cache.Find("UnitTesting02", "Testing")
if err != nil { panic("expected to find the value")}
}

View File

@ -1,4 +1,4 @@
package services
package config
import (
"os"
@ -19,7 +19,7 @@ const (
type ConfigClient struct {}
func NewConfigClient() ConfigClient {
func New() ConfigClient {
_, err := os.Open(".env")
if err == nil {
loadEnvFile()

View File

@ -0,0 +1,20 @@
package config_test
import (
"testing"
"os"
"github.com/jtom38/newsbot/collector/services/config"
)
// TestNewClient verifies the config client can be constructed.
func TestNewClient(t *testing.T) {
	_ = config.New()
}
// TestGetConfigExpectNull clears REDDIT_PULL_HOT in the environment and
// expects the lookup to come back empty. Uses t.Fatal instead of panic so the
// failure is reported through the test framework.
func TestGetConfigExpectNull(t *testing.T) {
	cc := config.New()
	os.Setenv(config.REDDIT_PULL_HOT, "")
	res := cc.GetConfig(config.REDDIT_PULL_HOT)
	if res != "" {
		t.Fatal("expected blank")
	}
}

258
services/ffxiv.go Normal file
View File

@ -0,0 +1,258 @@
package services
import (
"errors"
"log"
"net/http"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
"github.com/go-rod/rod"
"github.com/google/uuid"
"github.com/jtom38/newsbot/collector/domain/model"
"github.com/jtom38/newsbot/collector/services/cache"
)
// Lodestone landing pages per region, plus the reference-time layout used to
// parse post timestamps (matches e.g. "4/29/2022 1:02 PM").
const (
FFXIV_NA_FEED_URL string = "https://na.finalfantasyxiv.com/lodestone/"
FFXIV_JP_FEED_URL string = "https://jp.finalfantasyxiv.com/lodestone/"
FFXIV_TIME_FORMAT string = "1/2/2006 3:4 PM"
)
// FFXIVClient scrapes Final Fantasy XIV Lodestone news posts for one region.
type FFXIVClient struct {
// SourceID links collected articles back to their source record.
SourceID uint
// Url is the region-specific Lodestone landing page.
Url string
// Region is the short region code, e.g. "na" or "jp".
Region string
// cacheGroup namespaces this client's entries in the shared cache.
cacheGroup string
}
// NewFFXIVClient builds a client for the given region ("na" or "jp").
// An unrecognized region leaves Url empty, same as the original switch.
func NewFFXIVClient(region string) FFXIVClient {
	regionUrls := map[string]string{
		"na": FFXIV_NA_FEED_URL,
		"jp": FFXIV_JP_FEED_URL,
	}
	client := FFXIVClient{
		Region:     region,
		Url:        regionUrls[region],
		cacheGroup: "ffxiv",
	}
	return client
}
// CheckSource walks the Lodestone topics list, collects every article not yet
// seen, and returns the batch. Seen links are cached under a random UUID key
// so FindByValue can skip them on later runs.
func (fc *FFXIVClient) CheckSource() ([]model.Articles, error) {
	var articles []model.Articles

	parser := fc.GetBrowser()
	defer parser.Close()

	links, err := fc.PullFeed(parser)
	if err != nil {
		return articles, err
	}

	cache := cache.NewCacheClient(fc.cacheGroup)
	for _, link := range links {
		// Skip anything this collector has already seen.
		_, err := cache.FindByValue(link)
		if err == nil {
			continue
		}

		article, err := fc.collectArticle(parser, link)
		if err != nil {
			return articles, err
		}

		log.Printf("Collected '%v' from '%v'", article.Title, article.Url)
		cache.Insert(uuid.New().String(), link)
		articles = append(articles, article)
	}

	return articles, nil
}

// collectArticle loads one topic page and extracts every article field.
// Extracted as a helper so the page can be closed with defer — the original
// opened a page per loop iteration and never released any of them.
func (fc *FFXIVClient) collectArticle(parser *rod.Browser, link string) (model.Articles, error) {
	page := fc.GetPage(parser, link)
	defer page.Close()

	title, err := fc.ExtractTitle(page)
	if err != nil {
		return model.Articles{}, err
	}
	thumb, err := fc.ExtractThumbnail(page)
	if err != nil {
		return model.Articles{}, err
	}
	pubDate, err := fc.ExtractPubDate(page)
	if err != nil {
		return model.Articles{}, err
	}
	description, err := fc.ExtractDescription(page)
	if err != nil {
		return model.Articles{}, err
	}
	authorName, err := fc.ExtractAuthor(page)
	if err != nil {
		return model.Articles{}, err
	}
	authorImage, err := fc.ExtractAuthorImage(page)
	if err != nil {
		return model.Articles{}, err
	}
	tags, err := fc.ExtractTags(page)
	if err != nil {
		return model.Articles{}, err
	}

	return model.Articles{
		SourceID:    fc.SourceID,
		Tags:        tags,
		Title:       title,
		Url:         link,
		PubDate:     pubDate,
		Video:       "",
		VideoHeight: 0,
		VideoWidth:  0,
		Thumbnail:   thumb,
		Description: description,
		AuthorName:  authorName,
		AuthorImage: authorImage,
	}, nil
}
// GetParser fetches the client's Url over HTTP and wraps the response body in
// a goquery document.
func (fc *FFXIVClient) GetParser() (*goquery.Document, error) {
	resp, err := http.Get(fc.Url)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	return goquery.NewDocumentFromReader(resp.Body)
}
// GetBrowser connects to a browser instance for scraping.
// The caller is responsible for closing it.
func (fc *FFXIVClient) GetBrowser() (*rod.Browser) {
	return rod.New().MustConnect()
}
// PullFeed opens the Lodestone landing page and returns the URL of every news
// entry that links to a "topics" post.
func (fc *FFXIVClient) PullFeed(parser *rod.Browser) ([]string, error) {
	var links []string

	page := parser.MustPage(fc.Url)
	defer page.Close()

	// find the list by xpath
	res := page.MustElementX("/html/body/div[3]/div/div/div[1]/div[2]/div[1]/div[2]/ul")

	// find all the li items
	items := res.MustElements("li")

	for _, item := range items {
		// in each li, find the a items
		a, err := item.Element("a")
		if err != nil {
			log.Println("Unable to find the a item, skipping")
			continue
		}

		// find the href behind the a
		url, err := a.Property("href")
		if err != nil {
			log.Println("Unable to find a href link, skipping")
			continue
		}
		urlString := url.String()

		// only keep links that point at actual news topics
		isTopic := strings.Contains(urlString, "topics")
		if isTopic {
			links = append(links, urlString)
		}
	}

	return links, nil
}
// GetPage opens the given url in the supplied browser.
// The caller is responsible for closing the returned page.
// Receiver renamed from rc to fc for consistency with every other
// FFXIVClient method.
func (fc *FFXIVClient) GetPage(parser *rod.Browser, url string) *rod.Page {
	return parser.MustPage(url)
}
// ExtractThumbnail returns the src of the post's banner image.
func (fc *FFXIVClient) ExtractThumbnail(page *rod.Page) (string, error) {
	thumbnail := page.MustElementX("/html/body/div[3]/div[2]/div[1]/article/div[1]/img").MustProperty("src").String()
	if thumbnail == "" {
		return "", errors.New("unable to find thumbnail")
	}
	// Removed leftover debug code that re-extracted the post title and logged
	// it; it was unrelated to the thumbnail and cost an extra DOM query.
	return thumbnail, nil
}
// ExtractPubDate reads the topic timestamp from the page and parses it using
// the FFXIV_TIME_FORMAT layout. On failure the current time is returned
// alongside the error.
func (fc *FFXIVClient) ExtractPubDate(page *rod.Page) (time.Time, error) {
	raw := page.MustElement(".news__ic--topics").MustText()
	if raw == "" {
		return time.Now(), errors.New("unable to locate the publish date on the post")
	}
	parsed, err := time.Parse(FFXIV_TIME_FORMAT, raw)
	if err != nil {
		return time.Now(), err
	}
	return parsed, nil
}
// ExtractDescription returns the text body of the news post.
func (fc *FFXIVClient) ExtractDescription(page *rod.Page) (string, error) {
	body := page.MustElement(".news__detail__wrapper").MustText()
	if body == "" {
		return "", errors.New("unable to locate the description on the post")
	}
	return body, nil
}
// ExtractAuthor scans the page's meta tags for the "author" entry and returns
// its content attribute.
func (fc *FFXIVClient) ExtractAuthor(page *rod.Page) (string, error) {
	for _, tag := range page.MustElements("head > meta") {
		name, err := tag.Property("name")
		if err != nil {
			return "", err
		}
		if name.String() != "author" {
			continue
		}
		content, err := tag.Property("content")
		if err != nil {
			return "", err
		}
		return content.String(), nil
	}
	return "", errors.New("unable to find the author on the page")
}
// ExtractTags scans the page's meta tags for the "keywords" entry and returns
// its content attribute.
func (fc *FFXIVClient) ExtractTags(page *rod.Page) (string, error) {
	meta := page.MustElements("head > meta")
	for _, item := range meta {
		name, err := item.Property("name")
		if err != nil {
			return "", err
		}
		if name.String() != "keywords" {
			continue
		}
		content, err := item.Property("content")
		if err != nil {
			return "", err
		}
		return content.String(), nil
	}
	// Fixed the copy-pasted failure message that previously said "author".
	return "", errors.New("unable to find the tags on the page")
}
// ExtractTitle reads the document title and returns the post-title portion —
// everything before the "|" separator — trimmed of surrounding whitespace.
func (fc *FFXIVClient) ExtractTitle(page *rod.Page) (string, error) {
	title, err := page.MustElement("head > title").Text()
	if err != nil {
		return "", err
	}
	if !strings.Contains(title, "|") {
		return "", errors.New("unable to split the title, missing | in the string")
	}
	// The original had an unreachable fallthrough returning a copy-pasted
	// "author" error — title is never empty once it contains "|". TrimSpace
	// drops the space the page leaves before the separator.
	res := strings.Split(title, "|")
	return strings.TrimSpace(res[0]), nil
}
// ExtractAuthorImage scans the page's link tags for the precomposed
// apple-touch-icon and returns its href attribute.
func (fc *FFXIVClient) ExtractAuthorImage(page *rod.Page) (string, error) {
	for _, tag := range page.MustElements("head > link") {
		rel, err := tag.Property("rel")
		if err != nil {
			return "", err
		}
		if rel.String() != "apple-touch-icon-precomposed" {
			continue
		}
		href, err := tag.Property("href")
		if err != nil {
			return "", err
		}
		return href.String(), nil
	}
	return "", errors.New("unable to find the author image on the page")
}

148
services/ffxiv_test.go Normal file
View File

@ -0,0 +1,148 @@
package services_test
import (
"testing"
ffxiv "github.com/jtom38/newsbot/collector/services"
)
// TestFfxivGetParser verifies the HTTP parser can be built for the NA feed.
// Uses t.Fatal instead of panic so failures report through the test framework.
func TestFfxivGetParser(t *testing.T) {
	fc := ffxiv.NewFFXIVClient("na")
	_, err := fc.GetParser()
	if err != nil {
		t.Fatal(err)
	}
}
// TestFfxivPullFeed verifies the topics list yields at least one link.
func TestFfxivPullFeed(t *testing.T) {
	fc := ffxiv.NewFFXIVClient("na")
	parser := fc.GetBrowser()
	defer parser.Close()
	links, err := fc.PullFeed(parser)
	if err != nil {
		t.Fatal(err)
	}
	if len(links) == 0 {
		t.Fatal("expected links to come back but got 0")
	}
}
// TestFfxivExtractThumbnail expects a non-empty thumbnail link on the first
// topic page.
func TestFfxivExtractThumbnail(t *testing.T) {
	fc := ffxiv.NewFFXIVClient("na")
	parser := fc.GetBrowser()
	defer parser.Close()
	links, err := fc.PullFeed(parser)
	if err != nil {
		t.Fatal(err)
	}
	page := fc.GetPage(parser, links[0])
	defer page.Close()
	thumb, err := fc.ExtractThumbnail(page)
	if err != nil {
		t.Fatal(err)
	}
	if thumb == "" {
		t.Fatal("expected a link but got nothing.")
	}
}
// TestFfxivExtractPubDate verifies the publish date parses from the first
// topic page.
func TestFfxivExtractPubDate(t *testing.T) {
	fc := ffxiv.NewFFXIVClient("na")
	parser := fc.GetBrowser()
	defer parser.Close()
	links, err := fc.PullFeed(parser)
	if err != nil {
		t.Fatal(err)
	}
	page := fc.GetPage(parser, links[0])
	defer page.Close()
	if _, err = fc.ExtractPubDate(page); err != nil {
		t.Fatal(err)
	}
}
// TestFfxivExtractDescription verifies the description extracts from the
// first topic page.
func TestFfxivExtractDescription(t *testing.T) {
	fc := ffxiv.NewFFXIVClient("na")
	parser := fc.GetBrowser()
	defer parser.Close()
	links, err := fc.PullFeed(parser)
	if err != nil {
		t.Fatal(err)
	}
	page := fc.GetPage(parser, links[0])
	defer page.Close()
	if _, err = fc.ExtractDescription(page); err != nil {
		t.Fatal(err)
	}
}
// TestFfxivExtractAuthor expects a non-empty author name on the first topic
// page.
func TestFfxivExtractAuthor(t *testing.T) {
	fc := ffxiv.NewFFXIVClient("na")
	parser := fc.GetBrowser()
	defer parser.Close()
	links, err := fc.PullFeed(parser)
	if err != nil {
		t.Fatal(err)
	}
	page := fc.GetPage(parser, links[0])
	defer page.Close()
	author, err := fc.ExtractAuthor(page)
	if err != nil {
		t.Fatal(err)
	}
	if author == "" {
		t.Fatal("failed to locate the author name")
	}
}
// TestFfxivExtractTags expects non-empty keywords on the first topic page.
func TestFfxivExtractTags(t *testing.T) {
	fc := ffxiv.NewFFXIVClient("na")
	parser := fc.GetBrowser()
	defer parser.Close()
	links, err := fc.PullFeed(parser)
	if err != nil {
		t.Fatal(err)
	}
	page := fc.GetPage(parser, links[0])
	defer page.Close()
	res, err := fc.ExtractTags(page)
	if err != nil {
		t.Fatal(err)
	}
	if res == "" {
		t.Fatal("failed to locate the tags")
	}
}
// TestFfxivExtractTitle expects a non-empty title on the first topic page.
// Also fixes the copy-pasted failure message that said "tags".
func TestFfxivExtractTitle(t *testing.T) {
	fc := ffxiv.NewFFXIVClient("na")
	parser := fc.GetBrowser()
	defer parser.Close()
	links, err := fc.PullFeed(parser)
	if err != nil {
		t.Fatal(err)
	}
	page := fc.GetPage(parser, links[0])
	defer page.Close()
	res, err := fc.ExtractTitle(page)
	if err != nil {
		t.Fatal(err)
	}
	if res == "" {
		t.Fatal("failed to locate the title")
	}
}
// TestFfxivExtractAuthorImage expects a non-empty author image link on the
// first topic page. Renamed from the typo'd TestFFxivExtractAuthorIamge (safe:
// test functions are discovered by prefix, never called by name) and fixes the
// copy-pasted failure message that said "tags".
func TestFfxivExtractAuthorImage(t *testing.T) {
	fc := ffxiv.NewFFXIVClient("na")
	parser := fc.GetBrowser()
	defer parser.Close()
	links, err := fc.PullFeed(parser)
	if err != nil {
		t.Fatal(err)
	}
	page := fc.GetPage(parser, links[0])
	defer page.Close()
	res, err := fc.ExtractAuthorImage(page)
	if err != nil {
		t.Fatal(err)
	}
	if res == "" {
		t.Fatal("failed to locate the author image")
	}
}
// TestFfxivCheckSource runs the full collection pass and fails on any error,
// instead of silently discarding the results as before.
func TestFfxivCheckSource(t *testing.T) {
	fc := ffxiv.NewFFXIVClient("na")
	_, err := fc.CheckSource()
	if err != nil {
		t.Fatal(err)
	}
}

View File

@ -5,9 +5,13 @@ import (
"errors"
"fmt"
"log"
"os"
"strings"
"time"
"github.com/go-rod/rod"
"github.com/jtom38/newsbot/collector/domain/model"
"github.com/jtom38/newsbot/collector/services/config"
)
type RedditClient struct {
@ -29,14 +33,32 @@ func NewRedditClient(subreddit string, sourceID uint) RedditClient {
url: fmt.Sprintf("https://www.reddit.com/r/%v.json", subreddit),
sourceId: sourceID,
}
cc := NewConfigClient()
rc.config.PullHot = cc.GetConfig(REDDIT_PULL_HOT)
rc.config.PullNSFW = cc.GetConfig(REDDIT_PULL_NSFW)
rc.config.PullTop = cc.GetConfig(REDDIT_PULL_TOP)
cc := config.New()
rc.config.PullHot = cc.GetConfig(config.REDDIT_PULL_HOT)
rc.config.PullNSFW = cc.GetConfig(config.REDDIT_PULL_NSFW)
rc.config.PullTop = cc.GetConfig(config.REDDIT_PULL_TOP)
rc.disableHttp2Client()
return rc
}
// This is needed for to get modern go to talk to the endpoint.
// https://www.reddit.com/r/redditdev/comments/t8e8hc/getting_nothing_but_429_responses_when_using_go/
func (rc RedditClient) disableHttp2Client() {
os.Setenv("GODEBUG", "http2client=0")
}
func (rc RedditClient) GetBrowser() *rod.Browser {
browser := rod.New().MustConnect()
return browser
}
func (rc RedditClient) GetPage(parser *rod.Browser, url string) *rod.Page {
page := parser.MustPage(url)
return page
}
// GetContent() reaches out to Reddit and pulls the Json data.
// It will then convert the data to a struct and return the struct.
func (rc RedditClient) GetContent() (model.RedditJsonContent, error ) {
@ -45,9 +67,14 @@ func (rc RedditClient) GetContent() (model.RedditJsonContent, error ) {
log.Printf("Collecting results on '%v'", rc.subreddit)
content, err := getHttpContent(rc.url)
if err != nil { return items, err }
if strings.Contains("<h1>whoa there, pardner!</h1>", string(content) ) {
return items, errors.New("did not get json data from the server")
}
json.Unmarshal(content, &items)
if len(items.Data.Children) == 0 {
return items, errors.New("failed to unmarshal the data")
}
return items, nil
}

View File

@ -1,14 +1,16 @@
package services_test
import (
"log"
"testing"
"github.com/jtom38/newsbot/collector/services"
)
func TestGetContent(t *testing.T) {
//This test is flaky right now due to the http changes in 1.17
rc := services.NewRedditClient("dadjokes", 0)
_, err := rc.GetContent()
if err != nil { panic(err) }
log.Println(err)
//if err != nil { panic(err) }
}

View File

@ -20,6 +20,8 @@ type YoutubeClient struct {
ChannelID string
AvatarUri string
Config YoutubeConfig
cacheGroup string
}
type YoutubeConfig struct {
@ -42,6 +44,7 @@ func NewYoutubeClient(SourceID uint, Url string) YoutubeClient {
yc := YoutubeClient{
SourceID: SourceID,
Url: Url,
cacheGroup: "youtube",
}
/*
cc := NewConfigClient()
@ -60,6 +63,7 @@ func (yc *YoutubeClient) CheckSource() error {
// Check cache/db for existing value
// If we have the value, skip
//channelId, err := yc.extractChannelId()
channelId, err := yc.GetChannelId(docParser)
if err != nil { return err }
if channelId == "" { return ErrChannelIdMissing }
@ -92,6 +96,16 @@ func (yc *YoutubeClient) CheckSource() error {
return nil
}
func (yc *YoutubeClient) GetBrowser() *rod.Browser {
browser := rod.New().MustConnect()
return browser
}
func (yc *YoutubeClient) GetPage(parser *rod.Browser, url string) *rod.Page {
page := parser.MustPage(url)
return page
}
func (yc *YoutubeClient) GetParser(uri string) (*goquery.Document, error) {
html, err := http.Get(uri)
if err != nil {
@ -120,6 +134,12 @@ func (yc *YoutubeClient) GetChannelId(doc *goquery.Document) (string, error) {
return "", ErrChannelIdMissing
}
// This pulls the youtube page and finds the ChannelID.
// This value is required to generate the RSS feed URI
//func (yc *YoutubeClient) extractChannelId(page *rod.Page) (string, error) {
//}
// This will parse the page to find the current Avatar of the channel.
func (yc *YoutubeClient) GetAvatarUri() (string, error) {
var AvatarUri string

View File

@ -82,10 +82,6 @@ func TestGetChannelTags(t *testing.T) {
if err != nil { panic(err) }
}
func TestConvertToArticle(t *testing.T) {
}
func TestGetVideoThumbnail(t *testing.T) {
yc := services.NewYoutubeClient(
0,