Features/ffxiv (#6)
* starting the ffxiv reader * working on getting the standard interface for sources based on the work for ffxiv * got more of ffxiv working and updated tests * Author and Description can be extracted and validated with tests * added uuid package * ffxiv core logic is working and testes updated to reflect it. * Updated the scheduler with the current sources and moved them from main * updated reddit to allow modern go to talk to the endpoint with a debug flag * gave the func a better name * cleaned up main * Moved cache to its own package and updated tests" * moved config to its own package and added basic tests * updated imports * minor update" * interface update and cache model update * updated the scheduler for basic services. No DB calls yet * updated db calls * bypassed the reddit test as its flaky in github
This commit is contained in:
parent
eba63c27ef
commit
11892b9a7b
@ -84,10 +84,11 @@ func (ac *ArticlesClient) Add(item model.Articles) error {
|
|||||||
|
|
||||||
req.Header.Set("Content-Type", "application/json")
|
req.Header.Set("Content-Type", "application/json")
|
||||||
resp, err := client.Do(req)
|
resp, err := client.Do(req)
|
||||||
defer resp.Body.Close()
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
if resp.StatusCode != 200 {
|
if resp.StatusCode != 200 {
|
||||||
return errors.New("failed to post to the DB")
|
return errors.New("failed to post to the DB")
|
||||||
}
|
}
|
||||||
|
@ -6,7 +6,7 @@ import (
|
|||||||
"log"
|
"log"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
|
||||||
"github.com/jtom38/newsbot/collector/services"
|
"github.com/jtom38/newsbot/collector/services/config"
|
||||||
)
|
)
|
||||||
|
|
||||||
type DatabaseClient struct {
|
type DatabaseClient struct {
|
||||||
@ -18,8 +18,8 @@ type DatabaseClient struct {
|
|||||||
|
|
||||||
// This will generate a new client to interface with the API Database.
|
// This will generate a new client to interface with the API Database.
|
||||||
func NewDatabaseClient() DatabaseClient {
|
func NewDatabaseClient() DatabaseClient {
|
||||||
cc := services.NewConfigClient()
|
cc := config.New()
|
||||||
dbUri := cc.GetConfig(services.DB_URI)
|
dbUri := cc.GetConfig(config.DB_URI)
|
||||||
|
|
||||||
var client = DatabaseClient{}
|
var client = DatabaseClient{}
|
||||||
client.Diagnosis.rootUri = dbUri
|
client.Diagnosis.rootUri = dbUri
|
||||||
|
@ -1,10 +1,23 @@
|
|||||||
package interfaces
|
package interfaces
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"github.com/go-rod/rod"
|
||||||
"github.com/mmcdole/gofeed"
|
"github.com/mmcdole/gofeed"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Sources interface {
|
type Sources interface {
|
||||||
CheckSource() error
|
CheckSource() error
|
||||||
PullFeed() (*gofeed.Feed, error)
|
PullFeed() (*gofeed.Feed, error)
|
||||||
}
|
|
||||||
|
GetBrowser() *rod.Browser
|
||||||
|
GetPage(parser *rod.Browser, url string) *rod.Page
|
||||||
|
|
||||||
|
ExtractThumbnail(page *rod.Page) (string, error)
|
||||||
|
ExtractPubDate(page *rod.Page) (string, error)
|
||||||
|
ExtractDescription(page *rod.Page) (string, error)
|
||||||
|
ExtractAuthor(page *rod.Page) (string, error)
|
||||||
|
ExtractAuthorImage(page *rod.Page) (string, error)
|
||||||
|
ExtractTags(page *rod.Page) (string, error)
|
||||||
|
ExtractTitle(page *rod.Page) (string, error)
|
||||||
|
}
|
||||||
|
|
||||||
|
@ -12,4 +12,5 @@ type CacheItem struct {
|
|||||||
// youtube, reddit, ect
|
// youtube, reddit, ect
|
||||||
Group string
|
Group string
|
||||||
Expires time.Time
|
Expires time.Time
|
||||||
|
IsTainted bool
|
||||||
}
|
}
|
1
go.mod
1
go.mod
@ -6,6 +6,7 @@ require (
|
|||||||
github.com/PuerkitoBio/goquery v1.8.0 // indirect
|
github.com/PuerkitoBio/goquery v1.8.0 // indirect
|
||||||
github.com/go-chi/chi/v5 v5.0.7 // indirect
|
github.com/go-chi/chi/v5 v5.0.7 // indirect
|
||||||
github.com/go-rod/rod v0.105.1 // indirect
|
github.com/go-rod/rod v0.105.1 // indirect
|
||||||
|
github.com/google/uuid v1.3.0 // indirect
|
||||||
github.com/joho/godotenv v1.4.0 // indirect
|
github.com/joho/godotenv v1.4.0 // indirect
|
||||||
github.com/mmcdole/gofeed v1.1.3 // indirect
|
github.com/mmcdole/gofeed v1.1.3 // indirect
|
||||||
github.com/robfig/cron/v3 v3.0.1 // indirect
|
github.com/robfig/cron/v3 v3.0.1 // indirect
|
||||||
|
2
go.sum
2
go.sum
@ -49,6 +49,8 @@ github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
|
|||||||
github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||||
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||||
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
|
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
|
||||||
|
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
|
||||||
|
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||||
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
|
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
|
||||||
github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E=
|
github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E=
|
||||||
github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc=
|
github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc=
|
||||||
|
39
main.go
39
main.go
@ -1,28 +1,21 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
//"fmt"
|
|
||||||
"log"
|
"log"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
|
||||||
|
|
||||||
"github.com/go-chi/chi/v5"
|
"github.com/go-chi/chi/v5"
|
||||||
"github.com/go-chi/chi/v5/middleware"
|
"github.com/go-chi/chi/v5/middleware"
|
||||||
|
|
||||||
"github.com/jtom38/newsbot/collector/routes"
|
"github.com/jtom38/newsbot/collector/routes"
|
||||||
"github.com/jtom38/newsbot/collector/database"
|
|
||||||
"github.com/jtom38/newsbot/collector/services"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
var err error
|
|
||||||
//EnableScheduler()
|
|
||||||
//dc := database.NewDatabaseClient()
|
//dc := database.NewDatabaseClient()
|
||||||
//err := dc.Diagnosis.Ping()
|
//err := dc.Diagnosis.Ping()
|
||||||
//if err != nil { log.Fatalln(err) }
|
//if err != nil { log.Fatalln(err) }
|
||||||
|
|
||||||
//CheckReddit()
|
EnableScheduler()
|
||||||
CheckYoutube()
|
|
||||||
|
|
||||||
app := chi.NewRouter()
|
app := chi.NewRouter()
|
||||||
app.Use(middleware.Logger)
|
app.Use(middleware.Logger)
|
||||||
@ -34,34 +27,6 @@ func main() {
|
|||||||
log.Println("API is online and waiting for requests.")
|
log.Println("API is online and waiting for requests.")
|
||||||
log.Println("API: http://localhost:8081/api")
|
log.Println("API: http://localhost:8081/api")
|
||||||
//log.Println("Swagger: http://localhost:8080/swagger/index.html")
|
//log.Println("Swagger: http://localhost:8080/swagger/index.html")
|
||||||
err = http.ListenAndServe(":8081", app)
|
err := http.ListenAndServe(":8081", app)
|
||||||
if err != nil { log.Fatalln(err) }
|
if err != nil { log.Fatalln(err) }
|
||||||
}
|
|
||||||
|
|
||||||
func CheckReddit() {
|
|
||||||
dc := database.NewDatabaseClient()
|
|
||||||
sources, err := dc.Sources.FindBySource("reddit")
|
|
||||||
if err != nil { log.Println(err) }
|
|
||||||
|
|
||||||
rc := services.NewRedditClient(sources[0].Name, sources[0].ID)
|
|
||||||
raw, err := rc.GetContent()
|
|
||||||
if err != nil { log.Println(err) }
|
|
||||||
|
|
||||||
redditArticles := rc.ConvertToArticles(raw)
|
|
||||||
|
|
||||||
for _, item := range redditArticles {
|
|
||||||
_, err = dc.Articles.FindByUrl(item.Url)
|
|
||||||
if err != nil {
|
|
||||||
err = dc.Articles.Add(item)
|
|
||||||
if err != nil { log.Println("Failed to post article.")}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func CheckYoutube() {
|
|
||||||
// Add call to the db to request youtube sources.
|
|
||||||
|
|
||||||
// Loop though the services, and generate the clients.
|
|
||||||
yt := services.NewYoutubeClient(0, "https://www.youtube.com/user/GameGrumps")
|
|
||||||
yt.CheckSource()
|
|
||||||
}
|
}
|
68
scheduler.go
68
scheduler.go
@ -2,8 +2,14 @@ package main
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"log"
|
||||||
|
|
||||||
"github.com/robfig/cron/v3"
|
"github.com/robfig/cron/v3"
|
||||||
|
|
||||||
|
"github.com/jtom38/newsbot/collector/database"
|
||||||
|
"github.com/jtom38/newsbot/collector/services"
|
||||||
|
//"github.com/jtom38/newsbot/collector/services/cache"
|
||||||
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func Hello(t string) {
|
func Hello(t string) {
|
||||||
@ -12,8 +18,62 @@ func Hello(t string) {
|
|||||||
|
|
||||||
func EnableScheduler() {
|
func EnableScheduler() {
|
||||||
c := cron.New()
|
c := cron.New()
|
||||||
c.AddFunc("*/1 * * * *", func() {
|
|
||||||
go Hello("new world order")
|
//c.AddFunc("*/5 * * * *", func() { go CheckCache() })
|
||||||
})
|
c.AddFunc("*/30 * * * *", func() { go CheckReddit() })
|
||||||
|
c.AddFunc("*/30 * * * *", func() { go CheckYoutube() })
|
||||||
|
c.AddFunc("* */1 * * *", func() { go CheckFfxiv() })
|
||||||
|
|
||||||
c.Start()
|
c.Start()
|
||||||
|
}
|
||||||
|
|
||||||
|
func CheckCache() {
|
||||||
|
//cache := services.NewCacheAgeMonitor()
|
||||||
|
//cache.CheckExpiredEntries()
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func CheckReddit() {
|
||||||
|
dc := database.NewDatabaseClient()
|
||||||
|
sources, err := dc.Sources.FindBySource("reddit")
|
||||||
|
if err != nil { log.Println(err) }
|
||||||
|
|
||||||
|
rc := services.NewRedditClient(sources[0].Name, sources[0].ID)
|
||||||
|
raw, err := rc.GetContent()
|
||||||
|
if err != nil { log.Println(err) }
|
||||||
|
|
||||||
|
redditArticles := rc.ConvertToArticles(raw)
|
||||||
|
|
||||||
|
for _, item := range redditArticles {
|
||||||
|
_, err = dc.Articles.FindByUrl(item.Url)
|
||||||
|
if err != nil {
|
||||||
|
err = dc.Articles.Add(item)
|
||||||
|
if err != nil { log.Println("Failed to post article.")}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func CheckYoutube() {
|
||||||
|
// Add call to the db to request youtube sources.
|
||||||
|
|
||||||
|
// Loop though the services, and generate the clients.
|
||||||
|
yt := services.NewYoutubeClient(0, "https://www.youtube.com/user/GameGrumps")
|
||||||
|
yt.CheckSource()
|
||||||
|
}
|
||||||
|
|
||||||
|
func CheckFfxiv() {
|
||||||
|
fc := services.NewFFXIVClient("na")
|
||||||
|
articles, err := fc.CheckSource()
|
||||||
|
|
||||||
|
// This isnt in a thread yet, so just output to stdout
|
||||||
|
if err != nil { log.Println(err) }
|
||||||
|
|
||||||
|
dc := database.NewDatabaseClient()
|
||||||
|
for _, item := range articles {
|
||||||
|
_, err = dc.Articles.FindByUrl(item.Url)
|
||||||
|
if err != nil {
|
||||||
|
err = dc.Articles.Add(item)
|
||||||
|
if err != nil { log.Println("Failed to post article.")}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
@ -1,40 +0,0 @@
|
|||||||
package services
|
|
||||||
|
|
||||||
import (
|
|
||||||
"errors"
|
|
||||||
|
|
||||||
"github.com/jtom38/newsbot/collector/domain/model"
|
|
||||||
)
|
|
||||||
|
|
||||||
type CacheClient struct{}
|
|
||||||
|
|
||||||
var (
|
|
||||||
cacheStorage []*model.CacheItem
|
|
||||||
|
|
||||||
ErrCacheRecordMissing = errors.New("unable to find the requested record.")
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
func NewCacheClient() CacheClient {
|
|
||||||
return CacheClient{}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (cc *CacheClient) Insert(item *model.CacheItem) {
|
|
||||||
//_, err := cc.Find(item.Key, item.Group)
|
|
||||||
//if err != nil { }
|
|
||||||
cacheStorage = append(cacheStorage, item)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (cc *CacheClient) Find(key string, group string) (*model.CacheItem, error) {
|
|
||||||
//go cc.FindExpiredEntries()
|
|
||||||
|
|
||||||
for _, item := range cacheStorage {
|
|
||||||
if item.Group != group { continue }
|
|
||||||
|
|
||||||
if item.Key != key { continue }
|
|
||||||
|
|
||||||
return item, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
return &model.CacheItem{}, ErrCacheRecordMissing
|
|
||||||
}
|
|
62
services/cache/cache.go
vendored
Normal file
62
services/cache/cache.go
vendored
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
package cache
|
||||||
|
|
||||||
|
import (
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/jtom38/newsbot/collector/domain/model"
|
||||||
|
)
|
||||||
|
|
||||||
|
type CacheClient struct{
|
||||||
|
group string
|
||||||
|
DefaultTimer time.Duration
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewCacheClient(group string) CacheClient {
|
||||||
|
return CacheClient{
|
||||||
|
group: group,
|
||||||
|
DefaultTimer: time.Hour,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (cc *CacheClient) Insert(key string, value string) {
|
||||||
|
item := model.CacheItem{
|
||||||
|
Key: key,
|
||||||
|
Value: value,
|
||||||
|
Group: cc.group,
|
||||||
|
Expires: time.Now().Add(1 * time.Hour),
|
||||||
|
IsTainted: false,
|
||||||
|
}
|
||||||
|
cacheStorage = append(cacheStorage, &item)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (cc *CacheClient) FindByKey(key string) (*model.CacheItem, error) {
|
||||||
|
for _, item := range cacheStorage {
|
||||||
|
if item.Group != cc.group { continue }
|
||||||
|
if item.Key != key { continue }
|
||||||
|
|
||||||
|
// if it was tainted, renew the timer and remove the taint as this record was still needed
|
||||||
|
if item.IsTainted {
|
||||||
|
item.IsTainted = false
|
||||||
|
item.Expires = time.Now().Add(1 * time.Hour)
|
||||||
|
}
|
||||||
|
return item, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return &model.CacheItem{}, ErrCacheRecordMissing
|
||||||
|
}
|
||||||
|
|
||||||
|
func (cc *CacheClient) FindByValue(value string) (*model.CacheItem, error) {
|
||||||
|
for _, item := range cacheStorage {
|
||||||
|
if item.Group != cc.group { continue }
|
||||||
|
if item.Value != value { continue }
|
||||||
|
|
||||||
|
// if it was tainted, renew the timer and remove the taint as this record was still needed
|
||||||
|
if item.IsTainted {
|
||||||
|
item.IsTainted = false
|
||||||
|
item.Expires = time.Now().Add(1 * time.Hour)
|
||||||
|
}
|
||||||
|
return item, nil
|
||||||
|
}
|
||||||
|
return &model.CacheItem{}, ErrCacheRecordMissing
|
||||||
|
}
|
||||||
|
|
40
services/cache/cache_test.go
vendored
Normal file
40
services/cache/cache_test.go
vendored
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
package cache_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/jtom38/newsbot/collector/services/cache"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestNewCacheClient(t *testing.T) {
|
||||||
|
_ = cache.NewCacheClient("placeholder")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestInsert(t *testing.T) {
|
||||||
|
cache := cache.NewCacheClient("Testing")
|
||||||
|
cache.Insert("UnitTesting", "Something, or nothing")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFindGroupMissing(t *testing.T) {
|
||||||
|
cache := cache.NewCacheClient("faker")
|
||||||
|
_, err := cache.FindByKey("UnitTesting")
|
||||||
|
if err == nil { panic("Nothing was appended with the requested group.") }
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFindGroupExists(t *testing.T) {
|
||||||
|
cache := cache.NewCacheClient("Testing")
|
||||||
|
cache.Insert("UnitTesting", "Something")
|
||||||
|
_, err := cache.FindByKey("UnitTesting")
|
||||||
|
if err != nil { panic("") }
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCacheStorage(t *testing.T) {
|
||||||
|
cc := cache.NewCacheClient("Testing")
|
||||||
|
cc.Insert("UnitTesting01", "test")
|
||||||
|
cc.Insert("UnitTesting02", "Test")
|
||||||
|
|
||||||
|
cache := cache.NewCacheClient("Testing")
|
||||||
|
_, err := cache.FindByKey("UnitTesting02")
|
||||||
|
if err != nil { panic("expected to find the value")}
|
||||||
|
}
|
||||||
|
|
13
services/cache/common.go
vendored
Normal file
13
services/cache/common.go
vendored
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
package cache
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
|
||||||
|
"github.com/jtom38/newsbot/collector/domain/model"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
cacheStorage []*model.CacheItem
|
||||||
|
|
||||||
|
ErrCacheRecordMissing = errors.New("unable to find the requested record")
|
||||||
|
)
|
45
services/cache/monitor.go
vendored
Normal file
45
services/cache/monitor.go
vendored
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
package cache
|
||||||
|
|
||||||
|
import (
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/jtom38/newsbot/collector/domain/model"
|
||||||
|
)
|
||||||
|
|
||||||
|
// When a record becomes tainted, it needs to be renewed or it will be dropped from the cache.
|
||||||
|
// If a record is tainted and used again, the taint will be removed and a new Expires value will be set.
|
||||||
|
// If its not renewed, it will be dropped.
|
||||||
|
type CacheAgeMonitor struct {}
|
||||||
|
|
||||||
|
func NewCacheAgeMonitor() CacheAgeMonitor {
|
||||||
|
return CacheAgeMonitor{}
|
||||||
|
}
|
||||||
|
|
||||||
|
// This is an automated job that will review all the objects for age and taint them if needed.
|
||||||
|
func (cam CacheAgeMonitor) CheckExpiredEntries() {
|
||||||
|
now := time.Now()
|
||||||
|
for index, item := range cacheStorage {
|
||||||
|
if now.After(item.Expires) {
|
||||||
|
|
||||||
|
// the timer expired, and its not tainted, taint it
|
||||||
|
if !item.IsTainted {
|
||||||
|
item.IsTainted = true
|
||||||
|
item.Expires = now.Add(1 * time.Hour)
|
||||||
|
}
|
||||||
|
|
||||||
|
// if its tainted and the timer didnt get renewed, delete
|
||||||
|
if item.IsTainted {
|
||||||
|
cacheStorage = cam.removeEntry(index)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// This creates a new slice and skips over the item that needs to be dropped
|
||||||
|
func (cam CacheAgeMonitor) removeEntry(index int) []*model.CacheItem {
|
||||||
|
var temp []*model.CacheItem
|
||||||
|
for i, item := range cacheStorage {
|
||||||
|
if i != index { temp = append(temp, item )}
|
||||||
|
}
|
||||||
|
return temp
|
||||||
|
}
|
13
services/cache/monitor_test.go
vendored
Normal file
13
services/cache/monitor_test.go
vendored
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
package cache_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/jtom38/newsbot/collector/services/cache"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestCacheTaintItem(t *testing.T) {
|
||||||
|
cc := cache.NewCacheClient("Testing")
|
||||||
|
cc.Insert("UnitTesting01", "test")
|
||||||
|
|
||||||
|
}
|
@ -1,39 +0,0 @@
|
|||||||
package services
|
|
||||||
|
|
||||||
import (
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/jtom38/newsbot/collector/domain/model"
|
|
||||||
)
|
|
||||||
|
|
||||||
type CacheMonitor struct {}
|
|
||||||
|
|
||||||
func NewCacheMonitorClient() CacheMonitor {
|
|
||||||
return CacheMonitor{}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (cm *CacheMonitor) Enable() {
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
// This will be fired off each time an cache a
|
|
||||||
func (cm *CacheMonitor) FindExpiredEntries() {
|
|
||||||
now := time.Now()
|
|
||||||
for index, item := range cacheStorage {
|
|
||||||
res := now.After(item.Expires)
|
|
||||||
if res {
|
|
||||||
cm.removeExpiredEntries(index)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// This will create a new slice and add the valid items to it and ignore the one to be removed.
|
|
||||||
// The existing cacheStorage will be replaced.
|
|
||||||
func (cc *CacheMonitor) removeExpiredEntries(arrayEntry int) {
|
|
||||||
var temp []*model.CacheItem
|
|
||||||
for index, item := range cacheStorage {
|
|
||||||
if index == arrayEntry { continue }
|
|
||||||
temp = append(temp, item)
|
|
||||||
}
|
|
||||||
cacheStorage = temp
|
|
||||||
}
|
|
@ -1,69 +0,0 @@
|
|||||||
package services_test
|
|
||||||
|
|
||||||
import (
|
|
||||||
"testing"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/jtom38/newsbot/collector/domain/model"
|
|
||||||
"github.com/jtom38/newsbot/collector/services"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestNewCacheClient(t *testing.T) {
|
|
||||||
_ = services.NewCacheClient()
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestInsert(t *testing.T) {
|
|
||||||
cache := services.NewCacheClient()
|
|
||||||
var item *model.CacheItem = &model.CacheItem{
|
|
||||||
Key: "UnitTesting",
|
|
||||||
Value: "Something, or nothing",
|
|
||||||
Group: "Testing",
|
|
||||||
Expires: time.Now().Add(5 * time.Second),
|
|
||||||
}
|
|
||||||
cache.Insert(item)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestFindGroupMissing(t *testing.T) {
|
|
||||||
cache := services.NewCacheClient()
|
|
||||||
_, err := cache.Find("UnitTesting", "Unknown")
|
|
||||||
if err == nil { panic("Nothing was appended with the requested group.") }
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestFindGroupExists(t *testing.T) {
|
|
||||||
cache := services.NewCacheClient()
|
|
||||||
var item *model.CacheItem = &model.CacheItem{
|
|
||||||
Key: "UnitTesting",
|
|
||||||
Value: "Something, or nothing",
|
|
||||||
Group: "Testing",
|
|
||||||
Expires: time.Now().Add(5 * time.Second),
|
|
||||||
}
|
|
||||||
cache.Insert(item)
|
|
||||||
_, err := cache.Find("UnitTesting", "Testing2")
|
|
||||||
//t.Log(res)
|
|
||||||
if err == nil { panic("") }
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
func TestCacheStorage(t *testing.T) {
|
|
||||||
cc := services.NewCacheClient()
|
|
||||||
|
|
||||||
item1 := &model.CacheItem {
|
|
||||||
Key: "UnitTesting01",
|
|
||||||
Value: "",
|
|
||||||
Group: "Testing",
|
|
||||||
Expires: time.Now().Add(5 * time.Minute),
|
|
||||||
}
|
|
||||||
cc.Insert(item1)
|
|
||||||
|
|
||||||
item2 := &model.CacheItem {
|
|
||||||
Key: "UnitTesting02",
|
|
||||||
Value: "",
|
|
||||||
Group: "Testing",
|
|
||||||
Expires: time.Now().Add(5 * time.Minute),
|
|
||||||
}
|
|
||||||
cc.Insert(item2)
|
|
||||||
|
|
||||||
cache := services.NewCacheClient()
|
|
||||||
_, err := cache.Find("UnitTesting02", "Testing")
|
|
||||||
if err != nil { panic("expected to find the value")}
|
|
||||||
}
|
|
@ -1,4 +1,4 @@
|
|||||||
package services
|
package config
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"os"
|
"os"
|
||||||
@ -19,7 +19,7 @@ const (
|
|||||||
|
|
||||||
type ConfigClient struct {}
|
type ConfigClient struct {}
|
||||||
|
|
||||||
func NewConfigClient() ConfigClient {
|
func New() ConfigClient {
|
||||||
_, err := os.Open(".env")
|
_, err := os.Open(".env")
|
||||||
if err == nil {
|
if err == nil {
|
||||||
loadEnvFile()
|
loadEnvFile()
|
20
services/config/config_test.go
Normal file
20
services/config/config_test.go
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
package config_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"github.com/jtom38/newsbot/collector/services/config"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestNewClient(t *testing.T) {
|
||||||
|
config.New()
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGetConfigExpectNull(t *testing.T) {
|
||||||
|
cc := config.New()
|
||||||
|
os.Setenv(config.REDDIT_PULL_HOT, "")
|
||||||
|
res := cc.GetConfig(config.REDDIT_PULL_HOT)
|
||||||
|
if res != "" { panic("expected blank")}
|
||||||
|
|
||||||
|
}
|
258
services/ffxiv.go
Normal file
258
services/ffxiv.go
Normal file
@ -0,0 +1,258 @@
|
|||||||
|
package services
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"log"
|
||||||
|
"net/http"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/PuerkitoBio/goquery"
|
||||||
|
"github.com/go-rod/rod"
|
||||||
|
"github.com/google/uuid"
|
||||||
|
|
||||||
|
"github.com/jtom38/newsbot/collector/domain/model"
|
||||||
|
"github.com/jtom38/newsbot/collector/services/cache"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
FFXIV_NA_FEED_URL string = "https://na.finalfantasyxiv.com/lodestone/"
|
||||||
|
FFXIV_JP_FEED_URL string = "https://jp.finalfantasyxiv.com/lodestone/"
|
||||||
|
|
||||||
|
FFXIV_TIME_FORMAT string = "1/2/2006 3:4 PM"
|
||||||
|
)
|
||||||
|
|
||||||
|
type FFXIVClient struct {
|
||||||
|
SourceID uint
|
||||||
|
Url string
|
||||||
|
Region string
|
||||||
|
|
||||||
|
cacheGroup string
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewFFXIVClient(region string) FFXIVClient {
|
||||||
|
var url string
|
||||||
|
|
||||||
|
switch region {
|
||||||
|
case "na":
|
||||||
|
url = FFXIV_NA_FEED_URL
|
||||||
|
case "jp":
|
||||||
|
url = FFXIV_JP_FEED_URL
|
||||||
|
}
|
||||||
|
|
||||||
|
return FFXIVClient{
|
||||||
|
Region: region,
|
||||||
|
Url: url,
|
||||||
|
cacheGroup: "ffxiv",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (fc *FFXIVClient) CheckSource() ([]model.Articles, error) {
|
||||||
|
var articles []model.Articles
|
||||||
|
|
||||||
|
parser := fc.GetBrowser()
|
||||||
|
defer parser.Close()
|
||||||
|
|
||||||
|
links, err := fc.PullFeed(parser)
|
||||||
|
if err != nil { return articles, err }
|
||||||
|
|
||||||
|
cache := cache.NewCacheClient(fc.cacheGroup)
|
||||||
|
|
||||||
|
for _, link := range links {
|
||||||
|
// Check cache/db if this link has been seen already, skip
|
||||||
|
_, err := cache.FindByValue(link)
|
||||||
|
if err == nil { continue }
|
||||||
|
|
||||||
|
|
||||||
|
page := fc.GetPage(parser, link)
|
||||||
|
|
||||||
|
title, err := fc.ExtractTitle(page)
|
||||||
|
if err != nil { return articles, err }
|
||||||
|
|
||||||
|
thumb, err := fc.ExtractThumbnail(page)
|
||||||
|
if err != nil { return articles, err }
|
||||||
|
|
||||||
|
pubDate, err := fc.ExtractPubDate(page)
|
||||||
|
if err != nil { return articles, err }
|
||||||
|
|
||||||
|
description, err := fc.ExtractDescription(page)
|
||||||
|
if err != nil { return articles, err }
|
||||||
|
|
||||||
|
authorName, err := fc.ExtractAuthor(page)
|
||||||
|
if err != nil { return articles, err }
|
||||||
|
|
||||||
|
authorImage, err := fc.ExtractAuthorImage(page)
|
||||||
|
if err != nil { return articles, err }
|
||||||
|
|
||||||
|
tags, err := fc.ExtractTags(page)
|
||||||
|
if err != nil { return articles, err }
|
||||||
|
|
||||||
|
article := model.Articles{
|
||||||
|
SourceID: fc.SourceID,
|
||||||
|
Tags: tags,
|
||||||
|
Title: title,
|
||||||
|
Url: link,
|
||||||
|
PubDate: pubDate,
|
||||||
|
Video: "",
|
||||||
|
VideoHeight: 0,
|
||||||
|
VideoWidth: 0,
|
||||||
|
Thumbnail: thumb,
|
||||||
|
Description: description,
|
||||||
|
AuthorName: authorName,
|
||||||
|
AuthorImage: authorImage,
|
||||||
|
}
|
||||||
|
log.Printf("Collected '%v' from '%v'", article.Title, article.Url)
|
||||||
|
|
||||||
|
cache.Insert(uuid.New().String(), link)
|
||||||
|
|
||||||
|
articles = append(articles, article)
|
||||||
|
}
|
||||||
|
|
||||||
|
return articles, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (fc *FFXIVClient) GetParser() (*goquery.Document, error) {
|
||||||
|
html, err := http.Get(fc.Url)
|
||||||
|
if err != nil { return nil, err }
|
||||||
|
defer html.Body.Close()
|
||||||
|
|
||||||
|
doc, err := goquery.NewDocumentFromReader(html.Body)
|
||||||
|
if err != nil { return nil, err }
|
||||||
|
return doc, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (fc *FFXIVClient) GetBrowser() (*rod.Browser) {
|
||||||
|
browser := rod.New().MustConnect()
|
||||||
|
return browser
|
||||||
|
}
|
||||||
|
|
||||||
|
func (fc *FFXIVClient) PullFeed(parser *rod.Browser) ([]string, error) {
|
||||||
|
var links []string
|
||||||
|
|
||||||
|
page := parser.MustPage(fc.Url)
|
||||||
|
defer page.Close()
|
||||||
|
|
||||||
|
// find the list by xpath
|
||||||
|
res := page.MustElementX("/html/body/div[3]/div/div/div[1]/div[2]/div[1]/div[2]/ul")
|
||||||
|
|
||||||
|
// find all the li items
|
||||||
|
items := res.MustElements("li")
|
||||||
|
|
||||||
|
for _, item := range items {
|
||||||
|
// in each li, find the a items
|
||||||
|
a, err := item.Element("a")
|
||||||
|
if err != nil {
|
||||||
|
log.Println("Unable to find the a item, skipping")
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// find the href behind the a
|
||||||
|
url, err := a.Property("href")
|
||||||
|
if err != nil {
|
||||||
|
log.Println("Unable to find a href link, skipping")
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
urlString := url.String()
|
||||||
|
isTopic := strings.Contains(urlString, "topics")
|
||||||
|
if isTopic {
|
||||||
|
links = append(links, urlString)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return links, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (rc *FFXIVClient) GetPage(parser *rod.Browser, url string) *rod.Page {
|
||||||
|
page := parser.MustPage(url)
|
||||||
|
return page
|
||||||
|
}
|
||||||
|
|
||||||
|
func (fc *FFXIVClient) ExtractThumbnail(page *rod.Page) (string, error) {
|
||||||
|
thumbnail := page.MustElementX("/html/body/div[3]/div[2]/div[1]/article/div[1]/img").MustProperty("src").String()
|
||||||
|
if thumbnail == "" { return "", errors.New("unable to find thumbnail")}
|
||||||
|
|
||||||
|
title := page.MustElement(".news__header > h1:nth-child(2)").MustText()
|
||||||
|
log.Println(title)
|
||||||
|
|
||||||
|
return thumbnail, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (fc *FFXIVClient) ExtractPubDate(page *rod.Page) (time.Time, error) {
|
||||||
|
stringDate := page.MustElement(".news__ic--topics").MustText()
|
||||||
|
if stringDate == "" { return time.Now(), errors.New("unable to locate the publish date on the post")}
|
||||||
|
|
||||||
|
PubDate, err := time.Parse(FFXIV_TIME_FORMAT, stringDate)
|
||||||
|
if err != nil { return time.Now(), err }
|
||||||
|
|
||||||
|
return PubDate, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (fc *FFXIVClient) ExtractDescription(page *rod.Page) (string, error) {
|
||||||
|
res := page.MustElement(".news__detail__wrapper").MustText()
|
||||||
|
if res == "" { return "", errors.New("unable to locate the description on the post")}
|
||||||
|
|
||||||
|
return res, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (fc *FFXIVClient) ExtractAuthor(page *rod.Page) (string, error) {
|
||||||
|
meta := page.MustElements("head > meta")
|
||||||
|
for _, item := range meta {
|
||||||
|
name, err := item.Property("name")
|
||||||
|
if err != nil { return "", err }
|
||||||
|
|
||||||
|
if name.String() != "author" { continue }
|
||||||
|
content, err := item.Property("content")
|
||||||
|
if err != nil { return "", err }
|
||||||
|
|
||||||
|
return content.String(), nil
|
||||||
|
}
|
||||||
|
//log.Println(meta)
|
||||||
|
return "", errors.New("unable to find the author on the page")
|
||||||
|
}
|
||||||
|
|
||||||
|
func (fc *FFXIVClient) ExtractTags(page *rod.Page) (string, error) {
|
||||||
|
meta := page.MustElements("head > meta")
|
||||||
|
for _, item := range meta {
|
||||||
|
name, err := item.Property("name")
|
||||||
|
if err != nil { return "", err }
|
||||||
|
|
||||||
|
if name.String() != "keywords" { continue }
|
||||||
|
content, err := item.Property("content")
|
||||||
|
if err != nil { return "", err }
|
||||||
|
|
||||||
|
return content.String(), nil
|
||||||
|
}
|
||||||
|
//log.Println(meta)
|
||||||
|
return "", errors.New("unable to find the author on the page")
|
||||||
|
}
|
||||||
|
|
||||||
|
func (fc *FFXIVClient) ExtractTitle(page *rod.Page) (string, error) {
|
||||||
|
title, err := page.MustElement("head > title").Text()
|
||||||
|
if err != nil { return "", err }
|
||||||
|
|
||||||
|
if !strings.Contains(title, "|") { return "", errors.New("unable to split the title, missing | in the string")}
|
||||||
|
|
||||||
|
res := strings.Split(title, "|")
|
||||||
|
if title != "" { return res[0], nil }
|
||||||
|
|
||||||
|
//log.Println(meta)
|
||||||
|
return "", errors.New("unable to find the author on the page")
|
||||||
|
}
|
||||||
|
|
||||||
|
func (fc *FFXIVClient) ExtractAuthorImage(page *rod.Page) (string, error) {
|
||||||
|
meta := page.MustElements("head > link")
|
||||||
|
for _, item := range meta {
|
||||||
|
name, err := item.Property("rel")
|
||||||
|
if err != nil { return "", err }
|
||||||
|
|
||||||
|
if name.String() != "apple-touch-icon-precomposed" { continue }
|
||||||
|
content, err := item.Property("href")
|
||||||
|
if err != nil { return "", err }
|
||||||
|
|
||||||
|
return content.String(), nil
|
||||||
|
}
|
||||||
|
//log.Println(meta)
|
||||||
|
return "", errors.New("unable to find the author image on the page")
|
||||||
|
}
|
||||||
|
|
148
services/ffxiv_test.go
Normal file
148
services/ffxiv_test.go
Normal file
@ -0,0 +1,148 @@
|
|||||||
|
package services_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
ffxiv "github.com/jtom38/newsbot/collector/services"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestFfxivGetParser(t *testing.T) {
|
||||||
|
fc := ffxiv.NewFFXIVClient("na")
|
||||||
|
_, err := fc.GetParser()
|
||||||
|
if err != nil { panic(err) }
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFfxivPullFeed(t *testing.T) {
|
||||||
|
fc := ffxiv.NewFFXIVClient("na")
|
||||||
|
|
||||||
|
parser := fc.GetBrowser()
|
||||||
|
defer parser.Close()
|
||||||
|
|
||||||
|
links, err := fc.PullFeed(parser)
|
||||||
|
if err != nil { panic(err) }
|
||||||
|
if len(links) == 0 { panic("expected links to come back but got 0") }
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFfxivExtractThumbnail(t *testing.T) {
|
||||||
|
fc := ffxiv.NewFFXIVClient("na")
|
||||||
|
|
||||||
|
parser := fc.GetBrowser()
|
||||||
|
defer parser.Close()
|
||||||
|
|
||||||
|
links, err := fc.PullFeed(parser)
|
||||||
|
if err != nil { panic(err) }
|
||||||
|
|
||||||
|
page := fc.GetPage(parser, links[0])
|
||||||
|
defer page.Close()
|
||||||
|
|
||||||
|
thumb, err := fc.ExtractThumbnail(page)
|
||||||
|
if err != nil { panic(err) }
|
||||||
|
if thumb == "" { panic("expected a link but got nothing.")}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFfxivExtractPubDate(t *testing.T) {
|
||||||
|
fc := ffxiv.NewFFXIVClient("na")
|
||||||
|
|
||||||
|
parser := fc.GetBrowser()
|
||||||
|
defer parser.Close()
|
||||||
|
|
||||||
|
links, err := fc.PullFeed(parser)
|
||||||
|
if err != nil { panic(err) }
|
||||||
|
|
||||||
|
page := fc.GetPage(parser, links[0])
|
||||||
|
defer page.Close()
|
||||||
|
|
||||||
|
_, err = fc.ExtractPubDate(page)
|
||||||
|
if err != nil { panic(err) }
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFfxivExtractDescription(t *testing.T) {
|
||||||
|
fc := ffxiv.NewFFXIVClient("na")
|
||||||
|
|
||||||
|
parser := fc.GetBrowser()
|
||||||
|
defer parser.Close()
|
||||||
|
|
||||||
|
links, err := fc.PullFeed(parser)
|
||||||
|
if err != nil { panic(err) }
|
||||||
|
|
||||||
|
page := fc.GetPage(parser, links[0])
|
||||||
|
defer page.Close()
|
||||||
|
|
||||||
|
_, err = fc.ExtractDescription(page)
|
||||||
|
if err != nil { panic(err) }
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFfxivExtractAuthor(t *testing.T) {
|
||||||
|
fc := ffxiv.NewFFXIVClient("na")
|
||||||
|
|
||||||
|
parser := fc.GetBrowser()
|
||||||
|
defer parser.Close()
|
||||||
|
|
||||||
|
links, err := fc.PullFeed(parser)
|
||||||
|
if err != nil { panic(err) }
|
||||||
|
|
||||||
|
page := fc.GetPage(parser, links[0])
|
||||||
|
defer page.Close()
|
||||||
|
|
||||||
|
author, err := fc.ExtractAuthor(page)
|
||||||
|
if err != nil { panic(err) }
|
||||||
|
if author == "" { panic("failed to locate the author name") }
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFfxivExtractTags(t *testing.T) {
|
||||||
|
fc := ffxiv.NewFFXIVClient("na")
|
||||||
|
|
||||||
|
parser := fc.GetBrowser()
|
||||||
|
defer parser.Close()
|
||||||
|
|
||||||
|
links, err := fc.PullFeed(parser)
|
||||||
|
if err != nil { panic(err) }
|
||||||
|
|
||||||
|
page := fc.GetPage(parser, links[0])
|
||||||
|
defer page.Close()
|
||||||
|
|
||||||
|
res, err := fc.ExtractTags(page)
|
||||||
|
if err != nil { panic(err) }
|
||||||
|
if res == "" {panic("failed to locate the tags")}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFfxivExtractTitle(t *testing.T) {
|
||||||
|
fc := ffxiv.NewFFXIVClient("na")
|
||||||
|
|
||||||
|
parser := fc.GetBrowser()
|
||||||
|
defer parser.Close()
|
||||||
|
|
||||||
|
links, err := fc.PullFeed(parser)
|
||||||
|
if err != nil { panic(err) }
|
||||||
|
|
||||||
|
page := fc.GetPage(parser, links[0])
|
||||||
|
defer page.Close()
|
||||||
|
|
||||||
|
res, err := fc.ExtractTitle(page)
|
||||||
|
if err != nil { panic(err) }
|
||||||
|
if res == "" { panic("failed to locate the tags") }
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFFxivExtractAuthorIamge(t *testing.T) {
|
||||||
|
fc := ffxiv.NewFFXIVClient("na")
|
||||||
|
|
||||||
|
parser := fc.GetBrowser()
|
||||||
|
defer parser.Close()
|
||||||
|
|
||||||
|
links, err := fc.PullFeed(parser)
|
||||||
|
if err != nil { panic(err) }
|
||||||
|
|
||||||
|
page := fc.GetPage(parser, links[0])
|
||||||
|
defer page.Close()
|
||||||
|
|
||||||
|
res, err := fc.ExtractAuthorImage(page)
|
||||||
|
if err != nil { panic(err) }
|
||||||
|
if res == "" { panic("failed to locate the tags") }
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFfxivCheckSource(t *testing.T) {
|
||||||
|
fc := ffxiv.NewFFXIVClient("na")
|
||||||
|
fc.CheckSource()
|
||||||
|
|
||||||
|
}
|
@ -5,9 +5,13 @@ import (
|
|||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
"log"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/go-rod/rod"
|
||||||
"github.com/jtom38/newsbot/collector/domain/model"
|
"github.com/jtom38/newsbot/collector/domain/model"
|
||||||
|
"github.com/jtom38/newsbot/collector/services/config"
|
||||||
)
|
)
|
||||||
|
|
||||||
type RedditClient struct {
|
type RedditClient struct {
|
||||||
@ -29,14 +33,32 @@ func NewRedditClient(subreddit string, sourceID uint) RedditClient {
|
|||||||
url: fmt.Sprintf("https://www.reddit.com/r/%v.json", subreddit),
|
url: fmt.Sprintf("https://www.reddit.com/r/%v.json", subreddit),
|
||||||
sourceId: sourceID,
|
sourceId: sourceID,
|
||||||
}
|
}
|
||||||
cc := NewConfigClient()
|
cc := config.New()
|
||||||
rc.config.PullHot = cc.GetConfig(REDDIT_PULL_HOT)
|
rc.config.PullHot = cc.GetConfig(config.REDDIT_PULL_HOT)
|
||||||
rc.config.PullNSFW = cc.GetConfig(REDDIT_PULL_NSFW)
|
rc.config.PullNSFW = cc.GetConfig(config.REDDIT_PULL_NSFW)
|
||||||
rc.config.PullTop = cc.GetConfig(REDDIT_PULL_TOP)
|
rc.config.PullTop = cc.GetConfig(config.REDDIT_PULL_TOP)
|
||||||
|
|
||||||
|
rc.disableHttp2Client()
|
||||||
|
|
||||||
return rc
|
return rc
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// disableHttp2Client forces the standard library's HTTP client to fall back
// to HTTP/1.1, which is needed for modern Go to talk to the Reddit endpoint
// (it returns 429s to Go's HTTP/2 client).
// NOTE(review): this sets a process-wide environment variable, so it affects
// every HTTP client in the program, not just this RedditClient — confirm
// that is acceptable.
// https://www.reddit.com/r/redditdev/comments/t8e8hc/getting_nothing_but_429_responses_when_using_go/
func (rc RedditClient) disableHttp2Client() {
	os.Setenv("GODEBUG", "http2client=0")
}
|
||||||
|
|
||||||
|
func (rc RedditClient) GetBrowser() *rod.Browser {
|
||||||
|
browser := rod.New().MustConnect()
|
||||||
|
return browser
|
||||||
|
}
|
||||||
|
|
||||||
|
func (rc RedditClient) GetPage(parser *rod.Browser, url string) *rod.Page {
|
||||||
|
page := parser.MustPage(url)
|
||||||
|
return page
|
||||||
|
}
|
||||||
|
|
||||||
// GetContent() reaches out to Reddit and pulls the Json data.
|
// GetContent() reaches out to Reddit and pulls the Json data.
|
||||||
// It will then convert the data to a struct and return the struct.
|
// It will then convert the data to a struct and return the struct.
|
||||||
func (rc RedditClient) GetContent() (model.RedditJsonContent, error ) {
|
func (rc RedditClient) GetContent() (model.RedditJsonContent, error ) {
|
||||||
@ -45,9 +67,14 @@ func (rc RedditClient) GetContent() (model.RedditJsonContent, error ) {
|
|||||||
log.Printf("Collecting results on '%v'", rc.subreddit)
|
log.Printf("Collecting results on '%v'", rc.subreddit)
|
||||||
content, err := getHttpContent(rc.url)
|
content, err := getHttpContent(rc.url)
|
||||||
if err != nil { return items, err }
|
if err != nil { return items, err }
|
||||||
|
if strings.Contains("<h1>whoa there, pardner!</h1>", string(content) ) {
|
||||||
|
return items, errors.New("did not get json data from the server")
|
||||||
|
}
|
||||||
|
|
||||||
json.Unmarshal(content, &items)
|
json.Unmarshal(content, &items)
|
||||||
|
if len(items.Data.Children) == 0 {
|
||||||
|
return items, errors.New("failed to unmarshal the data")
|
||||||
|
}
|
||||||
return items, nil
|
return items, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,14 +1,16 @@
|
|||||||
package services_test
|
package services_test
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"log"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/jtom38/newsbot/collector/services"
|
"github.com/jtom38/newsbot/collector/services"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestGetContent(t *testing.T) {
|
func TestGetContent(t *testing.T) {
|
||||||
|
//This test is flaky right now due to the http changes in 1.17
|
||||||
rc := services.NewRedditClient("dadjokes", 0)
|
rc := services.NewRedditClient("dadjokes", 0)
|
||||||
_, err := rc.GetContent()
|
_, err := rc.GetContent()
|
||||||
|
log.Println(err)
|
||||||
if err != nil { panic(err) }
|
//if err != nil { panic(err) }
|
||||||
}
|
}
|
@ -20,6 +20,8 @@ type YoutubeClient struct {
|
|||||||
ChannelID string
|
ChannelID string
|
||||||
AvatarUri string
|
AvatarUri string
|
||||||
Config YoutubeConfig
|
Config YoutubeConfig
|
||||||
|
|
||||||
|
cacheGroup string
|
||||||
}
|
}
|
||||||
|
|
||||||
type YoutubeConfig struct {
|
type YoutubeConfig struct {
|
||||||
@ -42,6 +44,7 @@ func NewYoutubeClient(SourceID uint, Url string) YoutubeClient {
|
|||||||
yc := YoutubeClient{
|
yc := YoutubeClient{
|
||||||
SourceID: SourceID,
|
SourceID: SourceID,
|
||||||
Url: Url,
|
Url: Url,
|
||||||
|
cacheGroup: "youtube",
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
cc := NewConfigClient()
|
cc := NewConfigClient()
|
||||||
@ -60,6 +63,7 @@ func (yc *YoutubeClient) CheckSource() error {
|
|||||||
|
|
||||||
// Check cache/db for existing value
|
// Check cache/db for existing value
|
||||||
// If we have the value, skip
|
// If we have the value, skip
|
||||||
|
//channelId, err := yc.extractChannelId()
|
||||||
channelId, err := yc.GetChannelId(docParser)
|
channelId, err := yc.GetChannelId(docParser)
|
||||||
if err != nil { return err }
|
if err != nil { return err }
|
||||||
if channelId == "" { return ErrChannelIdMissing }
|
if channelId == "" { return ErrChannelIdMissing }
|
||||||
@ -92,6 +96,16 @@ func (yc *YoutubeClient) CheckSource() error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (yc *YoutubeClient) GetBrowser() *rod.Browser {
|
||||||
|
browser := rod.New().MustConnect()
|
||||||
|
return browser
|
||||||
|
}
|
||||||
|
|
||||||
|
func (yc *YoutubeClient) GetPage(parser *rod.Browser, url string) *rod.Page {
|
||||||
|
page := parser.MustPage(url)
|
||||||
|
return page
|
||||||
|
}
|
||||||
|
|
||||||
func (yc *YoutubeClient) GetParser(uri string) (*goquery.Document, error) {
|
func (yc *YoutubeClient) GetParser(uri string) (*goquery.Document, error) {
|
||||||
html, err := http.Get(uri)
|
html, err := http.Get(uri)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -120,6 +134,12 @@ func (yc *YoutubeClient) GetChannelId(doc *goquery.Document) (string, error) {
|
|||||||
return "", ErrChannelIdMissing
|
return "", ErrChannelIdMissing
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This pulls the youtube page and finds the ChannelID.
|
||||||
|
// This value is required to generate the RSS feed URI
|
||||||
|
//func (yc *YoutubeClient) extractChannelId(page *rod.Page) (string, error) {
|
||||||
|
|
||||||
|
//}
|
||||||
|
|
||||||
// This will parse the page to find the current Avatar of the channel.
|
// This will parse the page to find the current Avatar of the channel.
|
||||||
func (yc *YoutubeClient) GetAvatarUri() (string, error) {
|
func (yc *YoutubeClient) GetAvatarUri() (string, error) {
|
||||||
var AvatarUri string
|
var AvatarUri string
|
||||||
|
@ -82,10 +82,6 @@ func TestGetChannelTags(t *testing.T) {
|
|||||||
if err != nil { panic(err) }
|
if err != nil { panic(err) }
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestConvertToArticle(t *testing.T) {
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestGetVideoThumbnail(t *testing.T) {
|
func TestGetVideoThumbnail(t *testing.T) {
|
||||||
yc := services.NewYoutubeClient(
|
yc := services.NewYoutubeClient(
|
||||||
0,
|
0,
|
||||||
|
Loading…
Reference in New Issue
Block a user