Youtube source is now working (#5)

* added extra packages to help with parsing

* getting the core built for Youtube collection.  The feed can be pulled and starting to build the article object

* added some tests, reddit will need more love but youtube is starting off with more tests.  Starting to add Rod to pull missing values from the site

* Added rod to work with browser automation

* Moved the config inside the client as they can change within runtime and need to be refreshed on each client creation

* added more features to collect data per video and tests to support them.

* adding the cache and tests

* moved errors to var values at the top

* youtube is now getting collected and tests have been setup

* resolved an issue with the config

* setting up the first interface; it's not used yet

* more updates to the cache service. Not finished yet and could see rework

* added logic to monitor Youtube.  Still basic logic that needs to be wired up to the database
This commit is contained in:
James Tombleson 2022-04-17 07:25:49 -07:00 committed by GitHub
parent e61870ec60
commit eba63c27ef
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 636 additions and 21 deletions

View File

@ -18,7 +18,8 @@ type DatabaseClient struct {
// This will generate a new client to interface with the API Database.
func NewDatabaseClient() DatabaseClient {
var dbUri = services.NewConfigClient().GetConfig(services.DB_URI)
cc := services.NewConfigClient()
dbUri := cc.GetConfig(services.DB_URI)
var client = DatabaseClient{}
client.Diagnosis.rootUri = dbUri

View File

@ -0,0 +1,10 @@
package interfaces
import (
"github.com/mmcdole/gofeed"
)
// Sources describes the operations a source collector exposes:
// running a check of the source and pulling its feed.
type Sources interface {
// CheckSource runs the checks for the source and returns an error on failure.
CheckSource() error
// PullFeed returns the source's feed as a *gofeed.Feed, or an error.
PullFeed() (*gofeed.Feed, error)
}

15
domain/model/cache.go Normal file
View File

@ -0,0 +1,15 @@
package model
import (
"time"
)
// CacheItem is a single cached value, addressed by its Key within a Group.
type CacheItem struct {
// Key identifies the entry inside its Group.
Key string
// Value is the cached payload, stored as a string.
Value string
// Group defines what the entry is a reference to,
// e.g. youtube, reddit, etc.
Group string
// Expires is the point in time after which the entry counts as expired.
Expires time.Time
}

3
go.mod
View File

@ -3,7 +3,10 @@ module github.com/jtom38/newsbot/collector
go 1.16
require (
github.com/PuerkitoBio/goquery v1.8.0 // indirect
github.com/go-chi/chi/v5 v5.0.7 // indirect
github.com/go-rod/rod v0.105.1 // indirect
github.com/joho/godotenv v1.4.0 // indirect
github.com/mmcdole/gofeed v1.1.3 // indirect
github.com/robfig/cron/v3 v3.0.1 // indirect
)

38
go.sum
View File

@ -1,7 +1,14 @@
cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U=
github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI=
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
github.com/czasg/go-queue v0.0.0-20211206021309-e3b3e4c4ae3b/go.mod h1:Myb/1g8zHhdc13TwwkSuJ5wShsub1yoPrT+ta8isZQ4=
github.com/czasg/go-queue v0.0.0-20211206102528-0d03e5c8ace8 h1:N1zVvx6PqlP5W99D4wmvEoEf9z7dczVICL/oW/g66vc=
github.com/czasg/go-queue v0.0.0-20211206102528-0d03e5c8ace8/go.mod h1:Myb/1g8zHhdc13TwwkSuJ5wShsub1yoPrT+ta8isZQ4=
@ -21,6 +28,8 @@ github.com/go-pg/pg/v10 v10.10.6 h1:1vNtPZ4Z9dWUw/TjJwOfFUbF5nEq1IkR6yG8Mq/Iwso=
github.com/go-pg/pg/v10 v10.10.6/go.mod h1:GLmFXufrElQHf5uzM3BQlcfwV3nsgnHue5uzjQ6Nqxg=
github.com/go-pg/zerochecker v0.2.0 h1:pp7f72c3DobMWOb2ErtZsnrPaSvHd2W4o9//8HtF4mU=
github.com/go-pg/zerochecker v0.2.0/go.mod h1:NJZ4wKL0NmTtz0GKCoJ8kym6Xn/EQzXRl2OnAe7MmDo=
github.com/go-rod/rod v0.105.1 h1:r0bNmO9siOe13lG6Vbkaak11u48rYmWGl/Hk4MJdOiE=
github.com/go-rod/rod v0.105.1/go.mod h1:Wrnn6HokFHskwaIVke3ML1y/NBVp7XPIeB8eDzR9vuw=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
@ -39,14 +48,25 @@ github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMyw
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E=
github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc=
github.com/joho/godotenv v1.4.0 h1:3l4+N6zfMWnkbPEXKng2o2/MR5mSwTrBih4ZEkkz1lg=
github.com/joho/godotenv v1.4.0/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
github.com/json-iterator/go v1.1.10 h1:Kz6Cvnvv2wGdaG/V8yMvfkmNiXq9Ya2KUv4rouJJr68=
github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/mmcdole/gofeed v1.1.3 h1:pdrvMb18jMSLidGp8j0pLvc9IGziX4vbmvVqmLH6z8o=
github.com/mmcdole/gofeed v1.1.3/go.mod h1:QQO3maftbOu+hiVOGOZDRLymqGQCos4zxbA4j89gMrE=
github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf h1:sWGE2v+hO0Nd4yFU/S/mDBM5plIU8v/Qhfz41hkDIAI=
github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf/go.mod h1:pasqhqstspkosTneA62Nc+2p9SOBBYAPbnmRRWPQ0V8=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 h1:ZqeYNhU3OHLH3mGKHDcjJRFFRrJa6eAM5H+CtDdOsPc=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742 h1:Esafd1046DLDQ0W1YjYsBW+p8U2u7vzgW2SQVmlNazg=
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A=
github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
@ -61,12 +81,16 @@ github.com/robfig/cron v1.2.0 h1:ZjScXvvxeQ63Dbyxy76Fj3AT3Ut0aKsyd2/tl3DTMuQ=
github.com/robfig/cron v1.2.0/go.mod h1:JGuDeoQd7Z6yL4zQhZ3OPEVHB7fL6Ka6skscFHfmt2k=
github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs=
github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro=
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/tmthrgd/go-hex v0.0.0-20190904060850-447a3041c3bc h1:9lRDQMhESg+zvGYmW5DyG0UqvY96Bu5QYsTLvCHdrgo=
github.com/tmthrgd/go-hex v0.0.0-20190904060850-447a3041c3bc/go.mod h1:bciPuU6GHm1iF1pBvUfxfsH0Wmnc2VbpgvbI9ZWuIRs=
github.com/urfave/cli v1.22.3/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
github.com/vmihailenco/bufpool v0.1.11 h1:gOq2WmBrq0i2yW5QJ16ykccQ4wH9UyEsgLm6czKAd94=
github.com/vmihailenco/bufpool v0.1.11/go.mod h1:AFf/MOy3l2CFTKbxwt0mp2MwnqjNEs5H/UxrkA5jxTQ=
github.com/vmihailenco/msgpack/v5 v5.3.4/go.mod h1:7xyJ9e+0+9SaZT0Wt1RGleJXzli6Q/V5KbhBonMG9jc=
@ -76,6 +100,14 @@ github.com/vmihailenco/tagparser v0.1.2 h1:gnjoVuB/kljJ5wICEEOpx98oXMWPLj22G67Vb
github.com/vmihailenco/tagparser v0.1.2/go.mod h1:OeAg3pn3UbLjkWt+rN9oFYB6u/cQgqMEUPoW2WPyhdI=
github.com/vmihailenco/tagparser/v2 v2.0.0 h1:y09buUbR+b5aycVFQs/g70pqKVZNBmxwAhO7/IwNM9g=
github.com/vmihailenco/tagparser/v2 v2.0.0/go.mod h1:Wri+At7QHww0WTrCBeu4J6bNtoV6mEfg5OIWRZA9qds=
github.com/ysmood/goob v0.4.0 h1:HsxXhyLBeGzWXnqVKtmT9qM7EuVs/XOgkX7T6r1o1AQ=
github.com/ysmood/goob v0.4.0/go.mod h1:u6yx7ZhS4Exf2MwciFr6nIM8knHQIE22lFpWHnfql18=
github.com/ysmood/got v0.23.2/go.mod h1:pE1l4LOwOBhQg6A/8IAatkGp7uZjnalzrZolnlhhMgY=
github.com/ysmood/gotrace v0.6.0/go.mod h1:TzhIG7nHDry5//eYZDYcTzuJLYQIkykJzCRIo4/dzQM=
github.com/ysmood/gson v0.7.1 h1:zKL2MTGtynxdBdlZjyGsvEOZ7dkxaY5TH6QhAbTgz0Q=
github.com/ysmood/gson v0.7.1/go.mod h1:3Kzs5zDl21g5F/BlLTNcuAGAYLKt2lV5G8D1zF3RNmg=
github.com/ysmood/leakless v0.7.0 h1:XCGdaPExyoreoQd+H5qgxM3ReNbSPFsEXpSKwbXbwQw=
github.com/ysmood/leakless v0.7.0/go.mod h1:R8iAXPRaG97QJwqxs74RdwzcRHT1SWCGTNqY8q0JvMQ=
golang.org/x/crypto v0.0.0-20180910181607-0e37d006457b/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
@ -86,15 +118,20 @@ golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20201006153459-a7d1128ccaa0/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8 h1:/6y1LfuqNuQdHAm0jjtPtgRcxIxjVZgm5OTu8/QhZvk=
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@ -120,6 +157,7 @@ golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9sn
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6 h1:aRYxNxv6iGQlyVaZmk6ZgYEDa+Jg18DxebPSrd6bg1M=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=

20
main.go
View File

@ -15,12 +15,14 @@ import (
)
func main() {
var err error
//EnableScheduler()
dc := database.NewDatabaseClient()
err := dc.Diagnosis.Ping()
if err != nil { log.Fatalln(err) }
//dc := database.NewDatabaseClient()
//err := dc.Diagnosis.Ping()
//if err != nil { log.Fatalln(err) }
CheckReddit()
//CheckReddit()
CheckYoutube()
app := chi.NewRouter()
app.Use(middleware.Logger)
@ -41,7 +43,7 @@ func CheckReddit() {
sources, err := dc.Sources.FindBySource("reddit")
if err != nil { log.Println(err) }
rc := services.NewReddit(sources[0].Name, sources[0].ID)
rc := services.NewRedditClient(sources[0].Name, sources[0].ID)
raw, err := rc.GetContent()
if err != nil { log.Println(err) }
@ -54,4 +56,12 @@ func CheckReddit() {
if err != nil { log.Println("Failed to post article.")}
}
}
}
// CheckYoutube builds a YouTube client for a hard-coded channel and runs a
// source check against it. A failed check is logged instead of being dropped.
//
// TODO: request the YouTube sources from the db, loop through them and
// generate one client per source.
func CheckYoutube() {
	yt := services.NewYoutubeClient(0, "https://www.youtube.com/user/GameGrumps")
	if err := yt.CheckSource(); err != nil {
		// Same handling as CheckReddit: report the failure and keep running.
		log.Println(err)
	}
}

40
services/cache.go Normal file
View File

@ -0,0 +1,40 @@
package services
import (
"errors"
"github.com/jtom38/newsbot/collector/domain/model"
)
// CacheClient is the handle used to insert into and search the package-level
// cache. It has no fields of its own.
type CacheClient struct{}
var (
// cacheStorage is the package-level slice that backs every CacheClient.
cacheStorage []*model.CacheItem
// ErrCacheRecordMissing is returned by Find when no entry matches the
// requested key and group.
ErrCacheRecordMissing = errors.New("unable to find the requested record.")
)
// NewCacheClient returns a ready-to-use CacheClient value.
func NewCacheClient() CacheClient {
	var client CacheClient
	return client
}
// Insert appends item to the package-level cache storage.
//
// A nil item is ignored: lookups dereference every stored entry, so keeping
// a nil pointer in the storage would make later searches panic.
//
// TODO: no check for an existing key/group pair is made yet, so inserting the
// same item twice stores it twice.
func (cc *CacheClient) Insert(item *model.CacheItem) {
	if item == nil {
		return
	}
	cacheStorage = append(cacheStorage, item)
}
// Find returns the first stored entry whose Group and Key both match.
// When nothing matches it returns an empty CacheItem together with
// ErrCacheRecordMissing.
func (cc *CacheClient) Find(key string, group string) (*model.CacheItem, error) {
	//go cc.FindExpiredEntries()
	for i := range cacheStorage {
		entry := cacheStorage[i]
		if entry.Group == group && entry.Key == key {
			return entry, nil
		}
	}

	return &model.CacheItem{}, ErrCacheRecordMissing
}

39
services/cacheMonitor.go Normal file
View File

@ -0,0 +1,39 @@
package services
import (
"time"
"github.com/jtom38/newsbot/collector/domain/model"
)
// CacheMonitor carries the housekeeping methods for the cache. It has no fields.
type CacheMonitor struct{}

// NewCacheMonitorClient returns a CacheMonitor value.
func NewCacheMonitorClient() CacheMonitor {
	var monitor CacheMonitor
	return monitor
}

// Enable is a placeholder; it currently does nothing.
func (cm *CacheMonitor) Enable() {}
// FindExpiredEntries removes every entry from the cache storage whose Expires
// time lies before the current time.
//
// The surviving entries are collected in one pass and the storage is replaced
// once at the end. Removing entries by index while still ranging over the
// original slice shifts the position of every later entry after the first
// removal, which would remove the wrong entries (or none at all).
func (cm *CacheMonitor) FindExpiredEntries() {
	now := time.Now()

	var kept []*model.CacheItem
	for _, item := range cacheStorage {
		if now.After(item.Expires) {
			continue
		}
		kept = append(kept, item)
	}
	cacheStorage = kept
}
// removeExpiredEntries rebuilds the cache storage without the entry at
// position arrayEntry. Every other entry is copied, in order, into a fresh
// slice which then replaces the existing cacheStorage.
func (cm *CacheMonitor) removeExpiredEntries(arrayEntry int) {
	var remaining []*model.CacheItem
	for position := range cacheStorage {
		if position != arrayEntry {
			remaining = append(remaining, cacheStorage[position])
		}
	}
	cacheStorage = remaining
}

69
services/cache_test.go Normal file
View File

@ -0,0 +1,69 @@
package services_test
import (
"testing"
"time"
"github.com/jtom38/newsbot/collector/domain/model"
"github.com/jtom38/newsbot/collector/services"
)
// TestNewCacheClient only verifies that a cache client can be constructed.
func TestNewCacheClient(t *testing.T) {
	client := services.NewCacheClient()
	_ = client
}
// TestInsert stores an item and verifies that it can be read back.
func TestInsert(t *testing.T) {
	cache := services.NewCacheClient()
	item := &model.CacheItem{
		Key:     "UnitTesting",
		Value:   "Something, or nothing",
		Group:   "Testing",
		Expires: time.Now().Add(5 * time.Second),
	}
	cache.Insert(item)

	// Compare by value: other tests store the same key/group in the shared
	// storage, so the first match is not necessarily this exact pointer.
	found, err := cache.Find(item.Key, item.Group)
	if err != nil {
		t.Fatalf("inserted item was not found: %v", err)
	}
	if found.Value != item.Value {
		t.Fatalf("got value %q, want %q", found.Value, item.Value)
	}
}
// TestFindGroupMissing expects a lookup in a group that was never used to fail.
func TestFindGroupMissing(t *testing.T) {
	cache := services.NewCacheClient()
	_, err := cache.Find("UnitTesting", "Unknown")
	if err == nil {
		t.Fatal("Nothing was appended with the requested group.")
	}
}
// TestFindGroupExists stores an item in the "Testing" group and checks that it
// is found under that group, while the same key under another group is not.
func TestFindGroupExists(t *testing.T) {
	cache := services.NewCacheClient()
	item := &model.CacheItem{
		Key:     "UnitTesting",
		Value:   "Something, or nothing",
		Group:   "Testing",
		Expires: time.Now().Add(5 * time.Second),
	}
	cache.Insert(item)

	if _, err := cache.Find("UnitTesting", "Testing"); err != nil {
		t.Fatalf("expected the item in its own group: %v", err)
	}
	if _, err := cache.Find("UnitTesting", "Testing2"); err == nil {
		t.Fatal("expected an error when the key is requested from a different group")
	}
}
// TestCacheStorage checks that items stored through one client are visible
// through a second client, i.e. that the storage is shared.
func TestCacheStorage(t *testing.T) {
	cc := services.NewCacheClient()

	item1 := &model.CacheItem{
		Key:     "UnitTesting01",
		Value:   "",
		Group:   "Testing",
		Expires: time.Now().Add(5 * time.Minute),
	}
	cc.Insert(item1)

	item2 := &model.CacheItem{
		Key:     "UnitTesting02",
		Value:   "",
		Group:   "Testing",
		Expires: time.Now().Add(5 * time.Minute),
	}
	cc.Insert(item2)

	cache := services.NewCacheClient()
	if _, err := cache.Find("UnitTesting02", "Testing"); err != nil {
		t.Fatalf("expected to find the value: %v", err)
	}
}

View File

@ -3,7 +3,7 @@ package services
import (
"os"
"log"
"github.com/joho/godotenv"
)
@ -13,6 +13,8 @@ const (
REDDIT_PULL_TOP = "REDDIT_PULL_TOP"
REDDIT_PULL_HOT = "REDDIT_PULL_HOT"
REDDIT_PULL_NSFW = "REDDIT_PULL_NSFW"
YOUTUBE_DEBUG = "YOUTUBE_DEBUG"
)
type ConfigClient struct {}
@ -26,7 +28,7 @@ func NewConfigClient() ConfigClient {
return ConfigClient{}
}
func (cc ConfigClient) GetConfig(key string) string {
func (cc *ConfigClient) GetConfig(key string) string {
res, filled := os.LookupEnv(key)
if !filled {
log.Printf("Missing the a value for '%v'. Could generate errors.", key)
@ -35,7 +37,7 @@ func (cc ConfigClient) GetConfig(key string) string {
}
// Use this when your ConfigClient has been open for a while and you want to ensure you have the most recent env changes.
func (cc ConfigClient) RefreshEnv() {
func (cc *ConfigClient) RefreshEnv() {
loadEnvFile()
}

View File

@ -14,27 +14,26 @@ type RedditClient struct {
subreddit string
url string
sourceId uint
config RedditConfig
}
var (
PULLTOP string
PULLHOT string
PULLNSFW string
)
func init() {
cc := NewConfigClient()
PULLTOP = cc.GetConfig(REDDIT_PULL_TOP)
PULLHOT = cc.GetConfig(REDDIT_PULL_HOT)
PULLNSFW = cc.GetConfig(REDDIT_PULL_NSFW)
type RedditConfig struct {
PullTop string
PullHot string
PullNSFW string
}
func NewReddit(subreddit string, sourceID uint) RedditClient {
// NewRedditClient builds a RedditClient for the given subreddit and source ID.
// The JSON listing URL is derived from the subreddit name and the pull
// settings are read through a new ConfigClient on every call.
func NewRedditClient(subreddit string, sourceID uint) RedditClient {
	cc := NewConfigClient()

	var client RedditClient
	client.subreddit = subreddit
	client.url = fmt.Sprintf("https://www.reddit.com/r/%v.json", subreddit)
	client.sourceId = sourceID

	client.config.PullHot = cc.GetConfig(REDDIT_PULL_HOT)
	client.config.PullNSFW = cc.GetConfig(REDDIT_PULL_NSFW)
	client.config.PullTop = cc.GetConfig(REDDIT_PULL_TOP)

	return client
}

14
services/reddit_test.go Normal file
View File

@ -0,0 +1,14 @@
package services_test
import (
"testing"
"github.com/jtom38/newsbot/collector/services"
)
// TestGetContent pulls the content of a subreddit and expects no error.
func TestGetContent(t *testing.T) {
	rc := services.NewRedditClient("dadjokes", 0)
	if _, err := rc.GetContent(); err != nil {
		t.Fatal(err)
	}
}

240
services/youtube.go Normal file
View File

@ -0,0 +1,240 @@
package services
import (
"errors"
"fmt"
"log"
"net/http"
//"strconv"
"github.com/PuerkitoBio/goquery"
"github.com/go-rod/rod"
"github.com/mmcdole/gofeed"
"github.com/jtom38/newsbot/collector/domain/model"
)
// YoutubeClient collects the videos of a single YouTube channel.
type YoutubeClient struct {
// SourceID is copied onto every article built by ConvertToArticle.
SourceID uint
// Url is the channel page that CheckSource parses and GetAvatarUri opens.
Url string
// ChannelID is set by GetChannelId and appended to the feed URL by PullFeed.
ChannelID string
// AvatarUri is set by CheckSource and used as AuthorImage on articles.
AvatarUri string
// Config holds the settings of this client.
Config YoutubeConfig
}
// YoutubeConfig holds the settings of a YoutubeClient.
type YoutubeConfig struct {
// Debug is not populated by NewYoutubeClient at the moment; the code that
// would read it from YOUTUBE_DEBUG is commented out there.
Debug bool
}
var (
// YoutubeUriCache is a package-level slice of the URIs that have already
// been seen, kept to avoid extra calls to the DB.
YoutubeUriCache []*string
// ErrThumbnailMissing is returned by GetVideoThumbnail when no thumbnail is found.
ErrThumbnailMissing = errors.New("unable to find the video thumbnail on a youtube video")
// ErrTagsMissing is returned by GetTags when no tags are found.
ErrTagsMissing = errors.New("unable to find the tags on the video")
// ErrAvatarMissing is returned when the avatar image cannot be found.
ErrAvatarMissing = errors.New("unable to find the avatar image on the page")
// ErrChannelIdMissing is returned when the channelId cannot be found.
ErrChannelIdMissing = errors.New("unable to find the channelId on the requested page")
)
// YOUTUBE_FEED_URL is the prefix of a channel's feed URL; PullFeed appends the
// channel ID to it.
const YOUTUBE_FEED_URL string = "https://www.youtube.com/feeds/videos.xml?channel_id="
// NewYoutubeClient returns a YoutubeClient for the given source ID and channel
// URL. All other fields keep their zero values.
func NewYoutubeClient(SourceID uint, Url string) YoutubeClient {
	var client YoutubeClient
	client.SourceID = SourceID
	client.Url = Url

	// Loading the debug flag from the environment is disabled for now.
	/*
		cc := NewConfigClient()
		debug, err := strconv.ParseBool(cc.GetConfig(YOUTUBE_DEBUG))
		if err != nil { panic("'YOUTUBE_DEBUG' was not a bool value")}
		yc.Config.Debug = debug
	*/

	return client
}
// CheckSource runs every step needed to process the source: it parses the
// channel page, resolves the channel ID and avatar, pulls the feed, and
// converts each post that has not been seen before into an article.
// The first step that fails aborts the run and its error is returned.
func (yc *YoutubeClient) CheckSource() error {
	page, err := yc.GetParser(yc.Url)
	if err != nil {
		return err
	}

	// Check cache/db for existing value
	// If we have the value, skip
	id, err := yc.GetChannelId(page)
	switch {
	case err != nil:
		return err
	case id == "":
		return ErrChannelIdMissing
	}
	yc.ChannelID = id

	// Check the cache/db for the value.
	// if we have the value, skip
	icon, err := yc.GetAvatarUri()
	switch {
	case err != nil:
		return err
	case icon == "":
		return ErrAvatarMissing
	}
	yc.AvatarUri = icon

	feed, err := yc.PullFeed()
	if err != nil {
		return err
	}

	fresh, err := yc.CheckForNewPosts(feed)
	if err != nil {
		return err
	}

	//TODO post to the API
	for i := range fresh {
		post := fresh[i]
		article := yc.ConvertToArticle(post)
		// Add the post to local cache
		YoutubeUriCache = append(YoutubeUriCache, &post.Link)
		log.Println(article)
	}

	return nil
}
// GetParser downloads uri and returns its body parsed as a goquery document.
//
// A failed request is returned to the caller right away. Carrying on after the
// error would dereference a nil response when closing its body.
func (yc *YoutubeClient) GetParser(uri string) (*goquery.Document, error) {
	resp, err := http.Get(uri)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, err
	}
	return doc, nil
}
// GetChannelId scans the meta tags of an already parsed channel page for the
// channel ID, stores it on the client and returns it.
// This value is required to generate the RSS feed URI.
//
// A tag matches when the value of its first attribute is "channelId"; the ID
// is then read from its second attribute. Tags with fewer than two attributes
// are skipped so that indexing cannot go out of range. A nil document, or a
// page without a matching tag, yields ErrChannelIdMissing.
func (yc *YoutubeClient) GetChannelId(doc *goquery.Document) (string, error) {
	if doc == nil {
		return "", ErrChannelIdMissing
	}

	for _, node := range doc.Find("meta").Nodes {
		if len(node.Attr) < 2 {
			continue
		}
		if node.Attr[0].Val == "channelId" {
			yc.ChannelID = node.Attr[1].Val
			return yc.ChannelID, nil
		}
	}
	return "", ErrChannelIdMissing
}
// GetAvatarUri opens the channel page in a browser and returns the "src" of
// the channel's avatar image. ErrAvatarMissing is returned when the attribute
// is absent or empty.
//
// The browser and the page are closed on every return path; the cleanup is
// deferred immediately after each one is created.
//
// NOTE(review): the rod Must* helpers report failures by panicking rather than
// returning an error — confirm that is acceptable for callers.
func (yc *YoutubeClient) GetAvatarUri() (string, error) {
	browser := rod.New().MustConnect()
	defer browser.Close()

	page := browser.MustPage(yc.Url)
	defer page.Close()

	res := page.MustElement("#channel-header-container > yt-img-shadow:nth-child(1) > img:nth-child(1)").MustAttribute("src")
	// Check for nil before dereferencing the attribute value.
	if res == nil || *res == "" {
		return "", ErrAvatarMissing
	}
	return *res, nil
}
// GetTags scans the meta tags of a parsed page for the tags that have been
// defined by the user.
//
// A tag matches when the value of its first attribute is "keywords"; the tags
// are then read from its second attribute. Tags with fewer than two attributes
// are skipped so that indexing cannot go out of range. A nil document, or a
// page without a matching tag, yields ErrTagsMissing.
func (yc *YoutubeClient) GetTags(parser *goquery.Document) (string, error) {
	if parser == nil {
		return "", ErrTagsMissing
	}

	for _, node := range parser.Find("meta").Nodes {
		if len(node.Attr) < 2 {
			continue
		}
		if node.Attr[0].Val == "keywords" {
			return node.Attr[1].Val, nil
		}
	}
	return "", ErrTagsMissing
}
// GetVideoThumbnail scans the meta tags of a parsed video page for the
// thumbnail image.
//
// A tag matches when the value of its first attribute is "og:image"; the image
// URI is then read from its second attribute. Tags with fewer than two
// attributes are skipped so that indexing cannot go out of range. A nil
// document, or a page without a matching tag, yields ErrThumbnailMissing.
func (yc *YoutubeClient) GetVideoThumbnail(parser *goquery.Document) (string, error) {
	if parser == nil {
		return "", ErrThumbnailMissing
	}

	for _, node := range parser.Find("meta").Nodes {
		if len(node.Attr) < 2 {
			continue
		}
		if node.Attr[0].Val == "og:image" {
			return node.Attr[1].Val, nil
		}
	}
	return "", ErrThumbnailMissing
}
// PullFeed builds the feed URL from YOUTUBE_FEED_URL and the client's
// ChannelID, then fetches and parses it. The parsed feed is returned, or the
// parser's error.
func (yc *YoutubeClient) PullFeed() (*gofeed.Feed, error) {
	parser := gofeed.NewParser()

	parsed, err := parser.ParseURL(fmt.Sprintf("%v%v", YOUTUBE_FEED_URL, yc.ChannelID))
	if err != nil {
		return nil, err
	}
	return parsed, nil
}
// CheckForNewPosts returns the feed items whose link is not yet known to the
// local URI cache. The returned error is always nil at the moment.
//
// TODO: check the DB when the cache is not aware of a link, and append links
// the DB already knows to the local cache.
func (yc *YoutubeClient) CheckForNewPosts(feed *gofeed.Feed) ([]*gofeed.Item, error) {
	var unseen []*gofeed.Item

	for i := range feed.Items {
		entry := feed.Items[i]
		// Only links the cache has not seen yet are treated as new.
		if !yc.CheckUriCache(&entry.Link) {
			unseen = append(unseen, entry)
		}
	}

	return unseen, nil
}
// CheckUriCache reports whether uri is already present in YoutubeUriCache.
//
// The strings are compared by content. Comparing the pointers would only match
// when the caller passes the very same pointer that was cached, so a link from
// a freshly parsed feed would never be recognised. A nil uri is never found.
func (yc *YoutubeClient) CheckUriCache(uri *string) bool {
	if uri == nil {
		return false
	}
	for _, cached := range YoutubeUriCache {
		if cached != nil && *cached == *uri {
			return true
		}
	}
	return false
}
// ConvertToArticle builds an article from a feed item. Tags and thumbnail are
// read from the video page behind item.Link; the remaining fields come from
// the feed item and the client.
//
// Failures are logged and the affected fields are left empty: when the video
// page cannot be loaded the article is returned without tags and thumbnail,
// and a missing publish date or author on the feed item leaves PubDate or
// AuthorName at its zero value instead of dereferencing a nil pointer.
func (yc *YoutubeClient) ConvertToArticle(item *gofeed.Item) model.Articles {
	article := model.Articles{
		SourceID:    yc.SourceID,
		Title:       item.Title,
		Url:         item.Link,
		Description: item.Description,
		AuthorImage: yc.AvatarUri,
	}
	if item.PublishedParsed != nil {
		article.PubDate = *item.PublishedParsed
	}
	if item.Author != nil {
		article.AuthorName = item.Author.Name
	}

	parser, err := yc.GetParser(item.Link)
	if err != nil {
		log.Printf("Unable to process %v, submit this link as an issue.\n", item.Link)
		// Without a parsed page there is nothing to extract tags or a thumbnail from.
		return article
	}

	tags, err := yc.GetTags(parser)
	if err != nil {
		msg := fmt.Sprintf("%v. %v", ErrTagsMissing, item.Link)
		log.Println(msg)
	}
	article.Tags = tags

	thumb, err := yc.GetVideoThumbnail(parser)
	if err != nil {
		msg := fmt.Sprintf("%v. %v", ErrThumbnailMissing, item.Link)
		log.Println(msg)
	}
	article.Thumbnail = thumb

	return article
}

135
services/youtube_test.go Normal file
View File

@ -0,0 +1,135 @@
package services_test
import (
"testing"
"github.com/jtom38/newsbot/collector/services"
)
// TestGetPageParser expects the channel page to be downloaded and parsed.
func TestGetPageParser(t *testing.T) {
	yc := services.NewYoutubeClient(
		0,
		"https://youtube.com/gamegrumps",
	)
	if _, err := yc.GetParser(yc.Url); err != nil {
		t.Fatal(err)
	}
}
// TestGetChannelId expects the channel ID to be found on the channel page.
func TestGetChannelId(t *testing.T) {
	yc := services.NewYoutubeClient(
		0,
		"https://youtube.com/gamegrumps",
	)
	parser, err := yc.GetParser(yc.Url)
	if err != nil {
		t.Fatal(err)
	}
	if _, err = yc.GetChannelId(parser); err != nil {
		t.Fatal(err)
	}
}
// TestPullFeed resolves the channel ID and expects the feed to be pulled.
func TestPullFeed(t *testing.T) {
	yc := services.NewYoutubeClient(
		0,
		"https://youtube.com/gamegrumps",
	)
	parser, err := yc.GetParser(yc.Url)
	if err != nil {
		t.Fatal(err)
	}
	if _, err = yc.GetChannelId(parser); err != nil {
		t.Fatal(err)
	}
	if _, err = yc.PullFeed(); err != nil {
		t.Fatal(err)
	}
}
// TestGetAvatarUri expects a non-empty avatar URI for the channel.
func TestGetAvatarUri(t *testing.T) {
	yc := services.NewYoutubeClient(
		0,
		"https://youtube.com/gamegrumps",
	)
	res, err := yc.GetAvatarUri()
	if err != nil {
		t.Fatal(err)
	}
	if res == "" {
		t.Fatal(services.ErrAvatarMissing)
	}
}
// TestGetVideoTags expects tags to be found on a video page.
func TestGetVideoTags(t *testing.T) {
	yc := services.NewYoutubeClient(
		0,
		"https://youtube.com/gamegrumps",
	)
	videoUri := "https://www.youtube.com/watch?v=k_sQEXOBe68"

	parser, err := yc.GetParser(videoUri)
	if err != nil {
		t.Fatal(err)
	}

	tags, err := yc.GetTags(parser)
	if err != nil {
		t.Fatal(err)
	}
	if tags == "" {
		t.Fatal("err was empty but value was missing.")
	}
}
// TestGetChannelTags expects tags to be found on the channel page.
func TestGetChannelTags(t *testing.T) {
	yc := services.NewYoutubeClient(
		0,
		"https://youtube.com/gamegrumps",
	)
	parser, err := yc.GetParser(yc.Url)
	if err != nil {
		t.Fatal(err)
	}

	tags, err := yc.GetTags(parser)
	if err != nil {
		t.Fatal(err)
	}
	if tags == "" {
		t.Fatal("no err but expected value was missing.")
	}
}
// TestConvertToArticle is a placeholder. It is reported as skipped so that the
// missing coverage stays visible instead of showing up as a passing test.
func TestConvertToArticle(t *testing.T) {
	t.Skip("TODO: ConvertToArticle has no test yet")
}
// TestGetVideoThumbnail expects a thumbnail to be found on a video page.
func TestGetVideoThumbnail(t *testing.T) {
	yc := services.NewYoutubeClient(
		0,
		"https://youtube.com/gamegrumps",
	)
	parser, err := yc.GetParser("https://www.youtube.com/watch?v=k_sQEXOBe68")
	if err != nil {
		t.Fatal(err)
	}

	thumb, err := yc.GetVideoThumbnail(parser)
	if err != nil {
		t.Fatal(err)
	}
	if thumb == "" {
		t.Fatal("no err but expected result was missing")
	}
}
// TestCheckSource runs the complete source check and expects no error.
func TestCheckSource(t *testing.T) {
	yc := services.NewYoutubeClient(
		0,
		"https://youtube.com/gamegrumps",
	)
	if err := yc.CheckSource(); err != nil {
		t.Fatal(err)
	}
}
// TestCheckUriCache adds a URI to the cache and expects it to be reported as known.
func TestCheckUriCache(t *testing.T) {
	yc := services.NewYoutubeClient(
		0,
		"https://youtube.com/gamegrumps",
	)
	item := "demo"
	services.YoutubeUriCache = append(services.YoutubeUriCache, &item)

	if !yc.CheckUriCache(&item) {
		t.Fatal("expected a value to come back")
	}
}
// TestCheckUriCacheFails expects a URI that was never cached to be reported as unknown.
func TestCheckUriCacheFails(t *testing.T) {
	yc := services.NewYoutubeClient(
		0,
		"https://youtube.com/gamegrumps",
	)
	item := "demo1"

	if yc.CheckUriCache(&item) {
		t.Fatal("expected no value to come back")
	}
}