From eba63c27ef2b85a15778d8f9803528e117f4c38c Mon Sep 17 00:00:00 2001 From: James Tombleson Date: Sun, 17 Apr 2022 07:25:49 -0700 Subject: [PATCH] Youtube source is now working (#5) * added extra packages to help with parsing * getting the core built for Youtube collection. The feed can be pulled and starting to build the article object * added some tests, reddit will need more love but youtube is starting off with more tests. Starting to add Rod to pull missing values from the site * Added rod to work with browser automation * Moved the config inside the client as they can change within runtime and need to be refreshed on each client creation * added more features to collect data per video and tests to support them. * adding the cache and tests * moved errors to var values at the top * youtube is now getting collected and tests have been setup * resolved an issue with the config * setting up the first interface, its not used yet * more updates to the cache service. Not finished yet and could see rework * added logic to monitor Youtube. Still basic logic that needs to be wired up to the database --- database/common.go | 3 +- domain/interfaces/source.go | 10 ++ domain/model/cache.go | 15 +++ go.mod | 3 + go.sum | 38 ++++++ main.go | 20 ++- services/cache.go | 40 ++++++ services/cacheMonitor.go | 39 ++++++ services/cache_test.go | 69 +++++++++++ services/config.go | 8 +- services/reddit.go | 23 ++-- services/reddit_test.go | 14 +++ services/youtube.go | 240 ++++++++++++++++++++++++++++++++++++ services/youtube_test.go | 135 ++++++++++++++++++++ 14 files changed, 636 insertions(+), 21 deletions(-) create mode 100644 domain/interfaces/source.go create mode 100644 domain/model/cache.go create mode 100644 services/cache.go create mode 100644 services/cacheMonitor.go create mode 100644 services/cache_test.go create mode 100644 services/reddit_test.go create mode 100644 services/youtube.go create mode 100644 services/youtube_test.go diff --git a/database/common.go b/database/common.go index 6194be1..7f555ce 100644 --- a/database/common.go +++ b/database/common.go @@ -18,7 +18,8 @@ type DatabaseClient struct { // This will generate a new client to interface with the API Database. func NewDatabaseClient() DatabaseClient { - var dbUri = services.NewConfigClient().GetConfig(services.DB_URI) + cc := services.NewConfigClient() + dbUri := cc.GetConfig(services.DB_URI) var client = DatabaseClient{} client.Diagnosis.rootUri = dbUri diff --git a/domain/interfaces/source.go b/domain/interfaces/source.go new file mode 100644 index 0000000..db51f86 --- /dev/null +++ b/domain/interfaces/source.go @@ -0,0 +1,10 @@ +package interfaces + +import ( + "github.com/mmcdole/gofeed" +) + +type Sources interface { + CheckSource() error + PullFeed() (*gofeed.Feed, error) +} \ No newline at end of file diff --git a/domain/model/cache.go b/domain/model/cache.go new file mode 100644 index 0000000..72f055d --- /dev/null +++ b/domain/model/cache.go @@ -0,0 +1,15 @@ +package model + +import ( + "time" +) + +type CacheItem struct { + Key string + Value string + + // Group defines what it should be a reference to. + // youtube, reddit, ect + Group string + Expires time.Time +} \ No newline at end of file diff --git a/go.mod b/go.mod index d18423e..298cf0b 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,10 @@ module github.com/jtom38/newsbot/collector go 1.16 require ( + github.com/PuerkitoBio/goquery v1.8.0 // indirect github.com/go-chi/chi/v5 v5.0.7 // indirect + github.com/go-rod/rod v0.105.1 // indirect github.com/joho/godotenv v1.4.0 // indirect + github.com/mmcdole/gofeed v1.1.3 // indirect github.com/robfig/cron/v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 00c4e81..02c5d33 100644 --- a/go.sum +++ b/go.sum @@ -1,7 +1,14 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc= +github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U= +github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI= +github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= +github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c= +github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= +github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/czasg/go-queue v0.0.0-20211206021309-e3b3e4c4ae3b/go.mod h1:Myb/1g8zHhdc13TwwkSuJ5wShsub1yoPrT+ta8isZQ4= github.com/czasg/go-queue v0.0.0-20211206102528-0d03e5c8ace8 h1:N1zVvx6PqlP5W99D4wmvEoEf9z7dczVICL/oW/g66vc= github.com/czasg/go-queue v0.0.0-20211206102528-0d03e5c8ace8/go.mod h1:Myb/1g8zHhdc13TwwkSuJ5wShsub1yoPrT+ta8isZQ4= @@ -21,6 +28,8 @@ github.com/go-pg/pg/v10 v10.10.6 h1:1vNtPZ4Z9dWUw/TjJwOfFUbF5nEq1IkR6yG8Mq/Iwso= github.com/go-pg/pg/v10 v10.10.6/go.mod h1:GLmFXufrElQHf5uzM3BQlcfwV3nsgnHue5uzjQ6Nqxg= github.com/go-pg/zerochecker v0.2.0 h1:pp7f72c3DobMWOb2ErtZsnrPaSvHd2W4o9//8HtF4mU= github.com/go-pg/zerochecker v0.2.0/go.mod h1:NJZ4wKL0NmTtz0GKCoJ8kym6Xn/EQzXRl2OnAe7MmDo= +github.com/go-rod/rod v0.105.1 h1:r0bNmO9siOe13lG6Vbkaak11u48rYmWGl/Hk4MJdOiE= +github.com/go-rod/rod v0.105.1/go.mod h1:Wrnn6HokFHskwaIVke3ML1y/NBVp7XPIeB8eDzR9vuw= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= @@ -39,14 +48,25 @@ github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMyw github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E= github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc= github.com/joho/godotenv v1.4.0 h1:3l4+N6zfMWnkbPEXKng2o2/MR5mSwTrBih4ZEkkz1lg= github.com/joho/godotenv v1.4.0/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= +github.com/json-iterator/go v1.1.10 h1:Kz6Cvnvv2wGdaG/V8yMvfkmNiXq9Ya2KUv4rouJJr68= +github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/mmcdole/gofeed v1.1.3 h1:pdrvMb18jMSLidGp8j0pLvc9IGziX4vbmvVqmLH6z8o= +github.com/mmcdole/gofeed v1.1.3/go.mod h1:QQO3maftbOu+hiVOGOZDRLymqGQCos4zxbA4j89gMrE= +github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf h1:sWGE2v+hO0Nd4yFU/S/mDBM5plIU8v/Qhfz41hkDIAI= +github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf/go.mod h1:pasqhqstspkosTneA62Nc+2p9SOBBYAPbnmRRWPQ0V8= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 h1:ZqeYNhU3OHLH3mGKHDcjJRFFRrJa6eAM5H+CtDdOsPc= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742 h1:Esafd1046DLDQ0W1YjYsBW+p8U2u7vzgW2SQVmlNazg= +github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= @@ -61,12 +81,16 @@ github.com/robfig/cron v1.2.0 h1:ZjScXvvxeQ63Dbyxy76Fj3AT3Ut0aKsyd2/tl3DTMuQ= github.com/robfig/cron v1.2.0/go.mod h1:JGuDeoQd7Z6yL4zQhZ3OPEVHB7fL6Ka6skscFHfmt2k= github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs= github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro= +github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/tmthrgd/go-hex v0.0.0-20190904060850-447a3041c3bc h1:9lRDQMhESg+zvGYmW5DyG0UqvY96Bu5QYsTLvCHdrgo= github.com/tmthrgd/go-hex v0.0.0-20190904060850-447a3041c3bc/go.mod h1:bciPuU6GHm1iF1pBvUfxfsH0Wmnc2VbpgvbI9ZWuIRs= +github.com/urfave/cli v1.22.3/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= github.com/vmihailenco/bufpool v0.1.11 h1:gOq2WmBrq0i2yW5QJ16ykccQ4wH9UyEsgLm6czKAd94= github.com/vmihailenco/bufpool v0.1.11/go.mod h1:AFf/MOy3l2CFTKbxwt0mp2MwnqjNEs5H/UxrkA5jxTQ= github.com/vmihailenco/msgpack/v5 v5.3.4/go.mod h1:7xyJ9e+0+9SaZT0Wt1RGleJXzli6Q/V5KbhBonMG9jc= @@ -76,6 +100,14 @@ github.com/vmihailenco/tagparser v0.1.2 h1:gnjoVuB/kljJ5wICEEOpx98oXMWPLj22G67Vb github.com/vmihailenco/tagparser v0.1.2/go.mod h1:OeAg3pn3UbLjkWt+rN9oFYB6u/cQgqMEUPoW2WPyhdI= github.com/vmihailenco/tagparser/v2 v2.0.0 h1:y09buUbR+b5aycVFQs/g70pqKVZNBmxwAhO7/IwNM9g= github.com/vmihailenco/tagparser/v2 v2.0.0/go.mod h1:Wri+At7QHww0WTrCBeu4J6bNtoV6mEfg5OIWRZA9qds= +github.com/ysmood/goob v0.4.0 h1:HsxXhyLBeGzWXnqVKtmT9qM7EuVs/XOgkX7T6r1o1AQ= +github.com/ysmood/goob v0.4.0/go.mod h1:u6yx7ZhS4Exf2MwciFr6nIM8knHQIE22lFpWHnfql18= +github.com/ysmood/got v0.23.2/go.mod h1:pE1l4LOwOBhQg6A/8IAatkGp7uZjnalzrZolnlhhMgY= +github.com/ysmood/gotrace v0.6.0/go.mod h1:TzhIG7nHDry5//eYZDYcTzuJLYQIkykJzCRIo4/dzQM= +github.com/ysmood/gson v0.7.1 h1:zKL2MTGtynxdBdlZjyGsvEOZ7dkxaY5TH6QhAbTgz0Q= +github.com/ysmood/gson v0.7.1/go.mod h1:3Kzs5zDl21g5F/BlLTNcuAGAYLKt2lV5G8D1zF3RNmg= +github.com/ysmood/leakless v0.7.0 h1:XCGdaPExyoreoQd+H5qgxM3ReNbSPFsEXpSKwbXbwQw= +github.com/ysmood/leakless v0.7.0/go.mod h1:R8iAXPRaG97QJwqxs74RdwzcRHT1SWCGTNqY8q0JvMQ= golang.org/x/crypto v0.0.0-20180910181607-0e37d006457b/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= @@ -86,15 +118,20 @@ golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20201006153459-a7d1128ccaa0/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20210916014120-12bc252f5db8 h1:/6y1LfuqNuQdHAm0jjtPtgRcxIxjVZgm5OTu8/QhZvk= +golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -120,6 +157,7 @@ golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9sn golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.6 h1:aRYxNxv6iGQlyVaZmk6ZgYEDa+Jg18DxebPSrd6bg1M= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/main.go b/main.go index faeed8d..e3039e3 100644 --- a/main.go +++ b/main.go @@ -15,12 +15,14 @@ import ( ) func main() { + var err error //EnableScheduler() - dc := database.NewDatabaseClient() - err := dc.Diagnosis.Ping() - if err != nil { log.Fatalln(err) } + //dc := database.NewDatabaseClient() + //err := dc.Diagnosis.Ping() + //if err != nil { log.Fatalln(err) } - CheckReddit() + //CheckReddit() + CheckYoutube() app := chi.NewRouter() app.Use(middleware.Logger) @@ -41,7 +43,7 @@ func CheckReddit() { sources, err := dc.Sources.FindBySource("reddit") if err != nil { log.Println(err) } - rc := services.NewReddit(sources[0].Name, sources[0].ID) + rc := services.NewRedditClient(sources[0].Name, sources[0].ID) raw, err := rc.GetContent() if err != nil { log.Println(err) } @@ -54,4 +56,12 @@ func CheckReddit() { if err != nil { log.Println("Failed to post article.")} } } +} + +func CheckYoutube() { + // Add call to the db to request youtube sources. + + // Loop though the services, and generate the clients. + yt := services.NewYoutubeClient(0, "https://www.youtube.com/user/GameGrumps") + yt.CheckSource() } \ No newline at end of file diff --git a/services/cache.go b/services/cache.go new file mode 100644 index 0000000..04dfb1c --- /dev/null +++ b/services/cache.go @@ -0,0 +1,40 @@ +package services + +import ( + "errors" + + "github.com/jtom38/newsbot/collector/domain/model" +) + +type CacheClient struct{} + +var ( + cacheStorage []*model.CacheItem + + ErrCacheRecordMissing = errors.New("unable to find the requested record.") +) + + +func NewCacheClient() CacheClient { + return CacheClient{} +} + +func (cc *CacheClient) Insert(item *model.CacheItem) { + //_, err := cc.Find(item.Key, item.Group) + //if err != nil { } + cacheStorage = append(cacheStorage, item) +} + +func (cc *CacheClient) Find(key string, group string) (*model.CacheItem, error) { + //go cc.FindExpiredEntries() + + for _, item := range cacheStorage { + if item.Group != group { continue } + + if item.Key != key { continue } + + return item, nil + } + + return &model.CacheItem{}, ErrCacheRecordMissing +} \ No newline at end of file diff --git a/services/cacheMonitor.go b/services/cacheMonitor.go new file mode 100644 index 0000000..166eb96 --- /dev/null +++ b/services/cacheMonitor.go @@ -0,0 +1,39 @@ +package services + +import ( + "time" + + "github.com/jtom38/newsbot/collector/domain/model" +) + +type CacheMonitor struct {} + +func NewCacheMonitorClient() CacheMonitor { + return CacheMonitor{} +} + +func (cm *CacheMonitor) Enable() { + +} + +// This will be fired off each time an cache a +func (cm *CacheMonitor) FindExpiredEntries() { + now := time.Now() + for index, item := range cacheStorage { + res := now.After(item.Expires) + if res { + cm.removeExpiredEntries(index) + } + } +} + +// This will create a new slice and add the valid items to it and ignore the one to be removed. +// The existing cacheStorage will be replaced. +func (cc *CacheMonitor) removeExpiredEntries(arrayEntry int) { + var temp []*model.CacheItem + for index, item := range cacheStorage { + if index == arrayEntry { continue } + temp = append(temp, item) + } + cacheStorage = temp +} \ No newline at end of file diff --git a/services/cache_test.go b/services/cache_test.go new file mode 100644 index 0000000..4a0243b --- /dev/null +++ b/services/cache_test.go @@ -0,0 +1,69 @@ +package services_test + +import ( + "testing" + "time" + + "github.com/jtom38/newsbot/collector/domain/model" + "github.com/jtom38/newsbot/collector/services" +) + +func TestNewCacheClient(t *testing.T) { + _ = services.NewCacheClient() +} + +func TestInsert(t *testing.T) { + cache := services.NewCacheClient() + var item *model.CacheItem = &model.CacheItem{ + Key: "UnitTesting", + Value: "Something, or nothing", + Group: "Testing", + Expires: time.Now().Add(5 * time.Second), + } + cache.Insert(item) +} + +func TestFindGroupMissing(t *testing.T) { + cache := services.NewCacheClient() + _, err := cache.Find("UnitTesting", "Unknown") + if err == nil { panic("Nothing was appended with the requested group.") } +} + +func TestFindGroupExists(t *testing.T) { + cache := services.NewCacheClient() + var item *model.CacheItem = &model.CacheItem{ + Key: "UnitTesting", + Value: "Something, or nothing", + Group: "Testing", + Expires: time.Now().Add(5 * time.Second), + } + cache.Insert(item) + _, err := cache.Find("UnitTesting", "Testing2") + //t.Log(res) + if err == nil { panic("") } +} + + +func TestCacheStorage(t *testing.T) { + cc := services.NewCacheClient() + + item1 := &model.CacheItem { + Key: "UnitTesting01", + Value: "", + Group: "Testing", + Expires: time.Now().Add(5 * time.Minute), + } + cc.Insert(item1) + + item2 := &model.CacheItem { + Key: "UnitTesting02", + Value: "", + Group: "Testing", + Expires: time.Now().Add(5 * time.Minute), + } + cc.Insert(item2) + + cache := services.NewCacheClient() + _, err := cache.Find("UnitTesting02", "Testing") + if err != nil { panic("expected to find the value")} +} \ No newline at end of file diff --git a/services/config.go b/services/config.go index 5681a50..680ade5 100644 --- a/services/config.go +++ b/services/config.go @@ -3,7 +3,7 @@ package services import ( "os" "log" - + "github.com/joho/godotenv" ) @@ -13,6 +13,8 @@ const ( REDDIT_PULL_TOP = "REDDIT_PULL_TOP" REDDIT_PULL_HOT = "REDDIT_PULL_HOT" REDDIT_PULL_NSFW = "REDDIT_PULL_NSFW" + + YOUTUBE_DEBUG = "YOUTUBE_DEBUG" ) type ConfigClient struct {} @@ -26,7 +28,7 @@ func NewConfigClient() ConfigClient { return ConfigClient{} } -func (cc ConfigClient) GetConfig(key string) string { +func (cc *ConfigClient) GetConfig(key string) string { res, filled := os.LookupEnv(key) if !filled { log.Printf("Missing the a value for '%v'. Could generate errors.", key) @@ -35,7 +37,7 @@ func (cc ConfigClient) GetConfig(key string) string { } // Use this when your ConfigClient has been opened for awhile and you want to ensure you have the most recent env changes. -func (cc ConfigClient) RefreshEnv() { +func (cc *ConfigClient) RefreshEnv() { loadEnvFile() } diff --git a/services/reddit.go b/services/reddit.go index 6eeb8a2..0a9bb27 100644 --- a/services/reddit.go +++ b/services/reddit.go @@ -14,27 +14,26 @@ type RedditClient struct { subreddit string url string sourceId uint + config RedditConfig } -var ( - PULLTOP string - PULLHOT string - PULLNSFW string -) - -func init() { - cc := NewConfigClient() - PULLTOP = cc.GetConfig(REDDIT_PULL_TOP) - PULLHOT = cc.GetConfig(REDDIT_PULL_HOT) - PULLNSFW = cc.GetConfig(REDDIT_PULL_NSFW) +type RedditConfig struct { + PullTop string + PullHot string + PullNSFW string } -func NewReddit(subreddit string, sourceID uint) RedditClient { +func NewRedditClient(subreddit string, sourceID uint) RedditClient { rc := RedditClient{ subreddit: subreddit, url: fmt.Sprintf("https://www.reddit.com/r/%v.json", subreddit), sourceId: sourceID, } + cc := NewConfigClient() + rc.config.PullHot = cc.GetConfig(REDDIT_PULL_HOT) + rc.config.PullNSFW = cc.GetConfig(REDDIT_PULL_NSFW) + rc.config.PullTop = cc.GetConfig(REDDIT_PULL_TOP) + return rc } diff --git a/services/reddit_test.go b/services/reddit_test.go new file mode 100644 index 0000000..1887893 --- /dev/null +++ b/services/reddit_test.go @@ -0,0 +1,14 @@ +package services_test + +import ( + "testing" + + "github.com/jtom38/newsbot/collector/services" +) + +func TestGetContent(t *testing.T) { + rc := services.NewRedditClient("dadjokes", 0) + _, err := rc.GetContent() + + if err != nil { panic(err) } +} \ No newline at end of file diff --git a/services/youtube.go b/services/youtube.go new file mode 100644 index 0000000..1ac726a --- /dev/null +++ b/services/youtube.go @@ -0,0 +1,240 @@ +package services + +import ( + "errors" + "fmt" + "log" + "net/http" + //"strconv" + + "github.com/PuerkitoBio/goquery" + "github.com/go-rod/rod" + "github.com/mmcdole/gofeed" + + "github.com/jtom38/newsbot/collector/domain/model" +) + +type YoutubeClient struct { + SourceID uint + Url string + ChannelID string + AvatarUri string + Config YoutubeConfig +} + +type YoutubeConfig struct { + Debug bool +} + +var ( + // This is a local slice to store what URI's have been seen to remove extra calls to the DB + YoutubeUriCache []*string + + ErrThumbnailMissing = errors.New("unable to find the video thumbnail on a youtube video") + ErrTagsMissing = errors.New("unable to find the tags on the video") + ErrAvatarMissing = errors.New("unable to find the avatar image on the page") + ErrChannelIdMissing = errors.New("unable to find the channelId on the requested page") +) + +const YOUTUBE_FEED_URL string = "https://www.youtube.com/feeds/videos.xml?channel_id=" + +func NewYoutubeClient(SourceID uint, Url string) YoutubeClient { + yc := YoutubeClient{ + SourceID: SourceID, + Url: Url, + } + /* + cc := NewConfigClient() + + debug, err := strconv.ParseBool(cc.GetConfig(YOUTUBE_DEBUG)) + if err != nil { panic("'YOUTUBE_DEBUG' was not a bool value")} + yc.Config.Debug = debug + */ + return yc +} + +// CheckSource will go and run all the commands needed to process a source. +func (yc *YoutubeClient) CheckSource() error { + docParser, err := yc.GetParser(yc.Url) + if err != nil { return err } + + // Check cache/db for existing value + // If we have the value, skip + channelId, err := yc.GetChannelId(docParser) + if err != nil { return err } + if channelId == "" { return ErrChannelIdMissing } + yc.ChannelID = channelId + + // Check the cache/db forthe value. + // if we have the value, skip + avatar, err := yc.GetAvatarUri() + if err != nil { return err } + if avatar == "" { return ErrAvatarMissing } + yc.AvatarUri = avatar + + feed, err := yc.PullFeed() + if err != nil { return err } + + newPosts, err := yc.CheckForNewPosts(feed) + if err != nil { return err } + + //TODO post to the API + for _, item := range newPosts { + + article := yc.ConvertToArticle(item) + + YoutubeUriCache = append(YoutubeUriCache, &item.Link) + + // Add the post to local cache + log.Println(article) + } + + return nil +} + +func (yc *YoutubeClient) GetParser(uri string) (*goquery.Document, error) { + html, err := http.Get(uri) + if err != nil { + log.Println(err) + } + defer html.Body.Close() + + doc, err := goquery.NewDocumentFromReader(html.Body) + if err != nil { + return nil, err + } + return doc, nil +} + +// This pulls the youtube page and finds the ChannelID. +// This value is required to generate the RSS feed URI +func (yc *YoutubeClient) GetChannelId(doc *goquery.Document) (string, error) { + meta := doc.Find("meta") + for _, item := range meta.Nodes { + + if item.Attr[0].Val == "channelId" { + yc.ChannelID = item.Attr[1].Val + return yc.ChannelID, nil + } + } + return "", ErrChannelIdMissing +} + +// This will parse the page to find the current Avatar of the channel. +func (yc *YoutubeClient) GetAvatarUri() (string, error) { + var AvatarUri string + + browser := rod.New().MustConnect() + page := browser.MustPage(yc.Url) + + res := page.MustElement("#channel-header-container > yt-img-shadow:nth-child(1) > img:nth-child(1)").MustAttribute("src") + + if *res == "" || res == nil { + return AvatarUri, ErrAvatarMissing + } + + AvatarUri = *res + + defer browser.Close() + defer page.Close() + return AvatarUri, nil +} + +// This will parse and look for the tags that has been defined by the user. +func (yc *YoutubeClient) GetTags(parser *goquery.Document) (string, error) { + meta := parser.Find("meta") + + for _, item := range meta.Nodes { + if item.Attr[0].Val == "keywords" { + res := item.Attr[1].Val + return res, nil + } + } + return "", ErrTagsMissing +} + +func (yc *YoutubeClient) GetVideoThumbnail(parser *goquery.Document) (string, error) { + meta := parser.Find("meta") + + for _, item := range meta.Nodes { + if item.Attr[0].Val == "og:image" { + res := item.Attr[1].Val + return res, nil + } + } + return "", ErrThumbnailMissing +} + +// This will pull the RSS feed items and return the results +func (yc *YoutubeClient) PullFeed() (*gofeed.Feed, error) { + feedUri := fmt.Sprintf("%v%v", YOUTUBE_FEED_URL, yc.ChannelID) + fp := gofeed.NewParser() + feed, err := fp.ParseURL(feedUri) + if err != nil { + return nil, err + } + + return feed, nil +} + +// CheckForNewPosts will talk to the Database to see if it has a record for the posts that have been extracted. +// If the post does not exist, it will be added. +func (yc *YoutubeClient) CheckForNewPosts(feed *gofeed.Feed) ([]*gofeed.Item, error) { + var newPosts []*gofeed.Item + for _, item := range feed.Items { + + // Check the cache/db to see if this URI has been seen already + uriExists := yc.CheckUriCache(&item.Link) + if uriExists { continue } + + //TODO Check the DB if the cache is not aware + //TODO If the db knew about it, append it to the local cache + + // if its new, append it. + newPosts = append(newPosts, item) + } + + return newPosts, nil +} + +func (yc *YoutubeClient) CheckUriCache(uri *string) bool { + for _, item := range YoutubeUriCache { + if item == uri { + return true + } + } + + return false +} + +func (yc *YoutubeClient) ConvertToArticle(item *gofeed.Item) model.Articles { + parser, err := yc.GetParser(item.Link) + if err != nil { + log.Printf("Unable to process %v, submit this link as an issue.\n", item.Link) + } + + tags, err := yc.GetTags(parser) + if err != nil { + msg := fmt.Sprintf("%v. %v", ErrTagsMissing, item.Link) + log.Println(msg) + } + + thumb, err := yc.GetVideoThumbnail(parser) + if err != nil { + msg := fmt.Sprintf("%v. %v", ErrThumbnailMissing, item.Link) + log.Println(msg) + } + + var article = model.Articles{ + SourceID: yc.SourceID, + Tags: tags, + Title: item.Title, + Url: item.Link, + PubDate: *item.PublishedParsed, + Thumbnail: thumb, + Description: item.Description, + AuthorName: item.Author.Name, + AuthorImage: yc.AvatarUri, + } + return article +} diff --git a/services/youtube_test.go b/services/youtube_test.go new file mode 100644 index 0000000..5c2cc68 --- /dev/null +++ b/services/youtube_test.go @@ -0,0 +1,135 @@ +package services_test + +import ( + "testing" + + "github.com/jtom38/newsbot/collector/services" +) + +func TestGetPageParser(t *testing.T) { + yc := services.NewYoutubeClient( + 0, + "https://youtube.com/gamegrumps", + ) + _, err := yc.GetParser(yc.Url) + if err != nil { panic(err) } +} + +func TestGetChannelId(t *testing.T) { + yc := services.NewYoutubeClient( + 0, + "https://youtube.com/gamegrumps", + ) + parser, err := yc.GetParser(yc.Url) + if err != nil { panic(err) } + + _, err = yc.GetChannelId(parser) + if err != nil { panic(err) } +} + +func TestPullFeed(t *testing.T) { + yc := services.NewYoutubeClient( + 0, + "https://youtube.com/gamegrumps", + ) + parser, err := yc.GetParser(yc.Url) + if err != nil { panic(err) } + + _, err = yc.GetChannelId(parser) + if err != nil { panic(err) } + + _, err = yc.PullFeed() + if err != nil { panic(err) } +} + +func TestGetAvatarUri(t *testing.T) { + yc := services.NewYoutubeClient( + 0, + "https://youtube.com/gamegrumps", + ) + res, err := yc.GetAvatarUri() + if err != nil { panic(err) } + if res == "" { panic(services.ErrAvatarMissing)} +} + +func TestGetVideoTags(t *testing.T) { + yc := services.NewYoutubeClient( + 0, + "https://youtube.com/gamegrumps", + ) + + var videoUri = "https://www.youtube.com/watch?v=k_sQEXOBe68" + + parser, err := yc.GetParser(videoUri) + if err != nil { panic(err) } + + tags, err := yc.GetTags(parser) + if err == nil && tags == "" { panic("err was empty but value was missing.")} + if err != nil { panic(err) } +} + +func TestGetChannelTags(t *testing.T) { + yc := services.NewYoutubeClient( + 0, + "https://youtube.com/gamegrumps", + ) + + parser, err := yc.GetParser(yc.Url) + if err != nil { panic(err) } + + tags, err := yc.GetTags(parser) + if err == nil && tags == "" { panic("no err but expected value was missing.")} + if err != nil { panic(err) } +} + +func TestConvertToArticle(t *testing.T) { + +} + +func TestGetVideoThumbnail(t *testing.T) { + yc := services.NewYoutubeClient( + 0, + "https://youtube.com/gamegrumps", + ) + parser, err := yc.GetParser("https://www.youtube.com/watch?v=k_sQEXOBe68") + if err != nil {panic(err) } + + thumb, err := yc.GetVideoThumbnail(parser) + if err == nil && thumb == "" { panic("no err but expected result was missing")} + if err != nil { panic(err) } + +} + +func TestCheckSource(t *testing.T) { + yc := services.NewYoutubeClient( + 0, + "https://youtube.com/gamegrumps", + ) + err := yc.CheckSource() + if err != nil { panic(err) } + +} + +func TestCheckUriCache(t *testing.T) { + yc := services.NewYoutubeClient( + 0, + "https://youtube.com/gamegrumps", + ) + item := "demo" + + services.YoutubeUriCache = append(services.YoutubeUriCache, &item) + res := yc.CheckUriCache(&item) + if res == false { panic("expected a value to come back")} +} + +func TestCheckUriCacheFails(t *testing.T) { + yc := services.NewYoutubeClient( + 0, + "https://youtube.com/gamegrumps", + ) + item := "demo1" + + res := yc.CheckUriCache(&item) + if res == true { panic("expected no value to come back")} + +} \ No newline at end of file