From bfa0f1023d88c8603180836fc9fa8cc6c0258b8a Mon Sep 17 00:00:00 2001 From: James Tombleson Date: Wed, 1 May 2024 17:49:38 -0700 Subject: [PATCH] got the rss job setup and disabled the others for now --- internal/repository/article.go | 17 ++++++ internal/services/cron/rss.go | 59 +++++++++++++++++++++ internal/services/cron/rss_test.go | 43 +++++++++++++++ internal/services/cron/scheduler.go | 21 +++++++- internal/services/cron/scheduler_test.go | 18 +++++++ internal/services/input/common.go | 2 +- internal/services/input/rss.go | 67 ++++++++++++------------ internal/services/input/rss_test.go | 17 ++++-- 8 files changed, 205 insertions(+), 39 deletions(-) create mode 100644 internal/services/cron/rss.go create mode 100644 internal/services/cron/rss_test.go diff --git a/internal/repository/article.go b/internal/repository/article.go index 0652ba6..928a957 100644 --- a/internal/repository/article.go +++ b/internal/repository/article.go @@ -24,6 +24,7 @@ type ArticlesRepo interface { ListByPublishDate(ctx context.Context, page, limit int, orderBy string) ([]domain.ArticleEntity, error) ListBySource(ctx context.Context, page, limit, sourceId int, orderBy string) ([]domain.ArticleEntity, error) Create(ctx context.Context, sourceId int64, tags, title, url, thumbnailUrl, description, authorName, authorImageUrl string, pubDate time.Time, isVideo bool) (int64, error) + CreateFromEntity(ctx context.Context, entity domain.ArticleEntity) (int64, error) } type ArticleRepository struct { @@ -192,6 +193,22 @@ func (ar ArticleRepository) Create(ctx context.Context, sourceId int64, tags, ti return 1, nil } +func (ar ArticleRepository) CreateFromEntity(ctx context.Context, entity domain.ArticleEntity) (int64, error) { + dt := time.Now() + queryBuilder := sqlbuilder.NewInsertBuilder() + queryBuilder.InsertInto("articles") + queryBuilder.Cols("UpdatedAt", "CreatedAt", "DeletedAt", "SourceId", "Tags", "Title", "Url", "PubDate", "IsVideo", "ThumbnailUrl", "Description", "AuthorName", "AuthorImageUrl") + queryBuilder.Values(dt, dt, timeZero, entity.SourceID, entity.Tags, entity.Title, entity.Url, entity.PubDate, entity.IsVideo, entity.Thumbnail, entity.Description, entity.AuthorName, entity.AuthorImageUrl) + query, args := queryBuilder.Build() + + _, err := ar.conn.ExecContext(ctx, query, args...) + if err != nil { + return 0, err + } + + return 1, nil +} + func (ur ArticleRepository) processRows(rows *sql.Rows) []domain.ArticleEntity { items := []domain.ArticleEntity{} diff --git a/internal/services/cron/rss.go b/internal/services/cron/rss.go new file mode 100644 index 0000000..fce1a0d --- /dev/null +++ b/internal/services/cron/rss.go @@ -0,0 +1,59 @@ +package cron + +import ( + "log" + "time" + + "git.jamestombleson.com/jtom38/newsbot-api/internal/domain" + "git.jamestombleson.com/jtom38/newsbot-api/internal/services/input" +) + +func (c *Cron) CheckRssSources() { + log.Println("Starting ") + sources, err := c.repo.Sources.ListBySource(*c.ctx, 0, 1000, domain.SourceCollectorRss) + if err != nil { + log.Println(err) + } + + for sourceIndex, source := range sources { + if !source.Enabled { + continue + } + + rssClient := input.NewRssClient(source) + articles, err := rssClient.GetArticles() + if err != nil { + log.Println(err) + } + + for _, article := range articles { + _, err := c.repo.Articles.GetByUrl(*c.ctx, article.Url) + if err == nil { + continue + } + + rowsCreated, err := c.repo.Articles.CreateFromEntity(*c.ctx, article) + if err != nil { + log.Println(err) + } + if rowsCreated != 1 { + log.Println("Got back the wrong number of rows") + } + } + + if sourceIndex != len(sources) { + time.Sleep(time.Second * 30) + } + } +} + +func (c *Cron) ListAllSourceRecords(sourceType string) ([]domain.SourceEntity, error) { + var records []domain.SourceEntity + + sources, err := c.repo.Sources.ListBySource(*c.ctx, 0, 1000, sourceType) + if err != nil { + return records, err + } + + return sources, nil +} diff --git a/internal/services/cron/rss_test.go b/internal/services/cron/rss_test.go new file mode 100644 index 0000000..4ab8da7 --- /dev/null +++ b/internal/services/cron/rss_test.go @@ -0,0 +1,43 @@ +package cron_test + +import ( + "context" + "testing" + + "git.jamestombleson.com/jtom38/newsbot-api/internal/domain" + "git.jamestombleson.com/jtom38/newsbot-api/internal/services" + "git.jamestombleson.com/jtom38/newsbot-api/internal/services/cron" +) + +func TestRssPullsCorrectly(t *testing.T) { + conn, err := setupInMemoryDb() + if err != nil { + t.Error(err) + t.FailNow() + } + defer conn.Close() + + ctx := context.Background() + db := services.NewRepositoryService(conn) + rowsCreated, err := db.Sources.Create(ctx, domain.SourceCollectorRss, "Gitea - Newsbot.api", "https://git.jamestombleson.com/jtom38/newsbot-api.rss", "rss,gitea,newsbot.api",true) + if err != nil { + t.Error(err) + t.FailNow() + } + + if rowsCreated != 1 { + t.Error("failed to create the source record") + t.FailNow() + } + + client := cron.NewScheduler(ctx, conn) + client.CheckRssSources() + + articles, err := db.Articles.ListByPage(ctx, 0, 100) + if err != nil { + t.Error(err) + t.FailNow() + } + + t.Log(len(articles)) +} diff --git a/internal/services/cron/scheduler.go b/internal/services/cron/scheduler.go index edb89a0..f2f978c 100644 --- a/internal/services/cron/scheduler.go +++ b/internal/services/cron/scheduler.go @@ -42,7 +42,6 @@ func NewScheduler(ctx context.Context) *Cron { c := &Cron{ ctx: &ctx, } - timer := cron.New() queries, err := openDatabase() if err != nil { @@ -51,8 +50,16 @@ func NewScheduler(ctx context.Context) *Cron { c.Db = queries //timer.AddFunc("*/5 * * * *", func() { go CheckCache() }) + //features := services.GetEnvConfig() features := services.NewConfig() + timer.AddFunc("5 * * * *", c.CheckRssSources) + + //if features.RedditEnabled { + // timer.AddFunc("5 1-23 * * *", func() { go c.CheckReddit() }) + // log.Print("[Input] Reddit backend was enabled") + // //go c.CheckReddit() + //} res, _ := features.GetFeature(services.FEATURE_ENABLE_REDDIT_BACKEND) if res { timer.AddFunc("5 1-23 * * *", func() { go c.CheckReddit() }) @@ -60,18 +67,30 @@ func NewScheduler(ctx context.Context) *Cron { //go c.CheckReddit() } + //if features.YoutubeEnabled { + // timer.AddFunc("10 1-23 * * *", func() { go c.CheckYoutube() }) + // log.Print("[Input] YouTube backend was enabled") + //} res, _ = features.GetFeature(services.FEATURE_ENABLE_YOUTUBE_BACKEND) if res { timer.AddFunc("10 1-23 * * *", func() { go c.CheckYoutube() }) log.Print("[Input] YouTube backend was enabled") } + //if features.FfxivEnabled { + // timer.AddFunc("5 5,10,15,20 * * *", func() { go c.CheckFfxiv() }) + // log.Print("[Input] FFXIV backend was enabled") + //} res, _ = features.GetFeature(services.FEATURE_ENABLE_FFXIV_BACKEND) if res { timer.AddFunc("5 5,10,15,20 * * *", func() { go c.CheckFfxiv() }) log.Print("[Input] FFXIV backend was enabled") } + //if features.TwitchEnabled { + // timer.AddFunc("15 1-23 * * *", func() { go c.CheckTwitch() }) + // log.Print("[Input] Twitch backend was enabled") + //} res, _ = features.GetFeature(services.FEATURE_ENABLE_TWITCH_BACKEND) if res { timer.AddFunc("15 1-23 * * *", func() { go c.CheckTwitch() }) diff --git a/internal/services/cron/scheduler_test.go b/internal/services/cron/scheduler_test.go index 14bbd38..a0dda86 100644 --- a/internal/services/cron/scheduler_test.go +++ b/internal/services/cron/scheduler_test.go @@ -32,3 +32,21 @@ func TestCheckTwitch(t *testing.T) { t.Error(err) } } + +func setupInMemoryDb() (*sql.DB, error) { + db, err := sql.Open("sqlite", ":memory:") + if err != nil { + return nil, err + } + + err = goose.SetDialect("sqlite3") + if err != nil { + return nil, err + } + + err = goose.Up(db, "../../database/migrations") + if err != nil { + return nil, err + } + return db, nil +} diff --git a/internal/services/input/common.go b/internal/services/input/common.go index 6cae09a..4775e7a 100644 --- a/internal/services/input/common.go +++ b/internal/services/input/common.go @@ -14,4 +14,4 @@ var ( ErrInvalidAuthorImage = errors.New("expected value looks to be wrong, something is missing") ) -const DATETIME_FORMAT string = "1/2/2006 3:4 PM" +const DATETIME_FORMAT string = "1/2/2006 3:4 PM" \ No newline at end of file diff --git a/internal/services/input/rss.go b/internal/services/input/rss.go index 3d2833b..1faa40c 100644 --- a/internal/services/input/rss.go +++ b/internal/services/input/rss.go @@ -1,14 +1,16 @@ package input import ( - "fmt" - "log" + "strings" "git.jamestombleson.com/jtom38/newsbot-api/internal/domain" - "git.jamestombleson.com/jtom38/newsbot-api/internal/services/cache" "github.com/mmcdole/gofeed" ) +type FeedInput interface { + GetArticles() (domain.ArticleEntity, error) +} + type rssClient struct { SourceRecord domain.SourceEntity } @@ -21,39 +23,36 @@ func NewRssClient(sourceRecord domain.SourceEntity) rssClient { return client } -//func (rc rssClient) ReplaceSourceRecord(source model.Sources) { -//rc.SourceRecord = source -//} - -func (rc rssClient) getCacheGroup() string { - return fmt.Sprintf("rss-%v", rc.SourceRecord.DisplayName) -} - -func (rc rssClient) GetContent() error { - feed, err := rc.PullFeed() - if err != nil { - return err - } - - cacheClient := cache.NewCacheClient(rc.getCacheGroup()) - - for _, item := range feed.Items { - log.Println(item) - - cacheClient.FindByValue(item.Link) - - } - - return nil -} - -func (rc rssClient) PullFeed() (*gofeed.Feed, error) { - feedUri := fmt.Sprintf("%v", rc.SourceRecord.Url) - fp := gofeed.NewParser() - feed, err := fp.ParseURL(feedUri) +func (rc rssClient) GetArticles() ([]domain.ArticleEntity, error) { + parser := gofeed.NewParser() + feed, err := parser.ParseURL(rc.SourceRecord.Url) if err != nil { return nil, err } - return feed, nil + sourceTags := strings.Split(rc.SourceRecord.Tags, ",") + var articles []domain.ArticleEntity + for _, post := range feed.Items { + article := domain.ArticleEntity{ + SourceID: rc.SourceRecord.ID, + Title: post.Title, + Description: post.Content, + Url: post.Link, + PubDate: *post.PublishedParsed, + AuthorName: post.Author.Email, + } + + var postTags []string + postTags = append(postTags, sourceTags...) + postTags = append(postTags, post.Categories...) + article.Tags = strings.Join(postTags, ",") + + if post.Image == nil { + article.Thumbnail = "" + } + + articles = append(articles, article) + } + + return articles, nil } diff --git a/internal/services/input/rss_test.go b/internal/services/input/rss_test.go index 728bd85..0b9f3c0 100644 --- a/internal/services/input/rss_test.go +++ b/internal/services/input/rss_test.go @@ -19,12 +19,23 @@ func TestRssClientConstructor(t *testing.T) { func TestRssGetFeed(t *testing.T) { client := input.NewRssClient(rssRecord) - feed, err := client.PullFeed() + _, err := client.GetArticles() if err != nil { t.Error(err) } - if len(feed.Items) >= 0 { - t.Error("failed to collect items from the fees") +} + +func TestRssAgainstGita(t *testing.T) { + client := input.NewRssClient(domain.SourceEntity{ + ID: 2, + DisplayName: "Gitea - Newsbot-api", + Source: domain.SourceCollectorRss, + Url: "https://git.jamestombleson.com/jtom38/newsbot-api.rss", + Tags: "rss,gitea,newsbot-api", + }) + _, err := client.GetArticles() + if err != nil { + t.Error(err) } }