2022-06-30 14:54:58 -07:00
|
|
|
package input
|
2022-04-02 12:05:32 -07:00
|
|
|
|
|
|
|
import (
|
|
|
|
"encoding/json"
|
2022-04-07 14:53:40 -07:00
|
|
|
"errors"
|
2022-04-02 12:05:32 -07:00
|
|
|
"fmt"
|
|
|
|
"log"
|
2022-04-29 13:02:25 -07:00
|
|
|
"strings"
|
2022-04-07 14:53:40 -07:00
|
|
|
"time"
|
2022-04-02 12:05:32 -07:00
|
|
|
|
2024-04-23 07:15:38 -07:00
|
|
|
"git.jamestombleson.com/jtom38/newsbot-api/internal/domain"
|
2024-05-09 18:59:50 -07:00
|
|
|
"git.jamestombleson.com/jtom38/newsbot-api/internal/entity"
|
2024-04-23 07:15:38 -07:00
|
|
|
"git.jamestombleson.com/jtom38/newsbot-api/internal/services"
|
2022-04-29 13:02:25 -07:00
|
|
|
"github.com/go-rod/rod"
|
2022-07-14 14:27:40 -07:00
|
|
|
"github.com/go-rod/rod/lib/launcher"
|
2022-04-02 12:05:32 -07:00
|
|
|
)
|
|
|
|
|
|
|
|
type RedditClient struct {
|
2022-04-17 07:25:49 -07:00
|
|
|
config RedditConfig
|
2024-05-09 18:59:50 -07:00
|
|
|
record entity.SourceEntity
|
2022-04-02 12:05:32 -07:00
|
|
|
}
|
|
|
|
|
2022-04-17 07:25:49 -07:00
|
|
|
// RedditConfig holds the raw string values of the Reddit pull settings.
// NOTE(review): the expected format of these strings (e.g. "true"/"false")
// is not visible in this file — confirm against the services config.
type RedditConfig struct {
	// PullTop is the value of the "pull top" setting.
	PullTop string
	// PullHot is the value of the "pull hot" setting.
	PullHot string
	// PullNSFW is the value of the "pull NSFW" setting.
	PullNSFW string
}
|
|
|
|
|
2024-05-09 18:59:50 -07:00
|
|
|
func NewRedditClient(Record entity.SourceEntity) *RedditClient {
|
2022-04-02 12:05:32 -07:00
|
|
|
rc := RedditClient{
|
2022-06-08 21:17:08 -07:00
|
|
|
record: Record,
|
2022-04-02 12:05:32 -07:00
|
|
|
}
|
2024-04-23 07:15:38 -07:00
|
|
|
cc := services.NewConfig()
|
|
|
|
rc.config.PullHot = cc.GetConfig(services.REDDIT_PULL_HOT)
|
|
|
|
rc.config.PullNSFW = cc.GetConfig(services.REDDIT_PULL_NSFW)
|
|
|
|
rc.config.PullTop = cc.GetConfig(services.REDDIT_PULL_TOP)
|
2022-04-29 13:02:25 -07:00
|
|
|
|
2022-07-12 15:28:31 -07:00
|
|
|
//rc.disableHttp2Client()
|
2022-04-17 07:25:49 -07:00
|
|
|
|
2022-07-12 15:28:31 -07:00
|
|
|
return &rc
|
2022-04-02 12:05:32 -07:00
|
|
|
}
|
|
|
|
|
2022-04-29 13:02:25 -07:00
|
|
|
// This is needed to get modern Go to talk to the endpoint.
|
|
|
|
// https://www.reddit.com/r/redditdev/comments/t8e8hc/getting_nothing_but_429_responses_when_using_go/
|
2022-07-12 15:28:31 -07:00
|
|
|
//func (rc *RedditClient) disableHttp2Client() {
|
|
|
|
// os.Setenv("GODEBUG", "http2client=0")
|
|
|
|
//}
|
|
|
|
|
|
|
|
func (rc *RedditClient) GetBrowser() *rod.Browser {
|
2022-07-14 14:27:40 -07:00
|
|
|
var browser *rod.Browser
|
|
|
|
if path, exists := launcher.LookPath(); exists {
|
|
|
|
u := launcher.New().Bin(path).MustLaunch()
|
|
|
|
browser = rod.New().ControlURL(u).MustConnect()
|
|
|
|
}
|
2022-04-29 13:02:25 -07:00
|
|
|
return browser
|
|
|
|
}
|
|
|
|
|
2022-07-12 15:28:31 -07:00
|
|
|
func (rc *RedditClient) GetPage(parser *rod.Browser, url string) *rod.Page {
|
2022-04-29 13:02:25 -07:00
|
|
|
page := parser.MustPage(url)
|
|
|
|
return page
|
|
|
|
}
|
|
|
|
|
2022-12-04 08:49:17 -08:00
|
|
|
//func (rc RedditClient)
|
2022-06-08 21:17:08 -07:00
|
|
|
|
2022-04-02 12:05:32 -07:00
|
|
|
// GetContent() reaches out to Reddit and pulls the Json data.
|
|
|
|
// It will then convert the data to a struct and return the struct.
|
2024-04-23 07:15:38 -07:00
|
|
|
func (rc *RedditClient) GetContent() (domain.RedditJsonContent, error) {
|
|
|
|
var items domain.RedditJsonContent = domain.RedditJsonContent{}
|
2022-04-02 12:05:32 -07:00
|
|
|
|
2022-06-08 21:17:08 -07:00
|
|
|
// TODO Wire this to support the config options
|
|
|
|
Url := fmt.Sprintf("%v.json", rc.record.Url)
|
|
|
|
|
2024-05-01 18:26:14 -07:00
|
|
|
log.Printf("[Reddit] Collecting results on '%v'", rc.record.DisplayName)
|
2022-07-12 15:28:31 -07:00
|
|
|
|
2022-06-08 21:17:08 -07:00
|
|
|
content, err := getHttpContent(Url)
|
|
|
|
if err != nil {
|
|
|
|
return items, err
|
|
|
|
}
|
|
|
|
if strings.Contains("<h1>whoa there, pardner!</h1>", string(content)) {
|
2022-04-29 13:02:25 -07:00
|
|
|
return items, errors.New("did not get json data from the server")
|
|
|
|
}
|
2022-04-02 12:05:32 -07:00
|
|
|
|
|
|
|
json.Unmarshal(content, &items)
|
2022-04-29 13:02:25 -07:00
|
|
|
if len(items.Data.Children) == 0 {
|
|
|
|
return items, errors.New("failed to unmarshal the data")
|
|
|
|
}
|
2022-04-02 12:05:32 -07:00
|
|
|
return items, nil
|
|
|
|
}
|
|
|
|
|
2024-05-09 18:59:50 -07:00
|
|
|
func (rc *RedditClient) ConvertToArticles(items domain.RedditJsonContent) []entity.ArticleEntity {
|
|
|
|
var redditArticles []entity.ArticleEntity
|
2022-04-07 14:53:40 -07:00
|
|
|
for _, item := range items.Data.Children {
|
2024-05-09 18:59:50 -07:00
|
|
|
var article entity.ArticleEntity
|
2022-04-07 14:53:40 -07:00
|
|
|
article, err := rc.convertToArticle(item.Data)
|
2022-06-08 21:17:08 -07:00
|
|
|
if err != nil {
|
2022-07-12 15:28:31 -07:00
|
|
|
log.Printf("[Reddit] %v", err)
|
2022-06-08 21:17:08 -07:00
|
|
|
continue
|
|
|
|
}
|
2022-04-07 14:53:40 -07:00
|
|
|
redditArticles = append(redditArticles, article)
|
|
|
|
}
|
|
|
|
return redditArticles
|
|
|
|
}
|
|
|
|
|
2022-04-02 12:05:32 -07:00
|
|
|
// ConvertToArticle() will take the reddit model struct and convert them over to Article structs.
|
|
|
|
// This data can be passed to the database.
|
2024-05-09 18:59:50 -07:00
|
|
|
func (rc *RedditClient) convertToArticle(source domain.RedditPost) (entity.ArticleEntity, error) {
|
|
|
|
var item entity.ArticleEntity
|
2022-04-02 12:05:32 -07:00
|
|
|
|
2022-06-08 21:17:08 -07:00
|
|
|
if source.Content == "" && source.Url != "" {
|
2022-04-02 12:05:32 -07:00
|
|
|
item = rc.convertPicturePost(source)
|
|
|
|
}
|
2022-06-08 21:17:08 -07:00
|
|
|
|
2022-04-07 14:53:40 -07:00
|
|
|
if source.Media.RedditVideo.FallBackUrl != "" {
|
|
|
|
item = rc.convertVideoPost(source)
|
|
|
|
}
|
|
|
|
|
|
|
|
if source.Content != "" {
|
|
|
|
item = rc.convertTextPost(source)
|
|
|
|
}
|
|
|
|
|
|
|
|
if source.UrlOverriddenByDest != "" {
|
|
|
|
item = rc.convertRedirectPost(source)
|
|
|
|
}
|
2022-04-02 12:05:32 -07:00
|
|
|
|
2022-07-12 15:28:31 -07:00
|
|
|
if item.Description == "" && item.Title == "" {
|
|
|
|
var err = errors.New("post failed to parse correctly")
|
2022-04-02 12:05:32 -07:00
|
|
|
return item, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return item, nil
|
|
|
|
}
|
|
|
|
|
2024-05-09 18:59:50 -07:00
|
|
|
func (rc *RedditClient) convertPicturePost(source domain.RedditPost) entity.ArticleEntity {
|
|
|
|
var item = entity.ArticleEntity{
|
2024-05-01 18:26:14 -07:00
|
|
|
SourceID: rc.record.ID,
|
|
|
|
Title: source.Title,
|
|
|
|
Tags: fmt.Sprintf("%v", rc.record.Tags),
|
|
|
|
Url: fmt.Sprintf("https://www.reddit.com%v", source.Permalink),
|
|
|
|
PubDate: time.Now(),
|
|
|
|
IsVideo: false,
|
|
|
|
Thumbnail: source.Thumbnail,
|
|
|
|
Description: source.Content,
|
|
|
|
AuthorName: source.Author,
|
|
|
|
AuthorImageUrl: "null",
|
2022-04-07 14:53:40 -07:00
|
|
|
}
|
|
|
|
return item
|
|
|
|
}
|
|
|
|
|
2024-05-09 18:59:50 -07:00
|
|
|
func (rc *RedditClient) convertTextPost(source domain.RedditPost) entity.ArticleEntity {
|
|
|
|
var item = entity.ArticleEntity{
|
2024-05-01 18:26:14 -07:00
|
|
|
SourceID: rc.record.ID,
|
2022-06-08 21:17:08 -07:00
|
|
|
Tags: "a",
|
|
|
|
Title: source.Title,
|
2024-05-01 18:26:14 -07:00
|
|
|
PubDate: time.Now(),
|
2022-06-08 21:17:08 -07:00
|
|
|
Url: fmt.Sprintf("https://www.reddit.com%v", source.Permalink),
|
2024-05-01 18:26:14 -07:00
|
|
|
AuthorName: source.Author,
|
2022-04-02 12:05:32 -07:00
|
|
|
Description: source.Content,
|
|
|
|
}
|
|
|
|
return item
|
|
|
|
}
|
|
|
|
|
2024-05-09 18:59:50 -07:00
|
|
|
func (rc *RedditClient) convertVideoPost(source domain.RedditPost) entity.ArticleEntity {
|
|
|
|
var item = entity.ArticleEntity{
|
2024-05-01 18:26:14 -07:00
|
|
|
SourceID: rc.record.ID,
|
2022-06-08 21:17:08 -07:00
|
|
|
Tags: "a",
|
|
|
|
Title: source.Title,
|
2024-05-01 18:26:14 -07:00
|
|
|
PubDate: time.Now(),
|
2022-06-08 21:17:08 -07:00
|
|
|
Url: fmt.Sprintf("https://www.reddit.com%v", source.Permalink),
|
2024-05-01 18:26:14 -07:00
|
|
|
AuthorName: source.Author,
|
2022-04-07 14:53:40 -07:00
|
|
|
Description: source.Media.RedditVideo.FallBackUrl,
|
|
|
|
}
|
|
|
|
return item
|
2022-04-02 12:05:32 -07:00
|
|
|
}
|
|
|
|
|
2022-04-07 14:53:40 -07:00
|
|
|
// This post is nothing more then a redirect to another location.
|
2024-05-09 18:59:50 -07:00
|
|
|
func (rc *RedditClient) convertRedirectPost(source domain.RedditPost) entity.ArticleEntity {
|
|
|
|
var item = entity.ArticleEntity{
|
2024-05-01 18:26:14 -07:00
|
|
|
SourceID: rc.record.ID,
|
2022-06-08 21:17:08 -07:00
|
|
|
Tags: "a",
|
|
|
|
Title: source.Title,
|
2024-05-01 18:26:14 -07:00
|
|
|
PubDate: time.Now(),
|
2022-06-08 21:17:08 -07:00
|
|
|
Url: fmt.Sprintf("https://www.reddit.com%v", source.Permalink),
|
2024-05-01 18:26:14 -07:00
|
|
|
AuthorName: source.Author,
|
2022-04-07 14:53:40 -07:00
|
|
|
Description: source.UrlOverriddenByDest,
|
|
|
|
}
|
|
|
|
return item
|
2022-06-08 21:17:08 -07:00
|
|
|
}
|