newsbot-api/internal/services/input/reddit.go

188 lines
4.9 KiB
Go

package input
import (
"encoding/json"
"errors"
"fmt"
"log"
"strings"
"time"
"git.jamestombleson.com/jtom38/newsbot-api/internal/domain"
"git.jamestombleson.com/jtom38/newsbot-api/internal/services"
"github.com/go-rod/rod"
"github.com/go-rod/rod/lib/launcher"
)
// RedditClient collects posts for a single Reddit source record and converts
// them into article entities.
type RedditClient struct {
// config is the value returned by services.GetEnvConfig() when the client
// is built by NewRedditClient.
config services.Configs
// record is the source this client pulls from; its Url, DisplayName, ID and
// Tags are read by the methods in this file.
record domain.SourceEntity
}
// RedditConfig groups the Reddit pull options.
// NOTE(review): nothing in the visible part of this file reads these fields
// yet (see the TODO in GetContent); the expected string values are not shown
// here — confirm against the configuration loader.
type RedditConfig struct {
PullTop string
PullHot string
PullNSFW string
}
// NewRedditClient builds a RedditClient bound to the given source record and
// loads the environment configuration via services.GetEnvConfig().
func NewRedditClient(record domain.SourceEntity) *RedditClient {
	client := &RedditClient{record: record}
	client.config = services.GetEnvConfig()

	// The HTTP/2 workaround is currently disabled:
	//client.disableHttp2Client()
	return client
}
// This is needed to get modern Go to talk to the endpoint.
// https://www.reddit.com/r/redditdev/comments/t8e8hc/getting_nothing_but_429_responses_when_using_go/
//func (rc *RedditClient) disableHttp2Client() {
// os.Setenv("GODEBUG", "http2client=0")
//}
// GetBrowser launches a browser through rod and returns a connected handle.
//
// The browser binary is located with launcher.LookPath(); when none is found
// the method returns nil, so callers must check the result before using it.
// NOTE(review): MustLaunch and MustConnect follow the Must* naming convention
// and presumably panic on failure rather than returning an error — confirm
// against the rod documentation.
func (rc *RedditClient) GetBrowser() *rod.Browser {
	binPath, found := launcher.LookPath()
	if !found {
		return nil
	}

	controlURL := launcher.New().Bin(binPath).MustLaunch()
	return rod.New().ControlURL(controlURL).MustConnect()
}
// GetPage opens url in the supplied browser and returns the resulting page.
// parser must be non-nil (GetBrowser can return nil when no browser binary is
// found).
func (rc *RedditClient) GetPage(parser *rod.Browser, url string) *rod.Page {
	return parser.MustPage(url)
}
//func (rc RedditClient)
// GetContent requests the JSON listing for the configured source from Reddit
// and decodes it into a domain.RedditJsonContent.
//
// The request URL is the record's Url with ".json" appended. An error is
// returned when:
//   - the HTTP fetch fails,
//   - the body contains Reddit's HTML "whoa there, pardner!" page instead of
//     JSON,
//   - the body cannot be decoded as JSON, or
//   - the decoded listing contains no children.
func (rc *RedditClient) GetContent() (domain.RedditJsonContent, error) {
	var items domain.RedditJsonContent

	// TODO Wire this to support the config options
	endpoint := fmt.Sprintf("%v.json", rc.record.Url)

	log.Printf("[Reddit] Collecting results on '%v'", rc.record.DisplayName)

	content, err := getHttpContent(endpoint)
	if err != nil {
		return items, err
	}

	// strings.Contains(s, substr): the response body is the haystack and the
	// marker is the needle. The arguments were previously swapped, so the
	// HTML page was never detected.
	if strings.Contains(string(content), "<h1>whoa there, pardner!</h1>") {
		return items, errors.New("did not get json data from the server")
	}

	// Surface decode failures instead of silently continuing with a partially
	// populated value.
	if err := json.Unmarshal(content, &items); err != nil {
		return domain.RedditJsonContent{}, fmt.Errorf("decoding reddit listing for '%v': %w", rc.record.DisplayName, err)
	}

	if len(items.Data.Children) == 0 {
		return items, errors.New("failed to unmarshal the data")
	}

	return items, nil
}
// ConvertToArticles converts every child post of a Reddit listing into an
// article entity. Posts that fail conversion are logged and skipped, so the
// result may be shorter than the input; it is nil when nothing converts.
func (rc *RedditClient) ConvertToArticles(items domain.RedditJsonContent) []domain.ArticleEntity {
	var converted []domain.ArticleEntity

	for _, child := range items.Data.Children {
		entity, convErr := rc.convertToArticle(child.Data)
		if convErr == nil {
			converted = append(converted, entity)
			continue
		}
		// Best effort: one bad post must not abort the whole batch.
		log.Printf("[Reddit] %v", convErr)
	}

	return converted
}
// convertToArticle takes a single Reddit post and converts it to an article
// entity that can be passed to the database.
//
// Exactly one converter is selected, in this order of precedence:
//  1. redirect post - UrlOverriddenByDest is set
//  2. text post     - Content is set
//  3. video post    - Media.RedditVideo.FallBackUrl is set
//  4. picture post  - Content is empty and Url is set
//
// An error is returned when the resulting article has neither a title nor a
// description.
func (rc *RedditClient) convertToArticle(source domain.RedditPost) (domain.ArticleEntity, error) {
	var article domain.ArticleEntity

	switch {
	case source.UrlOverriddenByDest != "":
		article = rc.convertRedirectPost(source)
	case source.Content != "":
		article = rc.convertTextPost(source)
	case source.Media.RedditVideo.FallBackUrl != "":
		article = rc.convertVideoPost(source)
	case source.Content == "" && source.Url != "":
		article = rc.convertPicturePost(source)
	}

	if article.Title != "" || article.Description != "" {
		return article, nil
	}
	return article, errors.New("post failed to parse correctly")
}
// convertPicturePost maps a picture post onto an article entity.
//
// The article links to the post's Reddit permalink, carries the post's
// thumbnail, and is tagged with the source record's tags. PubDate is the time
// of conversion (time.Now()), not a timestamp taken from the post.
func (rc *RedditClient) convertPicturePost(source domain.RedditPost) domain.ArticleEntity {
	tags := fmt.Sprintf("%v", rc.record.Tags)
	permalink := fmt.Sprintf("https://www.reddit.com%v", source.Permalink)

	return domain.ArticleEntity{
		SourceID:       rc.record.ID,
		Title:          source.Title,
		Description:    source.Content,
		Url:            permalink,
		Thumbnail:      source.Thumbnail,
		IsVideo:        false,
		Tags:           tags,
		AuthorName:     source.Author,
		AuthorImageUrl: "",
		PubDate:        time.Now(),
	}
}
// convertTextPost maps a text post onto an article entity, using the post's
// Content as the description and its Reddit permalink as the URL. PubDate is
// the time of conversion (time.Now()).
//
// NOTE(review): Tags is the literal "a", which looks like a placeholder;
// confirm whether the source record's tags were intended here.
func (rc *RedditClient) convertTextPost(source domain.RedditPost) domain.ArticleEntity {
	permalink := fmt.Sprintf("https://www.reddit.com%v", source.Permalink)

	return domain.ArticleEntity{
		SourceID:    rc.record.ID,
		Title:       source.Title,
		Description: source.Content,
		Url:         permalink,
		AuthorName:  source.Author,
		Tags:        "a",
		PubDate:     time.Now(),
	}
}
// convertVideoPost maps a video post onto an article entity. The description
// carries the video's fallback URL, while Url points at the Reddit permalink.
// PubDate is the time of conversion (time.Now()).
//
// NOTE(review): Tags is the literal "a", which looks like a placeholder, and
// IsVideo is left at its zero value even though this is the video path;
// confirm both are intended.
func (rc *RedditClient) convertVideoPost(source domain.RedditPost) domain.ArticleEntity {
	permalink := fmt.Sprintf("https://www.reddit.com%v", source.Permalink)
	videoURL := source.Media.RedditVideo.FallBackUrl

	return domain.ArticleEntity{
		SourceID:    rc.record.ID,
		Title:       source.Title,
		Description: videoURL,
		Url:         permalink,
		AuthorName:  source.Author,
		Tags:        "a",
		PubDate:     time.Now(),
	}
}
// convertRedirectPost maps a post that is nothing more than a redirect to
// another location. The destination (UrlOverriddenByDest) is stored in the
// description, while Url points at the Reddit permalink. PubDate is the time
// of conversion (time.Now()).
//
// NOTE(review): Tags is the literal "a", which looks like a placeholder;
// confirm whether the source record's tags were intended here.
func (rc *RedditClient) convertRedirectPost(source domain.RedditPost) domain.ArticleEntity {
	permalink := fmt.Sprintf("https://www.reddit.com%v", source.Permalink)
	destination := source.UrlOverriddenByDest

	return domain.ArticleEntity{
		SourceID:    rc.record.ID,
		Title:       source.Title,
		Description: destination,
		Url:         permalink,
		AuthorName:  source.Author,
		Tags:        "a",
		PubDate:     time.Now(),
	}
}