2022-06-30 14:54:58 -07:00
|
|
|
package input
|
|
|
|
|
|
|
|
import (
|
2024-05-01 17:49:38 -07:00
|
|
|
"strings"
|
2022-06-30 14:54:58 -07:00
|
|
|
|
2024-05-09 18:59:50 -07:00
|
|
|
"git.jamestombleson.com/jtom38/newsbot-api/internal/entity"
|
2022-06-30 14:54:58 -07:00
|
|
|
"github.com/mmcdole/gofeed"
|
|
|
|
)
|
|
|
|
|
2024-05-01 17:49:38 -07:00
|
|
|
type FeedInput interface {
|
2024-05-09 18:59:50 -07:00
|
|
|
GetArticles() (entity.ArticleEntity, error)
|
2024-05-01 17:49:38 -07:00
|
|
|
}
|
|
|
|
|
2022-06-30 14:54:58 -07:00
|
|
|
type rssClient struct {
|
2024-05-09 18:59:50 -07:00
|
|
|
SourceRecord entity.SourceEntity
|
2022-06-30 14:54:58 -07:00
|
|
|
}
|
|
|
|
|
2024-05-09 18:59:50 -07:00
|
|
|
func NewRssClient(sourceRecord entity.SourceEntity) rssClient {
|
2022-06-30 14:54:58 -07:00
|
|
|
client := rssClient{
|
|
|
|
SourceRecord: sourceRecord,
|
|
|
|
}
|
|
|
|
|
|
|
|
return client
|
|
|
|
}
|
|
|
|
|
2024-05-09 18:59:50 -07:00
|
|
|
func (rc rssClient) GetArticles() ([]entity.ArticleEntity, error) {
|
2024-05-01 17:49:38 -07:00
|
|
|
parser := gofeed.NewParser()
|
|
|
|
feed, err := parser.ParseURL(rc.SourceRecord.Url)
|
2022-12-04 08:49:17 -08:00
|
|
|
if err != nil {
|
2024-05-01 17:49:38 -07:00
|
|
|
return nil, err
|
2022-06-30 14:54:58 -07:00
|
|
|
}
|
|
|
|
|
2024-05-01 17:49:38 -07:00
|
|
|
sourceTags := strings.Split(rc.SourceRecord.Tags, ",")
|
2024-05-09 18:59:50 -07:00
|
|
|
var articles []entity.ArticleEntity
|
2024-05-01 17:49:38 -07:00
|
|
|
for _, post := range feed.Items {
|
2024-05-09 18:59:50 -07:00
|
|
|
article := entity.ArticleEntity{
|
2024-05-01 17:49:38 -07:00
|
|
|
SourceID: rc.SourceRecord.ID,
|
|
|
|
Title: post.Title,
|
|
|
|
Description: post.Content,
|
|
|
|
Url: post.Link,
|
|
|
|
PubDate: *post.PublishedParsed,
|
2024-05-02 17:37:18 -07:00
|
|
|
//AuthorName: post.Authors[0].Email,
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(post.Authors) != 0 {
|
|
|
|
article.AuthorName = post.Authors[0].Email
|
2024-05-01 17:49:38 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
var postTags []string
|
|
|
|
postTags = append(postTags, sourceTags...)
|
|
|
|
postTags = append(postTags, post.Categories...)
|
|
|
|
article.Tags = strings.Join(postTags, ",")
|
|
|
|
|
2024-05-02 17:37:18 -07:00
|
|
|
/*
|
|
|
|
pageContent, err := getHttpContent(article.Url)
|
|
|
|
if err != nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
htmlNode, err := html.Parse(bytes.NewReader(pageContent))
|
|
|
|
if err != nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
htmlNode.
|
|
|
|
|
|
|
|
fmt.Println(htmlNode)
|
|
|
|
*/
|
|
|
|
|
2024-05-01 17:49:38 -07:00
|
|
|
if post.Image == nil {
|
|
|
|
article.Thumbnail = ""
|
|
|
|
}
|
|
|
|
|
|
|
|
articles = append(articles, article)
|
2022-12-04 08:49:17 -08:00
|
|
|
}
|
2022-06-30 14:54:58 -07:00
|
|
|
|
2024-05-01 17:49:38 -07:00
|
|
|
return articles, nil
|
2022-12-04 08:49:17 -08:00
|
|
|
}
|