-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathexplore.go
153 lines (124 loc) · 4.13 KB
/
explore.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
// Package explore provides access to Qiita's tags (items and stocks).
// The data is collected from Qiita's website at http://qiita.com/tags/{tag}/stocks and http://qiita.com/tags/{tag}/items
package explore
import (
"github.com/PuerkitoBio/goquery"
"net/url"
"regexp"
"strconv"
"strings"
"time"
)
// NewExplore is the main entry point of the explore package.
// It returns a pointer to a zero-value Explore, whose methods expose the
// API of this package.
//
// Usage:
//
//	explore := explore.NewExplore()
//	stocks, err := explore.GetStocks("swift", 1)
//	items, err := explore.GetItems("Go", 1)
func NewExplore() *Explore {
	return &Explore{}
}
// GetStocks returns the recently stocked articles for the given tag.
//
// tag should be a tag known to Qiita. page optionally selects which result
// page to fetch: only the first value is used, and page 1 is requested when
// no value is given.
func (e *Explore) GetStocks(tag string, page ...int) ([]Article, error) {
	if len(page) == 0 {
		return e.getArticles(tag, "stocks", 1)
	}
	return e.getArticles(tag, "stocks", page[0])
}
// GetItems returns the newly posted articles for the given tag.
//
// tag should be a tag known to Qiita. page optionally selects which result
// page to fetch: only the first value is used, and page 1 is requested when
// no value is given.
func (e *Explore) GetItems(tag string, page ...int) ([]Article, error) {
	if len(page) == 0 {
		return e.getArticles(tag, "items", 1)
	}
	return e.getArticles(tag, "items", page[0])
}
// getArticles fetches the listing page for tag and category ("stocks" or
// "items" in this file) and converts every <article> element of the document
// into an Article.
//
// page is optional: only its first value is used, and page 1 is requested
// when it is omitted. When the URL cannot be built or the document cannot be
// loaded, a nil slice and the error are returned.
//
// Field extraction is best-effort: a missing href yields a nil URL, a missing
// or unparsable date leaves CreatedTime at its zero value, and a stock count
// that is not an integer is reported as 0.
func (e *Explore) getArticles(tag string, category string, page ...int) ([]Article, error) {
	pageQuery := 1
	if len(page) > 0 {
		pageQuery = page[0]
	}

	u, err := e.generateURL(tag, category, pageQuery)
	if err != nil {
		return nil, err
	}

	// NOTE(review): goquery documents NewDocument as deprecated in favour of
	// issuing the request with net/http and calling NewDocumentFromReader;
	// kept here so the fetch behaviour stays exactly as before.
	doc, err := goquery.NewDocument(u.String())
	if err != nil {
		return nil, err
	}

	// Compiled once per call instead of once per article.
	datePattern := regexp.MustCompile(`\d{4}/\d{2}/\d{2}`)
	const dateLayout = "2006/01/02"

	var articles []Article
	doc.Find("article").Each(func(_ int, item *goquery.Selection) {
		// The first link in the body carries both the title and the path.
		titleLink := item.Find(".publicItem_body a").First()
		title := e.trim(titleLink.Text())
		path, articleExists := titleLink.Attr("href")

		// Collect tags.
		var tags []string
		item.Find(".tagList_item a").Each(func(_ int, tagLink *goquery.Selection) {
			tags = append(tags, tagLink.Text())
		})

		// FindString returns "" when the status text holds no date, so an
		// article without one no longer panics on an empty match slice.
		var createdTime time.Time
		if dateStr := datePattern.FindString(item.Find(".publicItem_status").Text()); dateStr != "" {
			// A date that matches the pattern but is not a real calendar
			// date is deliberately ignored: CreatedTime stays zero.
			if parsed, parseErr := time.Parse(dateLayout, dateStr); parseErr == nil {
				createdTime = parsed
			}
		}

		// The name is the text of every link in the status line, while the
		// path comes from the first link only (unchanged from the original).
		statusLinks := item.Find(".publicItem_status a")
		userName := e.trim(statusLinks.Text())
		userPath, userExists := statusLinks.First().Attr("href")

		stockCount, convErr := strconv.Atoi(e.trim(item.Find(".publicItem_stockCount").Text()))
		if convErr != nil {
			stockCount = 0
		}

		articles = append(articles, Article{
			Title:       title,
			Path:        path,
			URL:         e.appendBaseHostToPath(path, articleExists),
			Tags:        tags,
			UserName:    userName,
			UserPath:    userPath,
			UserURL:     e.appendBaseHostToPath(userPath, userExists),
			StockCount:  stockCount,
			CreatedTime: createdTime,
		})
	})
	return articles, nil
}
// appendBaseHostToPath returns baseHost parsed as a URL with its path replaced
// by address. It returns nil when exists is false or when baseHost cannot be
// parsed.
func (e *Explore) appendBaseHostToPath(address string, exists bool) *url.URL {
	if !exists {
		return nil
	}

	hostURL, parseErr := url.Parse(baseHost)
	if parseErr != nil {
		return nil
	}

	hostURL.Path = address
	return hostURL
}
// trim splits name on newlines, strips leading and trailing whitespace from
// every line, and concatenates the lines with no separator.
func (e *Explore) trim(name string) string {
	lines := strings.Split(name, "\n")
	for i := range lines {
		lines[i] = strings.TrimSpace(lines[i])
	}
	return strings.Join(lines, "")
}
// generateURL builds the listing URL
// baseHost + basePath + {tag} + "/" + category + "?page=" + page.
//
// tag is taken as the raw tag name and is escaped as a single path segment,
// so characters such as '#', '?' or a space (for example the tag "C#") stay
// part of the path instead of starting a fragment or query. Callers must not
// percent-encode tag themselves.
//
// It returns an error when the assembled string cannot be parsed as a URL.
func (e *Explore) generateURL(tag string, category string, page int) (*url.URL, error) {
	u, err := url.Parse(baseHost + basePath + url.PathEscape(tag) + "/" + category)
	if err != nil {
		return nil, err
	}

	// Set the page through url.Values so the query is always well-formed.
	q := u.Query()
	q.Set("page", strconv.Itoa(page))
	u.RawQuery = q.Encode()
	return u, nil
}