-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.go
248 lines (213 loc) · 8.23 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
/*
Purpose:
- Discourse Reader
Description:
- Retrieves data (e.g. site, category, topic) from Discourse forum.
Releases:
- v1.0.0 - 2022/11/18: initial release
- v1.0.1 - 2025/01/24: compiled with go v1.23.5
Author:
- Klaus Tockloth
Copyright:
- Copyright (c) 2022-2025 Klaus Tockloth
Contact:
Remarks:
- Lint: golangci-lint run --no-config --enable gocritic
- Vulnerability detection: govulncheck ./...
ToDo:
- NN
Links:
- https://docs.discourse.org/
- https://meta.discourse.org/t/available-settings-for-global-rate-limits-and-throttling/78612
- https://meta.discourse.org/t/api-can-pull-only-20-posts/163406/5
*/
package main
import (
"crypto/tls"
"flag"
"fmt"
"log"
"net/http"
"net/url"
"os"
"path/filepath"
"time"
)
// General program info. The first five values are printed in the start-up
// banner by main.
var (
// progName is the base name (last path element) of the running executable.
progName = filepath.Base(os.Args[0])
// progVersion is the release identifier shown in the banner.
progVersion = "v1.0.1"
// progDate is the release date shown next to the version.
progDate = "2025/01/24"
// progPurpose is the one-line title of the program.
progPurpose = "Discourse Reader"
// progInfo is a short description of what the program does.
progInfo = "Retrieves data (e.g. site, category, topic) from Discourse forum."
// userAgent has the form "<progName>/<progVersion>".
// NOTE(review): presumably sent as the HTTP User-Agent header by the request
// helpers; its use is not visible in this part of the file - confirm.
userAgent = progName + "/" + progVersion
)
// httpClient is the shared HTTP client for communication with Discourse.
// It is nil until main assigns it (custom transport, optional proxy, timeout).
var httpClient *http.Client
// Command line options. Every pointer is nil until main registers the
// corresponding flag with the flag package.
var (
	// string options: Discourse forum URL, general query, name of the JSON
	// output file, personal user API key
	forum, query, output, userapikey *string

	// integer options: category identifier, topic identifier, number of pages
	// to retrieve, sleep time in seconds between pages
	category, topic, pages, sleeptime *int
)
// Names of the (optional) environment variables read by main.
const (
// evUSERAPIKEY names the variable consulted for the user API key when the
// option '-userapikey' is not given on the command line.
evUSERAPIKEY = "USER_API_KEY"
// evHTTPSPROXY names the variable holding the URL of an internet proxy; when
// it is set, main routes the HTTP transport through that proxy.
evHTTPSPROXY = "HTTPS_PROXY"
)
/*
init prepares the standard logger before main runs: every log line carries
date, time and the source file name with line number.
*/
func init() {
	// log.LstdFlags is the predefined combination Ldate | Ltime
	log.SetFlags(log.LstdFlags | log.Lshortfile)
}
/*
main starts this program.

It prints the program banner, parses and validates the command line options,
builds the shared HTTP client (TLS 1.2 or newer, optional proxy taken from the
environment, 30 seconds timeout) and then runs exactly one retrieval mode. The
mode is chosen in this order: '-query', '-category', '-topic'. The retrieved
data is written to the file named by '-output'. Every failure terminates the
program via log.Fatalf.
*/
func main() {
	fmt.Printf("Program:\n")
	fmt.Printf(" Name : %s\n", progName)
	fmt.Printf(" Release : %s - %s\n", progVersion, progDate)
	fmt.Printf(" Purpose : %s\n", progPurpose)
	fmt.Printf(" Info : %s\n\n", progInfo)

	forum = flag.String("forum", "", "Discourse forum URL")
	category = flag.Int("category", -1, "retrieve data (list of topics) for category with identifier")
	topic = flag.Int("topic", -1, "retrieve data (list of posts) for topic with identifier")
	pages = flag.Int("pages", 19, "pages of data to retrieve")
	query = flag.String("query", "", "general data retrieve query (full URL)")
	output = flag.String("output", "", "name of JSON output file")
	userapikey = flag.String("userapikey", "", fmt.Sprintf("personal user API key (can also be set as environment var '%s')", evUSERAPIKEY))
	sleeptime = flag.Int("sleeptime", 2, "sleep time in seconds before retrieving the next page (avoids user rate limiting)")

	flag.Usage = printUsage
	flag.Parse()

	if flag.NFlag() == 0 {
		// printUsage terminates the program
		printUsage()
	}

	if *output == "" {
		log.Fatalf("Error: Option '-output=string' required.\n")
	}

	// the environment variable is only the fallback for the command line option
	if *userapikey == "" {
		*userapikey = os.Getenv(evUSERAPIKEY)
		if *userapikey == "" {
			log.Fatalf("Error: User-API-Key not found (neither as option '-userapikey=string' nor as environment variable '%s').\n", evUSERAPIKEY)
		}
	}

	if *sleeptime < 0 {
		log.Fatalf("Error: Option '-sleeptime=int' must be >= 0.\n")
	}

	// create HTTP transport object
	httpTransport := &http.Transport{
		TLSClientConfig: &tls.Config{MinVersion: tls.VersionTLS12},
	}

	// get internet proxy from environment
	if internetProxy := os.Getenv(evHTTPSPROXY); internetProxy != "" {
		internetProxyURL, err := url.Parse(internetProxy)
		if err != nil {
			log.Fatalf("Error: Could not parse internet proxy URL from environment, error=[%v], proxy=[%v].", err, internetProxy)
		}
		httpTransport.Proxy = http.ProxyURL(internetProxyURL)
	}

	// create HTTPS client
	httpClient = &http.Client{
		Transport: httpTransport,
		Timeout:   30 * time.Second,
	}

	switch {
	case *query != "":
		// the query may be given with or without the leading "https://"
		*query = ensureHTTPSScheme(*query)
		fmt.Printf("Requesting data for query %s ...\n", *query)
		queryData, err := getQueryData()
		persistResult("query", queryData, err)

	case *category > 0:
		if *forum == "" {
			log.Fatalf("Error: Option '-forum=string' required.\n")
		}
		fmt.Printf("Requesting data (list of topics) for category %d ...\n", *category)
		categoryData, err := getCategoryData()
		persistResult("category", []byte(categoryData), err)

	case *topic > 0:
		if *forum == "" {
			log.Fatalf("Error: Option '-forum=string' required.\n")
		}
		fmt.Printf("Requesting data (list of posts) for topic %d ...\n", *topic)
		topicData, err := getTopicData()
		persistResult("topic", []byte(topicData), err)

	default:
		// reached when none of the three retrieval options selects a mode
		log.Fatalf("Error: Nothing to do, one of the options '-query=string', '-category=int' (> 0) or '-topic=int' (> 0) is required.")
	}

	fmt.Printf("Done.\n")
}

/*
ensureHTTPSScheme returns rawQuery prefixed with "https://" unless it already is
an absolute https URL with a host. Only that one case is passed through; every
other input (including inputs that url.Parse rejects, e.g. "host:8080/path") is
prefixed exactly as before.
*/
func ensureHTTPSScheme(rawQuery string) string {
	parsed, err := url.Parse(rawQuery)
	// url.Parse lower-cases the scheme, so "HTTPS://..." is recognized as well
	if err == nil && parsed.Scheme == "https" && parsed.Host != "" {
		return rawQuery
	}
	return "https://" + rawQuery
}

/*
persistResult writes data to the output file and terminates the program on any
failure. kind names the retrieval mode ("query", "category", "topic") and is
only used in the error message. err is the error returned by the retrieval.

If err is not nil, the data returned alongside the error is still written
(best effort) before the program terminates, so that it can be inspected.
*/
func persistResult(kind string, data []byte, err error) {
	if err != nil {
		// best effort dump: a failure here must not hide the retrieval error
		if writeErr := os.WriteFile(*output, data, 0666); writeErr != nil {
			log.Printf("Warning: Could not write received data to output file, error=[%v].", writeErr)
		}
		log.Fatalf("Error: Error requesting %s data, error=[%s].", kind, err)
	}

	fmt.Printf("Writing data to file %v ...\n", *output)
	if writeErr := os.WriteFile(*output, data, 0666); writeErr != nil {
		log.Fatalf("Error: Could not write output file, error=[%v].", writeErr)
	}
}
/*
printUsage prints the usage of this program (synopsis, examples, options,
remarks on environment variables and rate limiting) to standard output and
then terminates the program with exit code 1. It never returns.
*/
func printUsage() {
	fmt.Printf("Usage:\n")
	fmt.Printf(" %s -forum=string -query=string -category=int -topic=int -pages=int -output=string -userapikey=string -sleeptime=int\n", os.Args[0])

	fmt.Printf("\nExamples for general query:\n")
	fmt.Printf(" %s\n", os.Args[0])
	fmt.Printf(" %s -query=community.openstreetmap.org/site.json -output=community.openstreetmap.org.json\n", os.Args[0])
	fmt.Printf(" %s -query=community.openstreetmap.org/site.json -output=community.openstreetmap.org.json -userapikey=bd38603815e3f2562c3eb3988c69eb77\n", os.Args[0])
	fmt.Printf(" %s -query=meta.discourse.org/site.json -output=meta.discourse.org.json\n", os.Args[0])
	fmt.Printf(" %s -query=meta.discourse.org/session/current.json -output=session-current.json\n", os.Args[0])

	fmt.Printf("\nExamples for category:\n")
	fmt.Printf(" %s -forum=community.openstreetmap.org -category=56 -output=category-56.json\n", os.Args[0])
	fmt.Printf(" %s -forum=community.openstreetmap.org -category=56 -output=category-56.json -userapikey=bd38603815e3f2562c3eb3988c69eb77\n", os.Args[0])
	fmt.Printf(" %s -forum=meta.discourse.org -category=67 -pages=99 -sleeptime=6 -output=category-67.json\n", os.Args[0])

	fmt.Printf("\nExamples for topic:\n")
	fmt.Printf(" %s -forum=community.openstreetmap.org -topic=4120 -output=topic-4120.json\n", os.Args[0])
	fmt.Printf(" %s -forum=community.openstreetmap.org -topic=4120 -pages=99 -sleeptime=6 -output=topic-4120.json\n", os.Args[0])
	fmt.Printf(" %s -forum=community.openstreetmap.org -topic=4120 -output=topic-4120.json -userapikey=bd38603815e3f2562c3eb3988c69eb77\n", os.Args[0])
	fmt.Printf(" %s -forum=meta.discourse.org -topic=112837 -output=topic-112837.json\n", os.Args[0])

	fmt.Printf("\nOptions:\n")
	// flag.PrintDefaults writes to standard error unless told otherwise; send it
	// to standard output so the option list stays below its heading even when
	// the output is redirected or piped
	flag.CommandLine.SetOutput(os.Stdout)
	flag.PrintDefaults()

	fmt.Printf("\nRemarks:\n")
	fmt.Printf(" - User API key can be set as environment variable [%s].\n", evUSERAPIKEY)
	fmt.Printf(" - Internet proxy can be set as environment variable [%s].\n", evHTTPSPROXY)
	fmt.Printf(" - Examples for Linux:\n")
	fmt.Printf(" export %s=bd38603815e3f2562c3eb3988c69eb77\n", evUSERAPIKEY)
	fmt.Printf(" export %s=http://user:password@proxy.example.com:8080\n", evHTTPSPROXY)
	fmt.Printf(" - Examples for Windows:\n")
	fmt.Printf(" set %s=bd38603815e3f2562c3eb3988c69eb77\n", evUSERAPIKEY)
	fmt.Printf(" set %s=http://user:password@proxy.example.com:8080\n", evHTTPSPROXY)

	fmt.Printf("\nRate limiting by forum service:\n")
	fmt.Printf(" - This program does functionally no different than a user via a browser. However, the\n" +
		" data is retrieved somewhat faster. This can lead to rejections (rate limiting) by the\n" +
		" service. To prevent this, the program can pause between fetching pages. The pause time\n" +
		" can be specified with the option '-sleeptime=int'.\n")
	fmt.Printf(" - Typical user rate limit settings are:\n" +
		" - requests per minute : 20\n" +
		" - requests per day : 2880\n")
	fmt.Printf("\n")

	os.Exit(1)
}