-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathConfigFileReference.yaml
113 lines (76 loc) · 3.59 KB
/
ConfigFileReference.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# Log level (overwritten by the command line option if specified)
# Valid levels are: "debug", "info", "warning", "error" or "none" (defaults to "info" if unspecified)
logLevel: <log level>
# First we define the list of destinations for our data
# NOTE(review): nesting indentation appears to be missing in this copy; each named
# destination presumably sits under `destinations` - confirm before using as a template.
destinations:
<destination name>: # Arbitrary destination name
# Type of destination that we want to configure
# Currently available options:
# - console : send the output to the console
# - influxdb : send the data to an InfluxDB instance
type: <type of destination>
# The other required fields depend on the type of destination
# Example for console
console:
type: console # Print to console
# Nothing else to specify
# Example for influxdb
influxdb1:
type: influxdb # The type, here influxdb
url: https://<influx host>:8086 # The InfluxDB URL (host name or IP address)
defaultBucket: <bucket name> # The default bucket to use for writing the data
token: <token> # The access token with permission to write to the defaultBucket
# Scraping jobs definitions
# NOTE(review): nesting indentation appears to be missing in this copy; the exact
# parent of each key below (e.g. whether `template` belongs to `input`) should be confirmed.
jobs:
<job name>: # Arbitrary job name
# Define when the job will run
schedule:
# Specify when the job should run using cron syntax
cron: "* * * * *"
# By default all jobs are started when added, unless autostart is set to false
autostart: false
# Each job has one input field that defines which endpoint to fetch for this job
input:
# Specify the URL to fetch
url: <url to fetch>
# Optionally specify the content type if you want to override the content type of the reply
contentType: <string>
# Scraping template used to extract the data out of the HTML pages
template:
<field name 1>: <css selector>
<field name 2>:
<field name 3>: <css selector>
# List of transformations to apply to the data after fetching
transformations: []
# Each job can define one or more outputs
outputs:
# Specify the destination name
- to: <destination name>
# Specific options for the destination
options:
<option name>: <option value>
# List of transformations to apply to the data before sending it to the destination
transformations: []
# Example for console type destination (does not require any options)
- to: console
# Example for influxdb
- to: influxdb1
# Each influxdb output will create a data point in the database.
# The options allow us to define what the data point will be.
options:
# Name of the measurement
measurement: <measurement name>
# Tags to apply to the point. Tags MUST be string values.
tags:
# Tags can be literal strings (static tagging)
<tag name 1>: LitteralTagValue
# Tags can be specified by a json path to a value in the json data (dynamic tagging)
<tag name 2>: ${<json path to the value in the json data>}
# Example to set a tag 'job_name' to the actual job name fetched from the metadata
job_name: ${__job_name}
# Fields to apply to the point
# The field type is automatically set to string or float
fields:
<field name 1>: LitteralFieldValue # Literal string value
<field name 2>: 1.123 # Literal float value
<field name 3>: ${<json path to the data>} # json path