forked from buildkite/terminal-to-html
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparser.go
248 lines (225 loc) · 7.51 KB
/
parser.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
package terminal
import (
"unicode"
"unicode/utf8"
)
// Parser modes: the states of the ANSI state machine. The numeric values
// run from 0 (MODE_NORMAL) to 5 (MODE_APC) in declaration order.
const (
	MODE_NORMAL  = iota // outside any escape sequence
	MODE_ESCAPE         // ESC received, introducer character not yet seen
	MODE_CONTROL        // inside a control sequence (ESC [)
	MODE_OSC            // inside an operating system command (ESC ])
	MODE_CHARSET        // inside a charset designation (ESC ( or ESC ))
	MODE_APC            // inside an application program command (ESC _)
)
// parser is a stateful ANSI parser. It walks a byte slice one rune at a time
// and forwards text and decoded escape sequences to a screen.
type parser struct {
// mode is the current state of the state machine (one of the MODE_* constants).
mode int
// screen receives the parsed output.
screen *screen
// ansi is the raw input being parsed.
ansi []byte
// cursor is the byte offset into ansi of the rune currently being handled.
cursor int
// escapeStartedAt is the byte offset of the ESC that opened the sequence in
// progress; handlers rewind cursor to it when a sequence is abandoned.
escapeStartedAt int
// instructions holds the parameters collected so far for the control
// sequence in progress.
instructions []string
// instructionStartedAt is the byte offset in ansi where the current
// parameter (or sequence payload) begins.
instructionStartedAt int
}
/*
* How this state machine works:
*
* We start in MODE_NORMAL. We're not inside an escape sequence. In this mode
* most input is written directly to the screen. If we receive a newline,
* backspace or other cursor-moving signal, we let the screen know so that it
* can change the location of its cursor accordingly.
*
* If we're in MODE_NORMAL and we receive an escape character (\x1b) we enter
* MODE_ESCAPE. The following character could start an escape sequence, a
* control sequence, an operating system command, or be invalid or not understood.
*
* If we're in MODE_ESCAPE we look for the following characters:
*
* 1. For `[` we enter MODE_CONTROL and start looking for a control sequence.
* 2. For `]` we enter MODE_OSC and look for an operating system command.
* 3. For `(` or `)` we enter MODE_CHARSET and look for a character set name.
* 4. For `_` we enter MODE_APC and parse the rest of the custom control sequence.
* 5. For `M` we ask the screen for a reverse newline and return straight to
*    MODE_NORMAL.
*
* In cases 1-4 we record where the sequence's instructions start, and for `[`
* we also start a fresh instruction buffer. The instruction buffer is used to
* store the individual parameters that make up ANSI instructions before
* sending them to the screen. If we receive none of these characters, we
* treat this as an invalid or unknown escape and return to MODE_NORMAL.
*
* If we're in MODE_CONTROL, we expect to receive a sequence of parameters and
* then a terminal alphabetic character looking like 1;30;42m. That's an
* instruction to turn on bold, set the foreground colour to black and the
* background colour to green. We receive these characters one by one turning
* the parameters into instruction parts (1, 30, 42) followed by an instruction
* type (m). Once the instruction type is received we send it and its parts to
* the screen and return to MODE_NORMAL.
*
* If we're in MODE_OSC, we expect to receive a sequence of characters up to
* and including a bell (\a). We skip forward until this bell is reached, then
* send everything from when we entered MODE_OSC up to the bell to
* parseElementSequence and return to MODE_NORMAL.
*
* If we're in MODE_CHARSET we simply discard the next character which would
* normally designate the character set.
*/
// parseANSIToScreen runs ansi through the parser state machine, one UTF-8
// rune at a time, dispatching each rune to the handler for the current mode.
// The handlers write to s and switch modes as sequences begin and end.
//
// A handler that abandons a sequence rewinds p.cursor to the ESC byte that
// opened it. In that case only the ESC itself is skipped, so every byte after
// it is re-read as normal text. Advancing by the width of the rune that
// triggered the abort (as would happen for any other rune) would land in the
// middle of a multi-byte rune or skip over the sequence introducer.
func parseANSIToScreen(s *screen, ansi []byte) {
	p := parser{mode: MODE_NORMAL, ansi: ansi, screen: s}
	length := len(p.ansi)
	for p.cursor = 0; p.cursor < length; {
		char, charLen := utf8.DecodeRune(p.ansi[p.cursor:])
		runeStart := p.cursor
		switch p.mode {
		case MODE_ESCAPE:
			// We've received an escape character but aren't inside an escape sequence yet
			p.handleEscape(char)
		case MODE_CONTROL:
			// We're inside a control sequence - figure out its code and its instructions.
			p.handleControlSequence(char)
		case MODE_OSC:
			// We're inside an operating system command, capture until we hit a bell character
			p.handleOperatingSystemCommand(char)
		case MODE_CHARSET:
			// We're inside a charset sequence, capture the next character.
			p.handleCharset(char)
		case MODE_APC:
			// We're inside a custom escape sequence
			p.handleApplicationProgramCommand(char)
		case MODE_NORMAL:
			// Outside of an escape sequence entirely, normal input
			p.handleNormal(char)
		}
		if p.cursor == runeStart {
			// Common case: the rune was consumed, move on to the next one.
			p.cursor += charLen
			continue
		}
		// The handler rewound the cursor to the ESC that started an aborted
		// sequence; step over just that ESC.
		p.cursor += utf8.RuneLen('\x1b')
	}
}
// handleCharset is called for the rune that follows a charset introducer
// (ESC ( or ESC )). The rune itself is ignored; the parser simply returns to
// MODE_NORMAL.
func (p *parser) handleCharset(char rune) {
p.mode = MODE_NORMAL
}
// handleOperatingSystemCommand is called for every rune read while in
// MODE_OSC. Nothing happens until a bell (\a) arrives; at that point the
// parser returns to MODE_NORMAL and the bytes between the OSC introducer and
// the bell are handed to parseElementSequence.
//
// If parsing yields neither an element nor an error, nothing is rendered.
// Otherwise the element (or the error message) is written to the screen.
// Link elements are appended in place; everything else, including errors, is
// placed on a line of its own.
func (p *parser) handleOperatingSystemCommand(char rune) {
	if char != '\a' {
		return
	}
	p.mode = MODE_NORMAL

	payload := p.ansi[p.instructionStartedAt:p.cursor]
	element, parseErr := parseElementSequence(string(payload))
	if element == nil && parseErr == nil {
		// Nothing parsed and nothing went wrong: there is nothing to show.
		return
	}

	// Only a successfully parsed link is rendered inline.
	inline := element != nil && element.elementType == ELEMENT_LINK
	if !inline {
		// Start from an empty line of our own.
		if p.screen.x != 0 {
			p.screen.newLine()
		}
		p.screen.clear(p.screen.y, screenStartOfLine, screenEndOfLine)
	}

	if parseErr == nil {
		p.screen.appendElement(element)
	} else {
		p.screen.appendMany([]rune("*** Error parsing custom element escape sequence: "))
		p.screen.appendMany([]rune(parseErr.Error()))
	}

	if !inline {
		p.screen.newLine()
	}
}
// handleApplicationProgramCommand is called for every rune read while in
// MODE_APC. It is a no-op until the APC is terminated by BEL (0x07).
//
// Strictly speaking an APC is terminated by String Terminator (ST; 0x9C):
// https://en.wikipedia.org/wiki/C0_and_C1_control_codes#C1_controls
//
// However, xterm historically accepts BEL as well as ST:
// https://en.wikipedia.org/wiki/ANSI_escape_code#OSC_(Operating_System_Command)_sequences
//
// iTerm2's inline images use the same BEL-terminated form
// (ESC ] 1337 ; key = value ^G):
// https://iterm2.com/documentation-images.html
//
// Buildkite's ANSI timestamper does likewise, and no other ST-terminated
// APCs are expected here.
//
// On BEL the parser returns to MODE_NORMAL and the bytes between the APC
// introducer and the BEL are passed to parseApcBk. A parse error is written
// to the screen as text; non-nil data is stored as line metadata under
// bkNamespace.
func (p *parser) handleApplicationProgramCommand(char rune) {
	// \x07 is BEL (also written \a); any other rune is still part of the APC.
	if char == '\x07' {
		p.mode = MODE_NORMAL

		payload := p.ansi[p.instructionStartedAt:p.cursor]
		data, parseErr := parseApcBk(string(payload))
		switch {
		case parseErr != nil:
			p.screen.appendMany([]rune("*** Error parsing Buildkite APC ANSI escape sequence: "))
			p.screen.appendMany([]rune(parseErr.Error()))
		case data != nil:
			p.screen.setnxLineMetadata(bkNamespace, data)
		}
	}
}
// handleControlSequence is called for every rune read while in MODE_CONTROL.
// The rune is upper-cased before it is examined, so final characters match
// regardless of case.
//
//   - '?' and the digits 0-9 belong to the current parameter and are skipped.
//   - ';' closes the current parameter and starts the next one.
//   - Q, J, K, G, A, B, C, D and M close the last parameter, hand the
//     sequence to the screen and end it.
//   - H and L end the sequence without doing anything.
//   - Anything else abandons the sequence: the cursor is rewound to the ESC
//     that opened it.
//
// Every outcome except the first two returns the parser to MODE_NORMAL.
func (p *parser) handleControlSequence(char rune) {
	code := unicode.ToUpper(char)

	// Parameter characters keep us inside the sequence.
	switch {
	case code == '?' || ('0' <= code && code <= '9'):
		return
	case code == ';':
		p.addInstruction()
		p.instructionStartedAt = p.cursor + utf8.RuneLen(';')
		return
	}

	// Anything else finishes the sequence one way or another.
	switch code {
	case 'A', 'B', 'C', 'D', 'G', 'J', 'K', 'M', 'Q':
		p.addInstruction()
		p.screen.applyEscape(code, p.instructions)
	case 'H', 'L':
		// Set/reset mode (SM/RM): deliberately ignored.
	default:
		// Unrecognized character, abort the escape code.
		p.cursor = p.escapeStartedAt
	}
	p.mode = MODE_NORMAL
}
// handleNormal is called for every rune read while in MODE_NORMAL. An ESC
// records where the escape began and switches to MODE_ESCAPE. Newline,
// carriage return and backspace are forwarded to the matching screen
// operation. Every other rune is appended to the screen.
func (p *parser) handleNormal(char rune) {
	if char == '\x1b' {
		// Remember where the escape began so it can be abandoned later.
		p.escapeStartedAt = p.cursor
		p.mode = MODE_ESCAPE
		return
	}

	switch char {
	case '\b':
		p.screen.backspace()
	case '\r':
		p.screen.carriageReturn()
	case '\n':
		p.screen.newLine()
	default:
		p.screen.append(char)
	}
}
// handleEscape is called for the rune that follows an ESC and decides what
// kind of sequence, if any, has started:
//
//   - '[' starts a control sequence (MODE_CONTROL) with a fresh instruction list
//   - ']' starts an operating system command (MODE_OSC)
//   - '(' or ')' starts a charset designation (MODE_CHARSET)
//   - '_' starts an application program command (MODE_APC)
//   - 'M' calls the screen's revNewLine and stays in MODE_NORMAL
//
// For the four sequence introducers, instructionStartedAt is set to the byte
// just after the introducer. Any other rune means this was not an escape
// code: the cursor is rewound to the ESC and the parser returns to
// MODE_NORMAL.
func (p *parser) handleEscape(char rune) {
	next := MODE_NORMAL
	switch char {
	case '[':
		next = MODE_CONTROL
		p.instructions = make([]string, 0, 1)
	case ']':
		next = MODE_OSC
	case '(', ')':
		next = MODE_CHARSET
	case '_':
		next = MODE_APC
	case 'M':
		p.screen.revNewLine()
	default:
		// Not an escape code, false alarm.
		p.cursor = p.escapeStartedAt
	}

	if next != MODE_NORMAL {
		// The sequence body begins right after the (single-byte) introducer.
		p.instructionStartedAt = p.cursor + utf8.RuneLen(char)
	}
	p.mode = next
}
// addInstruction appends the bytes between instructionStartedAt and the
// cursor to the instruction list as one string. An empty span is skipped.
func (p *parser) addInstruction() {
	segment := p.ansi[p.instructionStartedAt:p.cursor]
	if len(segment) == 0 {
		return
	}
	p.instructions = append(p.instructions, string(segment))
}