-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtokenizer.c
130 lines (118 loc) · 2.85 KB
/
tokenizer.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include "tokenizer.h"
/*
 * Token list produced by tokenize() and released by tokens_destroy().
 * The layout is defined in this .c file, so the fields are only touched by
 * the functions in this file.
 */
struct tokens {
/* Number of valid entries in `tokens`. */
size_t tokens_length;
/* Heap array of heap-allocated, NUL-terminated words (NULL while empty). */
char **tokens;
/* Number of valid entries in `buffers`. */
size_t buffers_length;
/*
 * Second pointer array whose entries tokens_destroy() frees. tokenize() sets
 * it to NULL/0 and nothing in this file appends to it.
 */
char **buffers;
};
/*
 * Append elem to the growable pointer array *pointer, which currently holds
 * *size entries.
 *
 * pointer: address of the array (the array may be NULL while it is empty);
 *          updated to point at the reallocated storage.
 * size:    address of the entry count; incremented by one on return.
 * elem:    pointer stored in the new last slot.
 *
 * Returns elem. Running out of memory is fatal: the process is aborted, the
 * same way tokenize() reacts to an over-long token, so the result never has
 * to be checked for NULL.
 */
static void *vector_push(char ***pointer, size_t *size, void *elem) {
    /* Grow through a temporary so a failed realloc cannot clobber the array. */
    char **grown = realloc(*pointer, sizeof **pointer * (*size + 1));
    if (grown == NULL) {
        abort();
    }
    grown[*size] = elem;
    *pointer = grown;
    *size += 1;
    return elem;
}
/*
 * Return a newly allocated, NUL-terminated copy of the first n bytes of
 * source.
 *
 * source: buffer with at least n readable bytes. It is not modified and does
 *         not need to be NUL-terminated.
 * n:      number of bytes to copy; 0 yields an empty string.
 *
 * Returns the malloc'd copy, which the caller must free(). Running out of
 * memory aborts the process instead of returning NULL.
 */
static void *copy_word(char *source, size_t n) {
    char *word = malloc(n + 1);
    if (word == NULL) {
        abort();
    }
    /* Copy exactly n bytes and terminate the copy, not the caller's buffer. */
    memcpy(word, source, n);
    word[n] = '\0';
    return word;
}
/*
 * Split line into words, shell-style.
 *
 * Rules:
 *   - Unquoted whitespace (as classified by isspace) separates words.
 *   - '...' and "..." add their contents, whitespace included, to the current
 *     word; the quote characters themselves are dropped. An unterminated
 *     quote runs to the end of the line.
 *   - A backslash, inside or outside quotes, copies the next character
 *     literally. A backslash that is the last character of the line is
 *     dropped.
 *   - A word with no characters (for example a lone '') is not emitted.
 *
 * Returns a heap-allocated token list that must be released with
 * tokens_destroy(), or NULL when line is NULL or the list cannot be
 * allocated. A single word of 4095 or more characters aborts the process.
 */
struct tokens *tokenize(const char *line) {
    enum { MODE_NORMAL, MODE_SQUOTE, MODE_DQUOTE } mode = MODE_NORMAL;

    if (line == NULL) {
        return NULL;
    }

    struct tokens *tokens = malloc(sizeof *tokens);
    if (tokens == NULL) {
        return NULL;
    }
    tokens->tokens_length = 0;
    tokens->tokens = NULL;
    tokens->buffers_length = 0;
    tokens->buffers = NULL;

    /* Scratch space for the word being built; local so calls share no state. */
    char token[4096];
    const size_t n_max = sizeof token;
    size_t n = 0;
    const size_t line_length = strlen(line);

    for (size_t i = 0; i < line_length; i++) {
        const char c = line[i];

        if (c == '\\') {
            /* Escapes behave the same in every mode. */
            if (i + 1 < line_length) {
                token[n++] = line[++i];
            }
        } else if (mode == MODE_NORMAL) {
            if (c == '\'') {
                mode = MODE_SQUOTE;
            } else if (c == '"') {
                mode = MODE_DQUOTE;
            } else if (isspace((unsigned char) c)) {
                /* The cast keeps negative char values out of isspace (UB). */
                if (n > 0) {
                    void *word = copy_word(token, n);
                    vector_push(&tokens->tokens, &tokens->tokens_length, word);
                    n = 0;
                }
            } else {
                token[n++] = c;
            }
        } else if (c == (mode == MODE_SQUOTE ? '\'' : '"')) {
            /* Matching close quote; the other quote character is literal. */
            mode = MODE_NORMAL;
        } else {
            token[n++] = c;
        }

        /* Always leave room for one more character plus a terminator. */
        if (n + 1 >= n_max) {
            abort();
        }
    }

    /* Emit the trailing word if the line did not end in whitespace. */
    if (n > 0) {
        void *word = copy_word(token, n);
        vector_push(&tokens->tokens, &tokens->tokens_length, word);
    }
    return tokens;
}
/* Number of words held by tokens; a NULL list counts as empty. */
size_t tokens_get_length(struct tokens *tokens) {
    return tokens != NULL ? tokens->tokens_length : 0;
}
/*
 * Fetch word n (zero-based) from tokens. Yields NULL when tokens is NULL or
 * n is past the last word. The returned pointer is the one stored in the
 * list, so it is freed by tokens_destroy().
 */
char *tokens_get_token(struct tokens *tokens, size_t n) {
    if (tokens != NULL && n < tokens->tokens_length) {
        return tokens->tokens[n];
    }
    return NULL;
}
/*
 * Release a token list: every stored word, every entry of buffers, both
 * pointer arrays, and the list structure itself. Passing NULL is a no-op.
 * Pointers previously obtained from tokens_get_token() are invalid afterwards.
 */
void tokens_destroy(struct tokens *tokens) {
    if (tokens == NULL) {
        return;
    }
    /* size_t indices match the type of the stored lengths. */
    for (size_t i = 0; i < tokens->tokens_length; i++) {
        free(tokens->tokens[i]);
    }
    free(tokens->tokens);
    for (size_t i = 0; i < tokens->buffers_length; i++) {
        free(tokens->buffers[i]);
    }
    /* The array itself must go too, not just its entries; free(NULL) is fine. */
    free(tokens->buffers);
    free(tokens);
}