-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtokenizer.c
114 lines (103 loc) · 3.22 KB
/
tokenizer.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* tokenizer.c :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: omoussao <[email protected]> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2022/02/02 16:42:41 by omoussao #+# #+# */
/* Updated: 2022/03/05 19:52:41 by omoussao ### ########.fr */
/* */
/* ************************************************************************** */
#include "minishell.h"
/*
** Skips the run of whitespace that starts at `line`. The scan stops at the
** end of the string, at a newline, or at the first character for which
** ft_isspace() is false.
** When at least one character was skipped, one WSPACE token with a NULL
** value is appended to `tokens`, so a whole run produces a single token.
** Returns a pointer to the first character that was not consumed.
*/
char	*whitespaces(t_list *tokens, char *line)
{
	char	*cursor;

	cursor = line;
	while (*cursor != '\0' && *cursor != '\n' && ft_isspace(*cursor))
		cursor++;
	if (cursor != line)
		push_back(tokens, WSPACE, NULL);
	return (cursor);
}
/*
** Emits one token for the parenthesis at `*line`: OPAR for '(' and CPAR
** for ')'. The character is not validated here; any other character
** yields the token value 0.
** The token value is built from the character with ft_chardup() and
** passed through gc_filter() with GC_TMP.
** Returns a pointer to the character following the one consumed.
*/
char	*parentheses(t_list *tokens, char *line)
{
	t_token	kind;

	if (*line == '(')
		kind = OPAR;
	else if (*line == ')')
		kind = CPAR;
	else
		kind = 0;
	push_back(tokens, kind, gc_filter(ft_chardup(*line), GC_TMP));
	return (line + 1);
}
/*
** Tokenizes a run of one repeated operator character among | & > < ;
** starting at `line`.
** The run is split greedily into two-character operators (OR, AND, DGREAT,
** DLESS, DSEMI); if the run length is odd, one trailing single-character
** operator (PIPE, BG, GREAT, LESS, FG) follows. For example "|||" yields
** OR then PIPE.
** Returns a pointer to the first character after the run.
*/
char	*lookahead_state(t_list *tokens, char *line)
{
	char	c;
	int		len;
	t_token	token1;
	t_token	token2;
	char	val2[3];

	c = *line;
	val2[0] = c;
	val2[1] = c;
	/* Terminate the buffer: it is handed to a string-duplication routine, */
	/* so it must be a valid C string rather than hold an indeterminate byte. */
	val2[2] = '\0';
	/* Exactly one comparison is true for a supported operator, which */
	/* selects the matching token; an unsupported character gives 0. */
	token1 = (c == '|') * PIPE + (c == '&') * BG + (c == '>') * GREAT
		+ (c == '<') * LESS + (c == ';') * FG;
	token2 = (c == '|') * OR + (c == '&') * AND + (c == '>') * DGREAT
		+ (c == '<') * DLESS + (c == ';') * DSEMI;
	/* Measure how many times the operator character repeats. */
	len = 1;
	while (line[len] == c)
		len++;
	line += len;
	/* Emit one doubled operator per pair of characters. */
	/* NOTE(review): the size 3 presumably counts the terminator, as in */
	/* normal_state's `len + 1` call - confirm against ft_strndup. */
	while (len > 1)
	{
		push_back(tokens, token2, gc_filter(ft_strndup(val2, 3), GC_TMP));
		len -= 2;
	}
	/* An odd run leaves one character over: emit the single operator. */
	if (len)
		push_back(tokens, token1, gc_filter(ft_chardup(c), GC_TMP));
	return (line);
}
/*
** Reads an unquoted word starting at `line`. The word ends at the first
** character found by ft_strchr() in the set  ' " ( ) $ < > & | ;  or
** accepted by ft_isspace().
** NOTE(review): stopping at the end of the string relies on ft_strchr()
** matching the terminating '\0' like the libc strchr() - confirm.
** A non-empty word is pushed as:
**   TILDE  when it is "~" or starts with "~/",
**   WORD   when it contains '/' or has neither '*' nor '?',
**   WILDC  otherwise (contains '*' or '?' and no '/').
** Returns a pointer to the first character after the word; `line` itself
** when no character was consumed.
*/
char	*normal_state(t_list *tokens, char *line)
{
	int		len;
	char	*word;
	t_token	kind;

	len = 0;
	while (1)
	{
		if (ft_strchr("\'\"()$<>&|;", line[len]) || ft_isspace(line[len]))
			break ;
		len++;
	}
	if (len == 0)
		return (line);
	word = gc_filter(ft_strndup(line, len + 1), GC_TMP);
	if (word[0] == '~' && (word[1] == '\0' || word[1] == '/'))
		kind = TILDE;
	else if (ft_strchr(word, '/'))
		kind = WORD;
	else if (ft_strchr(word, '*') || ft_strchr(word, '?'))
		kind = WILDC;
	else
		kind = WORD;
	push_back(tokens, kind, word);
	return (line + len);
}
/*
** Splits `cmdline` into a list of tokens.
** The list starts with a CMDBEGIN token (NULL value) and ends with an
** ENDOFCMD token whose value is the string "newline".
** Scanning stops at the end of the string, at a '#' seen between tokens,
** or at a newline. Each iteration dispatches on the current character to
** the matching sub-tokenizer, which returns the position to resume from.
** Returns the token list created with new_list().
*/
t_list	*tokenizer(char *cmdline)
{
	t_list	*tokens;
	char	*cur;
	char	c;

	tokens = new_list();
	push_back(tokens, CMDBEGIN, NULL);
	cur = cmdline;
	c = *cur;
	while (c != '\0' && c != '#' && c != '\n')
	{
		if (ft_isspace(c))
			cur = whitespaces(tokens, cur);
		else if (c == '\'')
			cur = single_quote(tokens, cur + 1);
		else if (c == '\"')
			cur = double_quote(tokens, cur + 1);
		else if (c == '$')
			cur = dollar(tokens, cur);
		else if (ft_strchr("|&<>;", c))
			cur = lookahead_state(tokens, cur);
		else if (c == '(' || c == ')')
			cur = parentheses(tokens, cur);
		else
			cur = normal_state(tokens, cur);
		c = *cur;
	}
	push_back(tokens, ENDOFCMD, gc_filter(ft_strdup("newline"), GC_TMP));
	return (tokens);
}