-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathRegEx.txt
73 lines (51 loc) · 3.07 KB
/
RegEx.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
cat words.txt Just print the whole file
---- Simple Use & Flags ----
grep -E 'Sue' words.txt all lines where 'Sue' occurs
grep -E -c 'Sue' words.txt counts number of matched lines
grep -E -n 'Sue' words.txt now with line numbers
grep -E -o 'Sue' words.txt just print the match - 'Sue' each on different line
grep -E -v 'Sue' words.txt all lines where 'Sue' does NOT occur
---- Anchors ----
grep -E '^Sue' words.txt lines that START with 'Sue'
grep -E 'Sue$' words.txt lines that END with 'Sue'
---- Special characters - ( ? \ . [ ] ^ $ * ) ----
grep -E '*' words.txt not really what you expect
grep -E '\*' words.txt Special characters (*) must be escaped with '\'
---- Quantifiers ----
grep -E 'a b. c' words.txt '.' any ONE character
grep -E 'a b.* c' words.txt * = 0 or more (of the previous character)
grep -E 'a b.? c' words.txt ? = 0 or 1 (of the previous character)
grep -E 'a b.+ c' words.txt + = 1 or more (of the previous character)
grep -E 'a b.{3} c' words.txt {3} exactly 3 (of the previous character)
grep -E 'a b.{2,3} c' words.txt {2,3} between 2 and 3 (of the previous character)
---- Matches are greedy - They will match as much as possible ----
grep -E -o 'TH.*S' words.txt Matches are greedy
grep -E -o 'TH[^S]*S' words.txt [^S] everything that is NOT 'S'
grep -E -o '<a.*>' words.txt probably not what you want
grep -E -o '<a[^>]*>' words.txt pick out the 'anchor' tag
---- Ranges of Characters [] ----
grep -E 'a b[a-z]{2} c' words.txt [a-z] - any lower case letter
grep -E 'a b[0-9]{2} c' words.txt [0-9] any digit
grep -E 'a b[0-9,a-z]{3} c' words.txt any digit or lower case letter (note: the ',' inside [] is literal, so a comma matches too)
grep -E '[0-9]{3}-[0-9]{4}' words.txt Phone Numbers
grep -E '[0-9]{3}-[0-9]{2}-[0-9]{4}' words.txt SSN
---- Words and boundaries ----
grep -E ' Bob ' words.txt Want single word 'Bob' - Does not always work.
grep -E '\bBob\b' words.txt Want single word 'Bob' - Use word boundaries
grep -E 'a \b\w+\b c' words.txt \b\w+\b will match a single word
---- Groupings () and | ----
grep -E '(Bob|Eve)' words.txt | = or
grep -E '(Eve ){3}' words.txt The string 'Eve ' 3 times in a row
grep -E '(ba){2}' words.txt the string 'ba' 2 times in a row
grep -E 'a b(1|a) c' words.txt matches 'a b1 c' or 'a ba c'
grep -E '(From|Subject|Date):' words.txt () groupings
sed 's/Sue/*SUE*/g' words.txt simple global substitution (g = global)
sed 's/[0-9]/(&)/g' words.txt sub with backreference '&'
sed '1,22d' words.txt deleting lines
tr - transliterate
cat words.txt | tr A-Z a-z quick upper case to lower case conversion
cat words.txt | tr Sa xy change all S to x and a to y
uniq - report or omit repeated lines (-i ignore case, -c count)
uniq -c -i words.txt
cat words.txt | tr A-Z a-z | grep -o -E '[a-z]' | sort | uniq -c -i | sort -n
cat words.txt | tr A-Z a-z | grep -o -E '\b\w{4}\b' | sort | uniq -c -i | sort -n