RegEx.txt

cat words.txt                         Just print the whole file

---- Simple Use & Flags ----

grep -E 'Sue' words.txt               all lines where 'Sue' occurs
grep -E -c 'Sue' words.txt            counts number of matched lines
grep -E -n 'Sue' words.txt            now with line numbers
grep -E -o 'Sue' words.txt            print only the matched text - each 'Sue' on its own line
grep -E -v 'Sue' words.txt            all lines where 'Sue' does NOT occur

---- Anchors ----

grep -E '^Sue' words.txt              lines that START with 'Sue'
grep -E 'Sue$' words.txt              lines that END with 'Sue'

---- Special characters - ( ?  \  .  [  ]  ^  $ * ) ----

grep -E '*' words.txt                 not really what you expect
grep -E '\*' words.txt                Special characters (*) must be escaped with '\'

---- Quantifiers ----

grep -E 'a b. c' words.txt            '.' any ONE character
grep -E 'a b.* c' words.txt           * = 0 or more (of the previous character)
grep -E 'a b.? c' words.txt           ? = 0 or 1 (of the previous character)
grep -E 'a b.+ c' words.txt           + = 1 or more (of the previous character)
grep -E 'a b.{3} c' words.txt         {3} exactly 3 (of the previous character)
grep -E 'a b.{2,3} c' words.txt       {2,3} between 2 and 3 (of the previous character)

---- Matches are greedy - They will match as much as possible ----

grep -E -o 'TH.*S' words.txt          Matches are greedy
grep -E -o 'TH[^S]*S' words.txt       [^S] everything that is NOT 'S'
grep -E -o '<a.*>' words.txt          probably not what you want
grep -E -o '<a[^>]*>' words.txt       pick out the 'anchor' tag

---- Ranges of Characters [] ----

grep -E 'a b[a-z]{2} c' words.txt     [a-z] - any lower case letter
grep -E 'a b[0-9]{2} c' words.txt     [0-9] - any digit
grep -E 'a b[0-9,a-z]{3} c' words.txt     any digit, lower case letter, or ',' (a comma inside [] is matched literally)
grep -E '[0-9]{3}-[0-9]{4}' words.txt      Phone Numbers
grep -E '[0-9]{3}-[0-9]{2}-[0-9]{4}' words.txt       SSN

---- Words and boundaries ----

grep -E ' Bob ' words.txt             Want single word 'Bob' - Does not always work.
grep -E '\bBob\b' words.txt           Want single word 'Bob' - Use word boundaries
grep -E 'a \b\w+\b c' words.txt       \b\w+\b will match a single word

---- Groupings () and | ----

grep -E '(Bob|Eve)' words.txt           | = or
grep -E '(Eve ){3}' words.txt         The string 'Eve ' 3 times in a row
grep -E '(ba){2}' words.txt           the string 'ba' 2 times in a row
grep -E 'a b(1|a) c' words.txt        matches 'a b1 c' or 'a ba c'
grep -E '(From|Subject|Date):' words.txt             () groupings

sed 's/Sue/*SUE*/g' words.txt         simple global substitution (g = global)
sed 's/[0-9]/(&)/g' words.txt         sub with backreference '&'
sed '1,22d' words.txt                  delete lines 1 through 22

tr - transliterate

cat words.txt | tr A-Z a-z            convert upper case to lower case
cat words.txt | tr Sa  xy             change all S to x and a to y

uniq - report or omit repeated lines  (-i ignore case, -c count)
uniq -c -i words.txt

cat words.txt | tr A-Z a-z | grep -o -E '[a-z]' | sort | uniq -c -i | sort -n

cat words.txt | tr A-Z a-z | grep -o -E '\b\w{4}\b' | sort | uniq -c -i | sort -n