-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathregular_expressions_demo.py
163 lines (132 loc) · 5.16 KB
/
regular_expressions_demo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
import re
# # Non-capturing and Named Groups
# #https://docs.python.org/3/howto/regex.html#non-capturing-and-named-groups
# ret_val = re.search("(?P<my_name>[abc])+",'abc')
# print('ret_val:',ret_val)
# print(ret_val.group('my_name'))
# for i in ret_val.groups():
# print(i)
# ret_val = re.search("([abc]+)",'abc')
# print(ret_val.group(1))
# {m,n}? and {m,n}+
#https://docs.python.org/3/library/re.html#regular-expression-syntax
# ret_val = re.search("(a{3,5}+)(aa)",'aaaaaa')
# print(ret_val)
# print(ret_val.group(1))
# print(ret_val.group(2))
# ret_val = re.split(r'\W+', 'Words, words, words.')
# print(ret_val)
# Example showing what Match.group() and Match.groups() return.
# group(0) is always the entire match; group(n) for n >= 1 exists only when
# the pattern contains at least n capturing groups.
# groups() returns a tuple containing all the subgroups of the match, from 1
# up to however many groups are in the pattern (empty when there are none).
sample_text = '[email protected]'
pattern = r'\d+.*'
ret_val = re.search(pattern=pattern, string=sample_text)
if ret_val is None:
    # re.search returns None when nothing matches; calling .group() on None
    # would raise AttributeError and stop the rest of the script.
    print(f"No match for pattern {pattern!r} in {sample_text!r}")
else:
    # This pattern has no capturing groups, so group(1) would raise
    # IndexError ("no such group"); print the whole match instead.
    print(ret_val.group(0))
    for i in ret_val.groups():
        print(i)
# #------------------------------------
# # DEMO
# #------------------------------------
# import re
# string0 = "abcdefghijklmnopqrstuvwxyz"
# string1 = "abcdefghijklmnopqrstuvwxyz1234567890"
# string2 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
# string3 = "0123456789"
# string4 = "HELLO HEL"
# string5 = "hello hel"
# string6 = "hello123"
# string14 = "hello123h"
# string7 = "123hello"
# string8 = "__"
# string9 = "......"
# string10 = "**************"
# string11 = "```````````````"
# string12 = "!!!!!!!!!!!"
# string13 = ""
# string15 = "abcdef"
# string16 = "a430928409238409234"
# string17 = "a4"
# my_list = [string0,string1,string2,string3,string4,string5,string6,string14,string7,string8,string9,string10,string11,string12,string13,string15,string16,string17]
# for elem in my_list:
# test_string = elem
# pattern = '^[a-z](([a-zA-Z_.*`! ]{5})|([0-9]*))$'
# #\w - Matches any alphanumeric character (digits and alphabets).
# #\d - Matches any decimal digit. Equivalent to [0-9]
# #\s - Matches where a string contains any whitespace character.
# # Equivalent to [ \t\n\r\f\v].
# result = re.search(pattern, test_string)
# print(type(result))
# if result:
# print("The groups returned from search are ", result.groups())
# print("Group 0 result " , result.group(0))
# try:
# print("Group 1 result " , result.group(1))
# except:
# pass
# try:
# print("Group 2 result " , result.group(2))
# except:
# pass
# try:
# print("Group 3 result " , result.group(3))
# except:
# pass
# [] . ^ $ * + ? {} () \ |
"""
--------------------------------
Exercises -- Regular Expressions
-------------------------------
Given the list of strings as input :
Kane#[email protected]
1) List the emails that contain any of the special characters # ~ ` !
2) List the emails that start with numbers
3) List the emails that start with numbers followed by an underscore
4) List the emails that start with numbers followed by an underscore or lowercase characters
5) List the emails that start with numbers followed by an underscore, lowercase characters, or uppercase characters
6) List the emails with only numbers before the @
7) List the emails with numbers anywhere before the @
"""
#---------------------------------------------
#*** Solutions to above Exercise ***
#---------------------------------------------
import re
# Sample inputs for the exercises.
# NOTE(review): every address below reads as the placeholder text
# '[email protected]' -- presumably the real sample addresses were obfuscated;
# confirm and restore them, otherwise none of the patterns will match.
string1 = '[email protected]'
string2 = '[email protected]'
string3 = '[email protected]'
string4 = '[email protected]'
string5 = '[email protected]'
string6 = '[email protected]'
string7 = '[email protected]'
string8 = '[email protected]'
string9 = '[email protected]'
string10 = 'Kane#[email protected]'
my_list = [
    string1,
    string2,
    string3,
    string4,
    string5,
    string6,
    string7,
    string8,
    string9,
    string10,
]

# Exactly one `pattern` assignment should be active; uncomment the solution
# you want to try. Raw strings (r'...') are used so that backslash sequences
# such as \d reach the regex engine unchanged and do not trigger Python's
# invalid-escape-sequence warning.
# 1) emails that contain any of the special characters #~`!
# pattern = r'.*[#~`!].*'
# 2) emails that start with numbers
# pattern = r'^\d+.*'
# 3) emails that start with numbers followed by an underscore
# pattern = r'^\d+_.*'
# 4) emails that start with numbers followed by an underscore or lowercase characters
# pattern = r'^\d+[_a-z]+.*'
# 5) emails that start with numbers followed by an underscore, lowercase or uppercase characters
# pattern = r'^\d+[_a-zA-Z]+.*'
# 6) emails with only numbers before the @
# pattern = r'^\d+@.*'
# 7) emails with numbers anywhere before the @
pattern = r'.*\d+@.*'

# Compile once outside the loop; the same pattern is applied to every entry.
email_regex = re.compile(pattern)
print(f"Following are the email ids matching the pattern {pattern}")
for elem in my_list:
    result = email_regex.search(elem)
    if result:
        # group(0) is the full text matched by the pattern.
        print("Email Id :", result.group(0))