Regex

https://www.regexpal.com/

summary

import re
reg_obj = re.compile(pattern)
match_obj = reg_obj.search(string)
match_obj.group()

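match

Things (what to match: digits, word characters, whitespace, sets of characters)

Where (anchors: must start here, must end here)

How many times (quantifiers: optional, zero or more, one or more, exact counts)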

details

import re

# The pattern \d\d\d matches three consecutive digits.
# The r prefix makes a raw string, so Python does not treat \ as a string escape;
# without it the pattern would have to be written as '\\d\\d\\d'.

regex_object = re.compile(r'\d\d\d')  # compile - set the pattern (three digits, as described above)
match_object = regex_object.search('my number is 333')  # search the string; gives the first match found
result = match_object.group()  # no argument: extract the whole matched text
print('found this number: ' + result)

# Parentheses in the pattern create capture groups that can be read back individually.
regex_object2 = re.compile(r'(\d\d\d)-(\d\d\d)')
match_object2 = regex_object2.search('my number is 333-444')

# groups() (note the plural) returns every capture group together as a tuple
result_all = match_object2.groups()

# group() called with several indexes returns them together as a tuple:
# 0 is the whole match, 1 is the first capture group, 2 is the second
result_0, result_1, result_2 = match_object2.group(0, 1, 2)
print(result_all, result_0, result_1, result_2)

# unpack the tuple so each captured part gets its own name
(firstgroup, secondgroup) = result_all
print(firstgroup)

# A raw string only stops Python treating \ as a string escape; the regex engine
# still reads ( and ) as group delimiters.
# To match literal brackets, escape them with backslashes: \( and \)
regex_with_brackets = re.compile(
    r'(\(\d\d\))'  # outer ( ) is a capture group; \( and \) are literal brackets
)

# The | (pipe) separates alternatives: match this OR that
regex_tree = re.compile(r'Pine|Wattle')

# search() returns only the first alternative it comes across in the text
mo_tree = regex_tree.search('there is a Pine and a Wattle')
print(mo_tree.group())

# Match a fixed prefix followed by one of several endings (first occurrence only)
alans = re.compile(r'Alan(Partridge|key)')
print(alans)  # shows the compiled pattern object itself

# the prefix and the ending must be adjacent - no space between them
alans_mo = alans.search('AlanPartridge is here but not Alankey')

print(alans_mo.groups())  # ('Partridge',) - 'key' is not found
print(alans_mo.group())   # whole match: AlanPartridge
print(alans_mo.group(1))  # the only capture group: Partridge (there is no second group)

# Optional text: a ? after a group matches 0 or 1 instance of that group (here: wo)
# Other quantifiers:
#   *           0 or more
#   +           1 or more
#   (wo){3}     wo exactly 3 times
#   (wo){3,5}   wo 3 to 5 times, taking as many as possible (max)
#   (wo){3,5}?  wo 3 to 5 times, taking as few as possible (min)
chair = re.compile(r'Chair(wo)?man')

# with the optional part present
mo_chair = chair.search('The Chairwoman')
print(mo_chair.group(0))

# with the optional part absent
mo_chair = chair.search('The Chairman')
print(mo_chair.group(0))

# findall() returns every match in the string; search() stops at the first one

# Pattern WITHOUT groups: the result is a list of matched strings
regex_object = re.compile(r'\d\d-\d\d')
found_object = regex_object.findall('my number is 33-44 and 66-77')
print(found_object)

# Pattern WITH groups: the result is a list of tuples,
# one tuple per match, holding (group 1, group 2)
regex_object = re.compile(r'(\d\d)-(\d\d)')
found_object = regex_object.findall('my number is 33-44 and 66-77')
print(found_object)

#Things you can match:
# digits, w0rd_stuff, space stuff - \d \w \s
# not these - \D \W \S
# match 0+ (*), 0-1 (?), 1+ (+), exactly 3x ({3}), 3-5x ({3,5})
# match specific [bunch of letters]
# match ranges [0-5] [a-z] [^ not this stuff] ^match_must_start_here  must_end_here$
# . (dot) matches any single character except \n
# (.*)  matches anything, as much as possible (greedy / max)
# (.*?) matches anything, as little as possible (non-greedy / min)

# compile() takes an optional second argument of flags - e.g. re.compile('string', flags)
# e.g. re.DOTALL makes the dot match \n as well, so (.*) can match past a line break