2018-09-11正規表現ノート

8692 ワード

正規表現は、文字列が特定のルールを満たしているかどうかを確認するためのツールです。たとえば、(1) アカウント名が携帯番号やメールアドレスの形式になっているか、何桁で構成されているかを判定する、(2) 汚い言葉を * などに置き換える、といった用途があります。
1. 正規表現の文法
2. Python は組み込みモジュール re で正規表現をサポートしています。fullmatch(正規表現, 文字列) は、文字列全体が正規表現のルールに合致するかどうかを判定します。
from re import findall, fullmatch, search
# Basic metacharacters.
# Each entry is a (pattern, text) pair. fullmatch() succeeds only when the
# pattern consumes the whole text; the match object (or None) is printed.
basic_cases = [
    # 1) .  matches any single character (other than a newline)
    (r'.', 'y'),            # exactly one character
    (r'..', 'ye'),          # exactly two characters
    (r'abc.', 'abc*'),      # literal 'abc' followed by any one character

    # 2) \w  matches one word character (letter, digit or underscore)
    (r'abc\w\w', 'abc_2'),  # literal 'abc' followed by two word characters

    # 3) \s  matches one whitespace character (space, tab, newline, ...)
    (r'\w\w\w\s.', 'yey u'),  # three word chars, one whitespace, any char

    # 4) \d  matches one digit
    (r'\d\d\d.', '123y'),   # three digits followed by any one character

    # 5) \b  matches a word boundary (start/end of the text, or the position
    #    between a word character and a non-word character).
    #    \b is zero-width: it consumes nothing, so the space after the first
    #    word still has to be matched explicitly by \s.
    (r'good\b\sgood', 'good good'),
    (r'when\b\swhere', 'when where'),
    (r'abc\b', 'abc'),      # the end of the text counts as a boundary
]

for re_str, text in basic_cases:
    result = fullmatch(re_str, text)
    print(result)

# 6) ^  asserts that the match starts at the beginning of the string.
# With fullmatch() the anchor is redundant because the whole string has to
# match anyway; it matters for search(), which otherwise scans forward.
re_str = r'^\d\d'
result = fullmatch(re_str, '23')
print(result)

# Because of '^', search() can only succeed at index 0: the leading '99' is
# found, the later '11' is never a candidate.
result = search(r'^\d\d', '99abc11hkj')
print(result)

# 7) $  asserts that the match ends at the end of the string.
# 'a' followed by one digit, and that digit must be the last character.
re_str = r'a\d$'
result = fullmatch(re_str, 'a8')
print(result)

# The earlier 'a9' is skipped because it is not at the end; the trailing
# 'a8' is the match.
result = search(re_str, 'a9aaa8')
print(result)


# Negated shorthand classes: the upper-case form matches whatever the
# lower-case form does not.
negated_cases = [
    # 8) \W  matches one character that is NOT a word character
    (r'\W\w', '!a'),
    # 9) \S  matches one character that is NOT whitespace
    (r'\S\w\w\w', '@a2h'),
]

for re_str, text in negated_cases:
    result = fullmatch(re_str, text)
    print(result)

# 10) \D  matches one character that is NOT a digit
# 11) \B  matches a position that is NOT a word boundary

# ============================= Character sets =============================
# 1) [...]  matches exactly one character out of the listed set.
#
#    A '-' between two characters denotes a range; a '-' placed first or
#    last inside the brackets is a literal hyphen:
#      [1-8]           -> one of '1','2','3','4','5','6','7','8'
#      [-18] or [18-]  -> one of '1', '8', '-'
#
# 2) [^...]  negated set: matches one character that is NOT in the set.
char_set_cases = [
    # one of 'a', 'b', 'c' or '+', followed by two digits
    (r'[abc+]\d\d', '+67'),
    # a digit from 1 to 8, followed by two lower-case ASCII letters
    (r'[1-8][a-z][a-z]', '2hn'),
    # one of '!', '+' or '-' (trailing '-' is literal), then an upper-case letter
    (r'[!+-][A-Z]', '-D'),
    # anything except an upper-case letter or a digit, followed by 'abc'
    (r'[^A-Z\d]abc', '#abc'),
]

for re_str, text in char_set_cases:
    result = fullmatch(re_str, text)
    print(result)

# Quantifiers: how many times the preceding item may repeat.

# Patterns shown purely as illustrations; they are never matched against
# anything and are overwritten by the loop below.
re_str = r'\d*'       # zero or more digits
re_str = r'[abc]*'    # '', 'a', 'ab', 'aa', 'abccabc', ...
re_str = r'\d{3}'     # exactly three digits

quantifier_cases = [
    # 3) *  zero or more repetitions: 'b', 'ab', 'aab', 'aaab', ...
    (r'a*b', 'aaaaab'),
    # 4) +  one or more repetitions: a number that does not start with 0
    #       (10, 11, 100, 1000, ...)
    (r'[1-9]+\d*', '1010'),
    # 5) ?  zero or one occurrence: an optional '@' before the digits
    (r'@?\d+', '@16723'),
    #       an integer with an optional sign: +200, -120, 99, -1, 3, +4
    (r'[+-]?[1-9]+\d*', '200'),
    # 6) {N}  exactly N repetitions: three ASCII letters
    (r'[a-zA-Z]{3}', 'aHh'),
    # 7) {N,}  at least N repetitions
    (r'\w{4,}', 'hanc_123'),
    # 8) {,N}  at most N repetitions: 'b', 'ab', 'aab', 'aaab', 'aaaab'
    (r'a{,4}b', 'aaaab'),
    # 9) {M,N}  between M and N repetitions (N > M): 'aab', 'aaab', 'aaaab'
    (r'a{2,4}b', 'aaab'),
]

for re_str, text in quantifier_cases:
    print(fullmatch(re_str, text))

# ============================== 3. Alternation and groups ======================
# 1) |  alternation ("or"): the text may match either side.
# Three ASCII letters, or three digits.
re_str = r'[a-zA-Z]{3}|\d{3}'
print(fullmatch(re_str, 'abc'))

# '|' splits the WHOLE pattern: the alternatives here are '\d{3}[a-z]{2}'
# (three digits then two lower-case letters) and '[A-F]{3}' (three letters
# from A to F).
re_str = r'\d{3}[a-z]{2}|[A-F]{3}'
print(fullmatch(re_str, 'ABC'))

# Note: alternatives are tried left to right and the first one that matches
# wins; the remaining alternatives are not tried at that position.
# Exercise: extract every number (integer or decimal) from a text such as
# 'abc12.5hhh60,30.2kkk9nn0.12'; valid numbers look like 100, 89.89, 20.12, 0.23.
# Another candidate pattern: r'[1-9]\d*[.]?\d*|0[.]\d+'
#
# Integer alternative first is wrong: '12.5' would be reported as '12' and '5'.
re_str = r'[1-9]\d*|\d+[.]\d+'
# Decimal alternative first, so that '12.5' is taken as a whole.
re_str = r'\d+[.]\d+|[1-9]\d*'
print(findall(re_str, 'abc12.5hhh60,30.2kkk9nn0.12'))

# 2) Groups
# a. Grouping
# Parentheses turn a sub-pattern into a single unit that a quantifier can
# repeat. Here: (two lower-case letters + two digits) repeated three times.
re_str = r'([a-z]{2}\d{2}){3}'
print(fullmatch(re_str, 'ac23bn45hj34'))

# One or more repetitions of (one digit followed by one lower-case letter).
re_str = r'(\d[a-z])+'
print(fullmatch(re_str, '9a2s3k4k9o'))

# b. Backreferences
# \N matches the exact text that group number N captured earlier in the same
# match (not the group's pattern again).
re_str = r'(\d{2}[A-Z])=%\1\1'
print(fullmatch(re_str, '23B=%23B23B'))

# \1{2} repeats the text of group 1 twice, \2 repeats the text of group 2.
re_str = r'(\d{3})-(\w{2})\1{2}\2'
print(fullmatch(re_str, '123-aa123123aa'))

# c. Capturing
# The whole pattern still has to match, but when the pattern contains a
# group, findall() returns only the text captured by that group.
re_str = r'a(\d{3})b'
print(fullmatch(re_str, 'a786b'))
print(findall(re_str, 'a786b'))




# Exercise: validate two values typed in by the user.
#   - user name: 6 to 20 word characters (\w)
#   - QQ number: 5 to 12 digits, the first of which is not 0
user_name = input('   :')
qq = input('QQ:')

# fullmatch() returns a match object (truthy) or None (falsy), so it can
# select the message directly.
print('     ' if fullmatch(r'\w{6,20}', user_name) else '      ')
print('QQ   ' if fullmatch(r'[1-9]\d{4,11}', qq) else 'QQ    ')

#   :       ,                

##      
import re


1.
                          。
 python       r         ,            

2.
       ,               \       
###\+  \.  \*  \?   \\ \(  \)   \[  \]   \^   \$  \|

  :
a. -                      
b.         []         ,    -                  
c. \           , ^             




# Escaping metacharacters: a backslash in front of a regex metacharacter makes
# it match itself. Raw strings (r'...') keep Python from interpreting the
# backslash before the regex engine sees it.
re_str = r'a\+'  # literal 'a' followed by a literal '+'
print(re.fullmatch(re_str, 'a+'))

re_str = r'\+a'  # an unescaped leading '+' would raise re.error (nothing to repeat)
print(re.fullmatch(re_str, '+a'))

re_str = r'\\w-a'  # '\\' matches one literal backslash, so this matches the text \w-a
print(re.fullmatch(re_str, r'\w-a'))

re_str = r'\(\d{3}'  # '\(' is a literal parenthesis, not the start of a group
print(re.fullmatch(re_str, '(234'))

re_str = r'[abc]'  # illustration only: a plain character set, never matched here

# Inside [] the characters . ? * are literal, so this negated set rejects
# exactly those three. The text starts with '?', hence no match (prints None).
# The trailing '\\' in the pattern matches the single backslash ending the text.
re_str = r'[^.?*]mbc\\'
print(re.fullmatch(re_str, '?mbc\\'))

##re  
import re

# 1. compile(pattern): turns a pattern string into a reusable pattern object.
re_str = r'\d+'
re_object = re.compile(re_str)
print(re_object)

# Module-level function: the pattern string is passed on every call.
re.fullmatch(re_str, '78hj')
# Same check through the compiled object: only the text is passed.
re_object.fullmatch('78hj')

# 2. match(pattern, text) and fullmatch(pattern, text)
# match:     the pattern has to match at the START of the text.
# fullmatch: the pattern has to match the WHOLE text.
# Both return a match object on success and None otherwise.
re_str = r'abc\d{3}'
match1 = re.match(re_str, 'abc234abcdef')
match2 = re.fullmatch(re_str, 'abc234')
print(match1)
print(match2)
# a. Position of the match. span() returns a (start, end) tuple; the end
#    index is exclusive.
print(match1.span())
# Start index of the match.
print(match1.start())
# End index of the match (exclusive).
print(match1.end())
# Note: passing a group number restricts the query to that group.
re_str = r'(\d{3})\+([a-z]{2})'
match1 = re.match(re_str, '234+hjaaaaaa')
print(match1)
print(match1.span())
# Span of the text captured by group 1.
print(match1.span(1))
# Span of the text captured by group 2.
print(match1.span(2))

# Start index of the text captured by group 2.
print(match1.start(2))

# b. Matched text: group() is the whole match, group(N) is group N.
print(match1.group())
print(match1.group(1))
print(match1.group(2))

# c. The original text that was passed to match().

print(match1.string)


# 3. search(pattern, text)
# Scans the text for the first location where the pattern matches and returns
# a match object for it, or None when there is no match anywhere.
search1 = re.search(r'\d+aa', 'he9aallo 78aabc wolrd')
print(search1)
if search1 is not None:
    print(search1.span())

# Exercise: use search() to print every number contained in a text such as
# '   10000 ,   18 ,   :180,  100 '
str1 = '   10000 ,   18 ,   :180,  100 '
re_str = r'[1-9]\d*'
# Find the first number, print it, drop everything up to the end of that
# match, and search the remainder again until nothing is left to find.
while True:
    search1 = re.search(re_str, str1)
    if search1 is None:
        break
    print(search1.group())
    end = search1.end()
    str1 = str1[end:]

# 4. split(pattern, text)
# Cuts the text at every place the pattern matches and returns the pieces
# as a list.
str1 = '     ,     。     ,     !'
non_word_run = re.compile(r'\W+')
result = non_word_run.split(str1)
print(result)

# 5. sub(pattern, replacement, text)
# Returns a copy of the text in which every match of the pattern has been
# replaced by the replacement string.
word = '      ?       . Fuck you'
censored = re.compile(r'  | |Fuck|  ')
result = censored.sub('*', word)
print(result)

# 6. findall(pattern, text)
# Returns a list with every non-overlapping match found in the text.
# Note: when the pattern contains a group, only the text captured by the
# group is returned (here: the letters that follow a digit).
digit_then_letters = re.compile(r'\d([a-z]+)')
result = digit_then_letters.findall('   1boy,and    0action')
print(result)