Python 3の基礎:正規表現(二)

25594 ワード

目次
正規表現のコンパイル
一般手順
re.compile
patternオブジェクトのプロパティとメソッド
flagsプロパティ
groupsプロパティ
groupindexプロパティ
pattern.match()メソッド
re.match()メソッド
pattern.search()メソッド
re.search()メソッド
findall()メソッド
pattern.findall()メソッド
re.findall()メソッド
pattern.finditer()メソッド
re.finditer()メソッド
pattern.split()メソッド
re.split()メソッド
pattern.sub()メソッド
re.sub()メソッド
pattern.subn()メソッド
re.subn()メソッド
前回まとめた正規表現の各種記号に続いて、このセクションでは、正規表現のコンパイル方法と、コンパイル後のパターンオブジェクトでよく使われるメソッド/属性、およびreモジュールの対応する関数との比較について説明します.
正規表現のコンパイル
よく使われる正規表現を正規表現オブジェクトにコンパイルして繰り返し利用することで、ある程度効率を高めることができます.1回コンパイルしてから複数回使用したり、シリアライズ(直列化)したりすることができます.
一般手順
reモジュールを使用する一般的な手順は、文字列形式の正規表現をpatternインスタンスにコンパイルした後、patternインスタンスを使用してテキストを処理し、一致結果(Matchインスタンス)を取得し、最後にMatchインスタンスを使用して情報を取得して他の操作を行うことです.

re.compile
re.compile(pattern, flags=0)
patternクラスのファクトリメソッドは、正規表現patternをpatternオブジェクトにコンパイルし、そのオブジェクトを返します.
パラメータ:2番目のパラメータflagsはマッチングモードです.ビットOR演算子'|'を使うと、re.I | re.M のように複数のモードを同時に有効にすることができます.正規表現の文字列内でモードを指定することもできます.たとえば、次のようにします.
re.compile('pattern',re.I|re.M)
次のように等価です.
re.compile('(?im)pattern')
コードの例:


import re

#         Pattern  
pattern = re.compile(r'hello')
# match() tries the pattern at the start of the text and yields None on failure.
match = pattern.match('hello world')
if match is not None:
    # Show the matched text, then the pos/endpos bounds used for the match.
    print(match.group(), match.pos, match.endpos, sep='\n')


 
 pattern 
 pattern re.compile() ，pattern ， re.compile() 。pattern ：

>>> p = re.compile(r'\d')
>>> dir(p)
['__class__', '__copy__', '__deepcopy__', '__delattr__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', 'findall', 'finditer', 'flags', 'fullmatch', 'groupindex', 'groups', 'match', 'pattern', 'scanner', 'search', 'split', 'sub', 'subn']

 
flags 
 マッチングモードを数値で表した属性です. p.flags で参照でき、コンパイル時に有効になったフラグのビットORの値になります。

    :         Pattern  

>>> p = re.compile(r'(\w+) (\w+)(?P<sign>.*)', re.DOTALL)

>>> p.flags#compile    pattern   flags    48

48

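>>> re.DOTALL   # the numeric value of re.DOTALL is 16

<RegexFlag.DOTALL: 16>

>>> re.match(r'.','\nbds',16)     # passing 16 has the same effect as re.DOTALL

<_sre.SRE_Match object; span=(0, 1), match='\n'>

>>> re.match(r".","\ndb",48)   # 16 and 48 give the same result
<_sre.SRE_Match object; span=(0, 1), match='\n'>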
>>> p = re.compile(r'(\w+) (\w+)(?P<sign>.*)', 16)

>>> p.flags#compile    pattern   flags    48

48
 
groups 
 ， 。

    :

>>> p = re.compile(r'(\w+) (\w+)(?P<sign>.*)', re.DOTALL)
>>> p.groups
3
 
groupindex 
 ， ， 

    :

#          Pattern  
>>> p = re.compile(r'(\w+) (\w+)(?P<sign>.*)', re.DOTALL)

#                       

>>> p.groupindex
mappingproxy({'sign': 3})
 
 
pattern.match() 
match(string=None, pos=0, endpos=9223372036854775807, *, pattern=None)
 : string pos pattern， endpos， Match ； pattern endpos， None。
 
 ：
string： 
pos: ， ， 0
endpos: ， ， len(string)
 Match ， group() 

    :
#encoding=utf-8

import re
p = re.compile(r'\d+')

# No pos/endpos given: matching starts at index 0.
m1 = p.match('123abc')
if m1 is None:
    print('       ')
else:
    print('      ：', m1.group())

# Start matching at index 2.
m2 = p.match('123abc', 2)
if m2 is None:
    print('       ')
else:
    print('      ：', m2.group())

# Start at index 2 with endpos=10, which is larger than the 6-character string.
m3 = p.match('123abc', 2, 10)
if m3 is None:
    print('       ')
else:
    print('      ：', m3.group())
 
re.match() 
match(pattern, string, flags=0)
 : string pattern， Match ； None。
 
 ：
pattern: 
string： 
flags: ， ， ， 
 ： pattern.match() ， pos endpos

    :

# Raw string literal: \d, \s and \w must reach the regex engine untouched.
# In a plain literal they are invalid string escapes, which newer Python
# versions warn about.
match = re.match(r'\d\s\w', '1 Abs', re.I)
if match:
    print('      ：', match.group())
else:
    print('       ')

 
pattern.search() 
search(string=None, pos=0, endpos=9223372036854775807, *, pattern=None)
 ： string[pos,endpos] pos pattern， ， Match ； ， pos 1 ， pos=endpos None。
 ：
string： 
pos: ， ， 0
endpos: ， ， len(string)
 pos endpos ， Match ， group() 
 
 ： pattern.match ，match ， ， None; search ， 

    :
#encoding=utf-8

import re
#      ，      ，0        （     ）
p = re.compile(r'\d+\s?\w*')

# No pos/endpos given: the whole string is scanned.
m1 = p.search('12 bcr123Abc123')
if m1 is None:
    print("            ")
else:
    print('      ：', m1.group())

# Restrict the scan to pos=1 .. endpos=10.
m2 = p.search('12 bcr123Abc123', 1, 10)
if m2 is None:
    print("            ")
else:
    print('      ：', m2.group())

# pos=3 with endpos=30, which is larger than the length of the string.
m3 = p.search('12 bcr123Abc123', 3, 30)
if m3 is None:
    print("            ")
else:
    print('      ：', m3.group())
 
re.search() 
search(pattern, string, flags=0)
 ： ， None。
 ： pattern.search ， pos endpos
 ：
pattern: 
string： 
flags: ， ， ， 

    :  abc     ，   （*），    
#encoding=utf-8

import re
# One of a/b/c, then a literal asterisk, then exactly two digits.
m = re.search(r'[abc]\*\d{2}', 'c*2345')
if m is None:
    print('       ')
else:
    print('      ：', m.group())
 
findall() 
    ： ， ， 。 ， ， ：


 () ， ，  。 ， （ ’(’ ， ）



>>> re.findall(r"([a-z])(\d+)","1a2b3c")
[('a', '2'), ('b', '3')]


>>> re.findall(r"([a-z])(\d+)([a-z])","a1ab2bc3")
[('a', '1', 'a'), ('b', '2', 'b')]


>>> re.findall(r"(([a-z])(\d+)([a-z]))","a1ab2bc3c")
[('a1a', 'a', '1', 'a'), ('b2b', 'b', '2', 'b'), ('c3c', 'c', '3', 'c')]
 
 

2. ， ，  ， 。（ ： ， ）


>>> re.findall(r"[a-z](\d+)","1a2b3c")
['2', '3']
 

 ，  


>>> re.findall(r"\d+","1a2b3c")
['1', '2', '3']
 
pattern.findall() 
findall(string=None, pos=0, endpos=9223372036854775807, *, source=None)
 ： string[pos,endpos] pos pattern ， endpos ，   ，  
 ：
string： 
pos: ， ， 0
endpos: ， ， len(string)。 pos endpos ， 。

#    1:          

#           10         
# Example 1: no capturing group, so findall returns the full matches.
# Only the first 10 characters of the text are scanned.
pattern = re.compile(r'\d+')
match = pattern.findall('o89ne1two2three3four4', pos=0, endpos=10)
print(match)


# Example 2: exactly one capturing group, so findall returns a list
# holding only what that group captured.
matchStr = 'adsd12343.jl34d5645fd789'
pattern = re.compile(r'.*?(\d+).*?')
result = pattern.findall(matchStr)
print(result)
 

 ： ， ， ， ， 


    3:            

#                
# Several capturing groups: findall returns one tuple per match, holding
# every group in order (a repeated group keeps only its last repetition).
add = 'https://www.net.com.edu//action=?asdfsd and other https://www.baidu.com//a=b'
pattern = re.compile(r'((w{3}\.)(\w+\.)+(com|edu|cn|net))')
result = pattern.findall(add)
print(result)
 
re.findall() 
findall(pattern, string, flags=0)
 ： ， 。
 ： pattern.findall ， pos endpos
 ：
pattern: 
string： 
flags: ， ， ， 

  

    1:          
#          ’oo’     
# Collect every word that contains 'oo'.
res = re.findall(r'\w*oo\w*', 'woo this foo is too')
print(res)

    2:      1     
#              
# One capturing group: only the digits captured by the group are returned.
res = re.findall(r'.*?(\d+).*?', 'adsd12343.jl34d5645fd789')
print(res)


    3:            
#                
# Several capturing groups: each result item is a tuple of all groups.
add = 'https://www.net.com.edu//action=?asdfsd and other https://www.baidu.com//a=b'
res = re.findall(r'((w{3}\.)(\w+\.)+(com|edu|cn|net))', add)
print(res)

 
pattern.finditer() 
finditer(string, pos=0, endpos=9223372036854775807)
 ： findall ， ，  ， list，  ， ， 。
 ：
string： 
pos: ， ， 0
endpos: ， ， len(string)。 pos endpos ， 。

    :

#encoding=utf-8
import re
# Case-insensitive runs of letters; finditer yields Match objects one by one.
p = re.compile(r'[a-z]+', re.I)
for i in p.finditer('one12two34three56four'):
    print(i[0])  # i[0] is the whole match, same as i.group()
 
re.finditer() 
finditer(pattern, string, flags=0)
 ： findall ， ，  ， list，  ， ， 。
 
 ：
pattern: 
string： 
flags: ， ， ， 

    :

#                 
#encoding=utf-8
import re
# Module-level finditer: iterate over the Match objects for each run of letters.
for i in re.finditer(r'[A-Za-z]+', 'one12two34three56four'):
    print(i[0])  # i[0] is the whole match, same as i.group()
 
pattern.split() 
split(string=None, maxsplit=0, *, source=None)
 ： ， string ，  
 
 ：
string： 
maxsplit : ， ， 0. 

    :
#encoding=utf-8
import re

p = re.compile(r'\d+')
# No maxsplit: split at every run of digits.
print(p.split('one1two2three3four4'))
# Split at most 3 times; the rest of the string is kept as the last item.
print(p.split('one1two2three3four4', maxsplit=3))
 
re.split() 
split(pattern, string, maxsplit=0, flags=0)
 ： ， string ， list
 
 ：
pattern: 
string： 
maxsplit : ， ， 0. 
flags: ， ， ， 

    1:

#encoding=utf-8

import re
#       
# No maxsplit: split at every run of digits.
print(re.split(r'\d+', 'one1two2three3four4'))
# Split at most 3 times. maxsplit is passed by keyword because passing it
# positionally to the module-level re.split() is deprecated in newer Pythons.
print(re.split(r'\d+', 'one1two2three3four4', maxsplit=3))

    2:   abcd
>>> s = "a 2  b  2    c  5  d"
>>> print( re.split(r'\s+\d\s+',s))
['a', 'b', 'c', 'd']
>>> print( re.split(r'\s+\d\s+',s,2))
['a', 'b', 'c  5  d']
>>> print( re.split(r'\s+\d\s+',s,20))
['a', 'b', 'c', 'd']
 
 
pattern.sub() 
sub(repl, string, count=0)
 ： repl string ；
 repl が文字列の場合、\id または \g<id>、\g<name> でグループを参照できます。ただし \0 は使用できません(マッチ全体は \g<0> で参照します);
 repl   ， Match ， （ ）
 
 ：
repl： 
string： 
count: ， 0 ， 1 1 ， 2 2 ， ， 0
  

    1:repl           

# Two words separated by one space, each captured in its own group.
p = re.compile(r'(\w+) (\w+)')
s = 'i say, hello world!'
print(p.findall(s))
# \2 and \1 refer to the captured groups, so each word pair is swapped.
print(p.sub(r'\2 \1', s))

 

 ：
"i say, hello world!"
 2 ：
 ：i say---> 1：i 2：say   \2 \1--->say i
say i, hello world!
 ：hello world
hello world---> 1：hello   2：world   \2 \1--->world hello
 ： say i, world hello!


    2:repl    ，      m    

#encoding=utf-8
import re
# Two words separated by one space, each captured in its own group.
p = re.compile(r'(\w+) (\w+)')


def func(m):
    """Return both captured words of match *m* title-cased, joined by a space."""
    first, second = m.group(1, 2)
    return ' '.join((first.title(), second.title()))


print(p.sub(func, "i say,hello word"))



    3:
#encoding=utf-8

import re
p = re.compile(r'(\w+) (\w+)')
s = 'i say, hello world!'


def func(m):
    """Replacement callback: print details of match *m* and build its replacement.

    Args:
        m: the Match object passed in by ``Pattern.sub`` for each match.

    Returns:
        The two captured groups, upper-cased and concatenated without a space.
    """
    print(m)
    # print(dir(m))  # uncomment to list the attributes of the Match object
    print(m.string)    # the text that was searched
    print(m.regs)      # NOTE(review): appears to hold the (start, end) spans of the match and its groups
    print(m.group())   # the whole matched text
    print(m.group(1))  # text captured by the first group
    print(m.group(2))  # text captured by the second group
    return m.group(1).upper() + m.group(2).upper()


# The '\n' escape had been broken into a raw line break inside the literal,
# which left the string unterminated and the script un-runnable.
print('\n')
print(p.sub(func, s, 1))  # replace only the first match
print('*' * 40)
print(p.sub(func, s))
 
re.sub() 
re.sub(pattern, repl, string, count=0, flags=0) 
 ：
pattern： 
repl： 
string： 
count： ， 0 ， 1 1 , ， 0。
flags： ， 。 、 。

    1:repl    

#encoding=utf-8

import re
#      4    0
# Overwrite the last four digits of the phone number with zeros.
tel = '13549876489'
print(re.sub(r'\d{4}$', '0' * 4, tel))

# Drop everything from '#' to the end of the line.
s = 'num=0 # a number'
print(re.sub(r'#.*$', '', s))
 

    2:repl   

  add         ，               add  ，                

#encoding=utf-8

import re
def add(m):
    """Return the integer matched by *m* increased by 10, as a string."""
    number = int(m.group())
    return str(number + 10)


print(re.sub(r'\d+', add, '1 2 3 4 5'))





    3:            
>>> re.sub(r'\s','','and   \r \n \tn')
'andn'
>>> re.sub(r"[ \t\r]+","","aa   11b     b22  \n   \t   cc")
'aa11bb22\ncc'

 
pattern.subn() 
pattern.subn(repl, string[, count = 0]) 
 ：sub ， tuple，tuple ， 。 ， subn 。

    1:repl    ，          
#encoding=utf-8
import re
# Remove every run of characters that are neither word characters nor
# whitespace; subn returns the new string together with the replacement count.
s = "^&&today&(is%%#fine# day!"
p = re.compile(r'[^\w\s]+')
print(p.subn('', s))



    2:repl      
#encoding=utf-8

import re
# The '\n' escape had been broken into a raw line break inside the literal,
# which left the string unterminated and the script un-runnable.
print('\n')


def add(m):
    """Return the integer matched by *m* increased by 10, as a string."""
    return str(int(m.group()) + 10)


# subn returns a (new_string, number_of_replacements) tuple.
print(re.subn(r'\d+', add, '1 2 3 4 5'))
 
re.subn() 
re.subn(pattern, repl, string, count=0, flags=0) 

    :    
      
>>> re.subn(r"[ \t\r]+","","aa   11b     b22  \n   \t   cc")
('aa11bb22\ncc', 4)

Effective C++ 項目07:多態基底クラスではデストラクタをvirtualとして宣言する

Rocket.ChatをCentOS 7にインストール