Pythonでログイン用の認証コード画像を認識する機能の実装（完全なコード付き）

7850 ワード

python 識別認証コード

自動化テストケースを作成していると、ログインのたびに認証コードの入力を求められます。そこで、画像内の認証コードをPython自身に認識させ、手動で入力しなくても済むようにしたいと考えました。認識機能をどのように実装するかを調べたので、メモとして残しておきます。
まず、使用するライブラリをインポートします：re、PIL（Image）、pytesseract、selenium、time。


import re #     
from PIL import Image #             
import pytesseract #        
from selenium import webdriver #       
import time #

認識を行うには、まず認証コードの画像を取得する必要があります。
クラスを作成し、ウェブページを開くための webdriver と、ページ内の要素を取得するための find_element_by_css_selector メソッドを定義します。


class VerificationCode:
  """Hold the Firefox WebDriver session used by the captcha-reading steps."""

  def __init__(self):
    """Start a Firefox session and set up a CSS-selector element lookup."""
    # Imported locally so the file's import block does not need to change.
    from selenium.webdriver.common.by import By

    self.driver = webdriver.Firefox()
    # The find_element_by_* helpers were removed in Selenium 4.3; the generic
    # find_element(By.CSS_SELECTOR, ...) call works on Selenium 3 and 4 alike.
    self.find_element = lambda selector: self.driver.find_element(By.CSS_SELECTOR, selector)

ブラウザを開いて検証コードの画像を切り取ります。


 def get_pictures(self):
    """Open the target page and return the captcha cropped from a screenshot.

    The page is saved to 'pictures.png'; the region occupied by the '#pic'
    element is cut out of that screenshot, displayed and returned.
    The browser window is closed before returning, including when a step
    raises.
    """
    try:
      self.driver.get('http://123.255.123.3')
      captcha = self.find_element('#pic')
      time.sleep(1)
      # Capture the page only after the wait, so the screenshot and the
      # element coordinates read below describe the same page state.
      self.driver.save_screenshot('pictures.png')
      page = Image.open('pictures.png')
      origin = captcha.location
      extent = captcha.size
      left, top = origin['x'], origin['y']
      # crop() expects a (left, upper, right, lower) box.
      box = (left, top, left + extent['width'], top + extent['height'])
      cropped = page.crop(box)
      cropped.show()
      return cropped
    finally:
      # Runs on success and on failure, so the browser window never leaks.
      self.driver.close()

未処理前の認証コード画像は以下の通りです。

未処理の認証コード画像は、そのままではPythonでの認識率が低くなります。よく見ると、画像の中には認識を妨げる色とりどりのノイズ点が多数あり、これが認識率に大きく影響していることが分かります。
そこで、取得した認証コード画像を以下のように処理します。
まず convert で画像をグレースケールに変換し、次に閾値で二値化します。閾値未満の画素は黒（0）、閾値以上の画素は白（255）になります。


def processing_image(self):
    """Return the captcha image reduced to pure black and white.

    The image from get_pictures() is converted to 8-bit greyscale, then
    every pixel darker than the threshold becomes 0 (black) and every
    other pixel becomes 255 (white).
    """
    grey = self.get_pictures().convert("L")
    threshold = 160
    # point() evaluates the function once per possible grey level and maps
    # all pixels through the resulting table.
    return grey.point(lambda level: 0 if level < threshold else 255)

階調処理後の画像

その後、識別を妨害する画素点を削除する。


  def delete_spot(self):
    """Whiten dark pixels that have no dark neighbour above, below, left or right.

    Works on the image returned by processing_image() and returns that same
    image after editing it in place. Pixels on the outermost rows and
    columns are never examined.
    """
    images = self.processing_image()
    flat = images.getdata()
    width, height = images.size
    for col in range(1, width - 1):
      for row in range(1, height - 1):
        centre = width * row + col
        if flat[centre] >= 50:
          # Not a dark pixel, nothing to clean here.
          continue
        # Flat indices of the top, left, bottom and right neighbours.
        around = (centre - width, centre - 1, centre + width, centre + 1)
        if not any(flat[idx] < 10 for idx in around):
          images.putpixel((col, row), 255)
    return images

ノイズ除去後の画像

最後に処理した画像を文字に変換します。
デフォルトのパスでは tesseract が見つからないため、まず pytesseract に tesseract 実行ファイルのパスを設定し、そのうえで画像をテキストに変換します。画像によってはノイズを除去しきれず、残った点がスペース、ピリオド、セミコロンなどとして認識されることがあります。そのため、認識結果から特殊文字を取り除く処理を追加しました。
PS：tesseractファイルダウンロードリンク


def image_str(self):
    """Run OCR on the cleaned captcha image and return up to four characters.

    Everything except CJK ideographs, digits and ASCII letters is stripped
    from the OCR output before it is cut to its first four characters.
    """
    cleaned_image = self.delete_spot()
    # Point pytesseract at the Tesseract executable before running OCR.
    pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
    unwanted = u"([^\u4e00-\u9fa5\u0030-\u0039\u0041-\u005a\u0061-\u007a])"
    recognised = pytesseract.image_to_string(cleaned_image)
    return re.sub(unwanted, "", recognised)[:4]

完全コードは以下の通りです。


import re #     
from PIL import Image #             
import pytesseract #        
from selenium import webdriver #       
import time #       
 
 
class VerificationCode:
  """Fetch a captcha image with Selenium and read it with Tesseract.

  The methods form a pipeline in which each stage calls the previous one:
  get_pictures -> processing_image -> delete_spot -> image_str.
  """

  def __init__(self):
    """Start a Firefox session and set up a CSS-selector element lookup."""
    # Imported locally so the file's import block does not need to change.
    from selenium.webdriver.common.by import By

    self.driver = webdriver.Firefox()
    # The find_element_by_* helpers were removed in Selenium 4.3; the generic
    # find_element(By.CSS_SELECTOR, ...) call works on Selenium 3 and 4 alike.
    self.find_element = lambda selector: self.driver.find_element(By.CSS_SELECTOR, selector)

  def get_pictures(self):
    """Open the target page and return the captcha cropped from a screenshot.

    The page is saved to 'pictures.png'; the region occupied by the '#pic'
    element is cut out of that screenshot, displayed and returned.
    The browser window is closed before returning, including when a step
    raises.
    """
    try:
      self.driver.get('http://123.255.123.3')
      captcha = self.find_element('#pic')
      time.sleep(1)
      # Capture the page only after the wait, so the screenshot and the
      # element coordinates read below describe the same page state.
      self.driver.save_screenshot('pictures.png')
      page = Image.open('pictures.png')
      origin = captcha.location
      extent = captcha.size
      left, top = origin['x'], origin['y']
      # crop() expects a (left, upper, right, lower) box.
      box = (left, top, left + extent['width'], top + extent['height'])
      cropped = page.crop(box)
      cropped.show()
      return cropped
    finally:
      # Runs on success and on failure, so the browser window never leaks.
      self.driver.close()

  def processing_image(self, threshold=160):
    """Return the captcha image reduced to pure black and white.

    Args:
      threshold: grey level (0-255) below which a pixel becomes black (0);
        every other pixel becomes white (255).

    Returns:
      A new 8-bit greyscale image containing only the values 0 and 255.
    """
    grey = self.get_pictures().convert("L")
    # point() evaluates the function once per possible grey level and maps
    # all pixels through the resulting table.
    return grey.point(lambda level: 0 if level < threshold else 255)

  def delete_spot(self):
    """Whiten dark pixels that have no dark neighbour above, below, left or right.

    Works on the image returned by processing_image() and returns that same
    image after editing it in place. Pixels on the outermost rows and
    columns are never examined.
    """
    images = self.processing_image()
    flat = images.getdata()
    width, height = images.size
    for col in range(1, width - 1):
      for row in range(1, height - 1):
        centre = width * row + col
        if flat[centre] >= 50:
          # Not a dark pixel, nothing to clean here.
          continue
        # Flat indices of the top, left, bottom and right neighbours.
        around = (centre - width, centre - 1, centre + width, centre + 1)
        if not any(flat[idx] < 10 for idx in around):
          images.putpixel((col, row), 255)
    return images

  def image_str(self, tesseract_cmd=r"C:\Program Files\Tesseract-OCR\tesseract.exe"):
    """Run OCR on the cleaned captcha image and return up to four characters.

    Args:
      tesseract_cmd: path of the Tesseract executable handed to pytesseract.
        The default is the Windows location the original code hard-coded.

    Returns:
      The first four characters of the OCR output after everything except
      CJK ideographs, digits and ASCII letters has been stripped.
    """
    cleaned_image = self.delete_spot()
    pytesseract.pytesseract.tesseract_cmd = tesseract_cmd
    unwanted = u"([^\u4e00-\u9fa5\u0030-\u0039\u0041-\u005a\u0061-\u007a])"
    recognised = pytesseract.image_to_string(cleaned_image)
    return re.sub(unwanted, "", recognised)[:4]
 
if __name__ == '__main__':
  # Run the whole pipeline once; the recognised text is returned by
  # image_str() but not used any further here.
  recognizer = VerificationCode()
  recognizer.image_str()

コメント欄を見ると tesseract.exe ファイルを必要としている方が多いようですが、ファイルが大きすぎるため、メールで送ってもダウンロードできない場合があります。必要な方は少々お待ちください。
ここではpythonの識別登録検証コード写真（完全コード）に関する記事を紹介します。より多くの関連pythonの識別登録検証コード画像の内容は以前の文章を検索してください。または下記の関連記事を引き続きご覧ください。これからもよろしくお願いします。

正規表現の基本文法詳細

pythonコードを使ってアシスタントを実行します。