Python Bs 4レビュー

11249 ワード

BeautifulSoup (bs4) は主に find() メソッドと find_all() メソッドを使ってドキュメントを検索します。find() は単一のデータを検索するために使用され、find_all() は複数のデータを検索するために使用されます。
find_all()とfind()
name -> タグ名、string -> テキスト内容、recursive -> すべての子孫ノードを検索するかどうか(デフォルトは True。False の場合は直接の子ノードのみを検索する)
2つのメソッドの使い方は似ているので、ここでは find_all() を例に挙げます。
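 <div class="cnblogs_code"> 
  <pre><code><span style="color: #008000;">#</span><span style="color: #008000;"> tag 名で検索</span>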
soup.find_all(<span style="color: #800000;">"</span><span style="color: #800000;">title</span><span style="color: #800000;">"</span><span style="color: #000000;">)

</span><span style="color: #008000;">#</span><span style="color: #008000;">    </span><span style="color: #008000;">
#</span><span style="color: #008000;">  id "link2"   </span>
soup.find_all(id=<span style="color: #800000;">'</span><span style="color: #800000;">link2</span><span style="color: #800000;">'</span><span style="color: #000000;">)

</span><span style="color: #008000;">#</span><span style="color: #008000;">             ,      ,  ,True</span>
soup.find_all(id=re.compile(<span style="color: #800000;">"</span><span style="color: #800000;">elsie</span><span style="color: #800000;">"</span><span style="color: #000000;">))

</span><span style="color: #008000;">#</span><span style="color: #008000;">        </span>
soup.find_all(href=re.compile(<span style="color: #800000;">"</span><span style="color: #800000;">elsie</span><span style="color: #800000;">"</span>), id=<span style="color: #800000;">'</span><span style="color: #800000;">link1</span><span style="color: #800000;">'</span><span style="color: #000000;">)

</span><span style="color: #008000;">#</span><span style="color: #008000;">           (data-foo)</span>
soup.find_all(attrs={<span style="color: #800000;">"</span><span style="color: #800000;">data-foo</span><span style="color: #800000;">"</span>: <span style="color: #800000;">"</span><span style="color: #800000;">value</span><span style="color: #800000;">"</span><span style="color: #000000;">})

</span><span style="color: #008000;">#</span><span style="color: #008000;"> class で検索 --> class は Python の予約語なので class_ を使う</span>
soup.find_all(class_=<span style="color: #800000;">"</span><span style="color: #800000;">top</span><span style="color: #800000;">"</span><span style="color: #000000;">)
</span><span style="color: #008000;">#</span><span style="color: #008000;">    </span>

<span style="color: #008000;">#</span><span style="color: #008000;">  string(  )</span><span style="color: #008000;">
#</span><span style="color: #008000;">      'Elsie' </span>
soup.find_all(string=<span style="color: #800000;">"</span><span style="color: #800000;">Elsie</span><span style="color: #800000;">"</span><span style="color: #000000;">)

</span><span style="color: #008000;">#</span><span style="color: #008000;">       </span>
soup.find_all(string=[<span style="color: #800000;">"</span><span style="color: #800000;">Tillie</span><span style="color: #800000;">"</span>, <span style="color: #800000;">"</span><span style="color: #800000;">Elsie</span><span style="color: #800000;">"</span>, <span style="color: #800000;">"</span><span style="color: #800000;">Lacie</span><span style="color: #800000;">"</span><span style="color: #000000;">])

</span><span style="color: #008000;">#</span><span style="color: #008000;">          </span>
soup.find_all(string=re.compile(<span style="color: #800000;">"</span><span style="color: #800000;">Dormouse</span><span style="color: #800000;">"</span><span style="color: #000000;">))

</span><span style="color: #008000;">#</span><span style="color: #008000;">    </span>
soup.find_all(string=<span style="color: #000000;">is_the_only_string_within_a_tag)
</span><span style="color: #008000;">#</span><span style="color: #008000;">    </span>

<span style="color: #008000;">#</span><span style="color: #008000;">    </span><span style="color: #008000;">
#</span><span style="color: #008000;">       2</span>
soup.find_all(<span style="color: #800000;">"</span><span style="color: #800000;">a</span><span style="color: #800000;">"</span>, limit=2<span style="color: #000000;">)

</span><span style="color: #008000;">#</span><span style="color: #008000;">        </span>
soup.html.find_all(<span style="color: #800000;">"</span><span style="color: #800000;">a</span><span style="color: #800000;">"</span>, recursive=<span style="color: #000000;">False)
</span><span style="color: #008000;">#</span><span style="color: #008000;">      </span></code></pre> 
 </div> 
 <p>  </p> 
 <div class="cnblogs_code"> 
  <pre><code>soup.find_all(<span style="color: #800000;">"</span><span style="color: #800000;">a</span><span style="color: #800000;">"</span><span style="color: #000000;">)
</span><span style="color: #008000;">#</span><span style="color: #008000;">   </span>
soup(<span style="color: #800000;">"</span><span style="color: #800000;">a</span><span style="color: #800000;">"</span>)</code></pre> 
 </div> 
 <div class="cnblogs_code"> 
  <pre><code>soup.title.find_all(string=<span style="color: #000000;">True)
</span><span style="color: #008000;">#</span><span style="color: #008000;">   </span>
soup.title(string=True)</code></pre> 
 </div> 
 <h3 id="css   ">CSSセレクタ</h3> 
 <div class="cnblogs_Highlighter"> 
  <pre><code>Beautiful Soup は大部分の CSS セレクタをサポートしています

#  tag title
soup.select("title")

#  tag      
soup.select("html head title")

#       
soup.select("head > title")
soup.select("p > #link1")

#       id link1     class sister   
soup.select("#link1 + .sister")

#  p        ul  
soup.select("p + ul")

#     CSS       
soup.select("#link1,#link2")

#        
soup.select('a[href="http://example.com/elsie"]')
# [<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>]

soup.select('a[href^="http://example.com/"]')
# [<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
#  <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>,
#  <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>]

soup.select('a[href$="tillie"]')
# [<a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>]

soup.select('a[href*=".com/el"]')
# [<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>]
#          

#      
soup.select('p[lang|=en]')

#       
soup.select_one(".sister")
</code></pre> 
 </div> 
 <p>  </p> 
 <h3 id="find  ">find_  </h3> 
 <p>find_parents() と find_parent()</p> 
 <p>現在のノードの親(祖先)ノードを検索します</p> 
 <div class="cnblogs_code"> 
  <pre><code><span style="color: #008000;">#</span><span style="color: #008000;">      a</span>
a = soup.find(<span style="color: #800000;">"</span><span style="color: #800000;">a</span><span style="color: #800000;">"</span>, id=<span style="color: #800000;">"</span><span style="color: #800000;">link1</span><span style="color: #800000;">"</span><span style="color: #000000;">)

</span><span style="color: #008000;">#</span><span style="color: #008000;"> a の親ノードから p タグを検索</span>
a.find_parent(<span style="color: #800000;">"</span><span style="color: #800000;">p</span><span style="color: #800000;">"</span>)</code></pre> 
 </div> 
 <p>find_next_siblings() と find_next_sibling()</p> 
 <p>現在のノードより後ろにある兄弟ノードを検索します<br/> (前者は条件に一致するすべてのノードを返し、後者は最初の1つだけを返します)</p> 
 <p>find_previous_siblings() と find_previous_sibling()</p> 
 <p>現在のノードより前にある兄弟ノードを検索します<br/> (前者は条件に一致するすべてのノードを返し、後者は最初の1つだけを返します)</p> 
 <p>find_all_next() と find_next()</p> 
 <p>現在のノードより後ろにあるタグと文字列を検索します</p> 
 <p>find_all_previous() と find_previous()</p> 
 <p>現在のノードより前にあるタグと文字列を検索します</p> 
</div>
                            </div>
                        </div>