21.1.27
import requests
from bs4 import BeautifulSoup
incruit_result = requests.get("https://search.incruit.com/list/search.asp?col=job&src=gsw*search&kw=%b9%e9%bf%a3%b5%e5&startno=0")
incruit_soup = BeautifulSoup(incruit_result.text, "html.parser")
paging = incruit_soup.find("p", {"class" : "sqr_paging sqr_pg_mid"})
pages = paging.find_all("a")
spans = []
for page in pages:
    spans.append(page.find("span"))
spans = spans[:-2]
# the spans give pages 2-5; the last two entries are buttons, so I removed them.
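As a sanity check on that slicing, here is a self-contained toy version (the markup below is made up to mimic the pagination block, not Incruit's real HTML):

from bs4 import BeautifulSoup
# hypothetical markup: four page links followed by two jump buttons
html = """
<p class="sqr_paging sqr_pg_mid">
  <a><span>2</span></a><a><span>3</span></a><a><span>4</span></a><a><span>5</span></a>
  <a><span>next</span></a><a><span>last</span></a>
</p>
"""
paging = BeautifulSoup(html, "html.parser").find("p", {"class" : "sqr_paging sqr_pg_mid"})
spans = [a.find("span") for a in paging.find_all("a")]
print([s.get_text() for s in spans[:-2]])  # ['2', '3', '4', '5']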
It seems the only option is to scrape the span values that sit in front of the jump-to-page buttons. Did something change in Chrome...?

import requests
from bs4 import BeautifulSoup
incruit_result = requests.get("https://search.incruit.com/list/search.asp?col=job&src=gsw*search&kw=%b9%e9%bf%a3%b5%e5&startno=0")
incruit_soup = BeautifulSoup(incruit_result.text, "html.parser")
paging = incruit_soup.find("p", {"class" : "sqr_paging sqr_pg_mid"})
links = paging.find_all("a")
pages = []
for link in links:
    pages.append(link.find("span"))
pages = pages[:-2]
max_page = pages[-2]
# because there are 5 pages.
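Taking pages[-2] works here, but it silently depends on how many buttons trail the list. A slightly more defensive sketch (my variation, not from the original post) keeps only the numeric spans and takes their max:

import requests
from bs4 import BeautifulSoup
result = requests.get("https://search.incruit.com/list/search.asp?col=job&src=gsw*search&kw=%b9%e9%bf%a3%b5%e5&startno=0")
paging = BeautifulSoup(result.text, "html.parser").find("p", {"class" : "sqr_paging sqr_pg_mid"})
# ignore the button spans entirely: keep only spans whose text is a number
page_numbers = [int(s.get_text()) for s in paging.find_all("span") if s.get_text().isdigit()]
print(max(page_numbers))  # expected to print 5 at the time of writing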
Requesting up to the max page (incruit.py)
import requests
from bs4 import BeautifulSoup
LIMIT = 20
URL = "https://search.incruit.com/list/search.asp?col=job&src=gsw*search&kw=%b9%e9%bf%a3%b5%e5&startno={LIMIT}"
def extract_incruit_pages():
    result = requests.get(URL)
    soup = BeautifulSoup(result.text, "html.parser")
    paging = soup.find("p", {"class" : "sqr_paging sqr_pg_mid"})
    links = paging.find_all("a")
    pages = []
    for link in links[:-2]:
        pages.append(int(link.string))
    # per the page structure, the second-to-last entry holds the last page number
    max_page = pages[-2]
    return max_page
def extract_incruit_jobs(last_page):
    for page in range(last_page):
        result = requests.get(f"{URL}&startno={page*LIMIT}")
        print(result.status_code)
(main.py)
from incruit import extract_incruit_pages, extract_incruit_jobs
last_incruit_page = extract_incruit_pages()
extract_incruit_jobs(last_incruit_page)
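Rather than eyeballing printed status codes, requests can also raise on a bad response; a minimal sketch:

import requests
result = requests.get("https://search.incruit.com/list/search.asp?col=job&src=gsw*search&kw=%b9%e9%bf%a3%b5%e5")
result.raise_for_status()  # raises requests.HTTPError on any 4xx/5xx response
print(result.status_code)  # 200 when the request went through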
Watch out for typos. Something trivial almost got me stuck. Fortunately, every request comes back 200.

import requests
from bs4 import BeautifulSoup
LIMIT = 20
URL = "https://search.incruit.com/list/search.asp?col=job&src=gsw*search&kw=%b9%e9%bf%a3%b5%e5"  # startno is appended per request below
def extract_incruit_pages():
    result = requests.get(URL)
    soup = BeautifulSoup(result.text, "html.parser")
    paging = soup.find("p", {"class" : "sqr_paging sqr_pg_mid"})
    links = paging.find_all("a")
    pages = []
    for link in links[:-2]:
        pages.append(int(link.string))
    max_page = pages[-2]
    return max_page
def extract_incruit_jobs(last_page):
    jobs = []
    # for page in range(last_page):
    # testing against the first page only for now
    result = requests.get(f"{URL}&startno={0*LIMIT}")
    soup = BeautifulSoup(result.text, "html.parser")
    results = soup.find_all("p", {"class" : "detail"})
    for result in results:
        title_case = result.find("span", {"class" : "rcrtTitle"})
        title = title_case.find("a").get_text()
        print(title)
    return jobs
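Worth noting while on get_text(): .string returns None as soon as a tag has more than one child, whereas .get_text() concatenates all nested text. A toy demonstration on made-up HTML:

from bs4 import BeautifulSoup
tag = BeautifulSoup("<a>backend <b>developer</b></a>", "html.parser").a
print(tag.string)      # None: the <a> has two children, so .string gives up
print(tag.get_text())  # 'backend developer': all nested text, concatenated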
main.py is untouched.

import requests
from bs4 import BeautifulSoup
LIMIT = 20
URL = "https://search.incruit.com/list/search.asp?col=job&src=gsw*search&kw=%b9%e9%bf%a3%b5%e5"  # startno is appended per request below
def extract_incruit_pages():
    result = requests.get(URL)
    soup = BeautifulSoup(result.text, "html.parser")
    paging = soup.find("p", {"class" : "sqr_paging sqr_pg_mid"})
    links = paging.find_all("a")
    pages = []
    for link in links[:-2]:
        pages.append(int(link.string))
    max_page = pages[-2]
    return max_page
def extract_incruit_jobs(last_page):
    jobs = []
    # for page in range(last_page):
    result = requests.get(f"{URL}&startno={0*LIMIT}")
    soup = BeautifulSoup(result.text, "html.parser")
    results = soup.find_all("p", {"class" : "detail"})
    for result in results:
        title = result.find("span", {"class" : "rcrtTitle"}).find("a").get_text()
    results2 = soup.find_all("h3")
    for result in results2:
        company = result.find("a")
        if company is None:
            # some postings have no company link; fall back to the h3's own text
            company = result.get_text(strip=True)
        else:
            company = str(company.string)
        print(company)
    return jobs
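The reason the if is needed, shown on made-up markup: some h3 headings wrap the company name in a link and some carry bare text, and find("a") returns None in the second case:

from bs4 import BeautifulSoup
# hypothetical markup: the first company has a link, the second does not
html = "<h3><a>ACME Corp</a></h3><h3>No-Link Inc</h3>"
for heading in BeautifulSoup(html, "html.parser").find_all("h3"):
    link = heading.find("a")
    company = heading.get_text(strip=True) if link is None else str(link.string)
    print(company)  # ACME Corp, then No-Link Inc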
import requests
from bs4 import BeautifulSoup
LIMIT = 20
URL = "https://search.incruit.com/list/search.asp?col=job&src=gsw*search&kw=%b9%e9%bf%a3%b5%e5"  # startno is appended per request below
def extract_incruit_pages():
    result = requests.get(URL)
    soup = BeautifulSoup(result.text, "html.parser")
    paging = soup.find("p", {"class" : "sqr_paging sqr_pg_mid"})
    links = paging.find_all("a")
    pages = []
    for link in links[:-2]:
        pages.append(int(link.string))
    max_page = pages[-2]
    return max_page
def extract_incruit_jobs(last_page):
    jobs = []
    for page in range(last_page):
        result = requests.get(f"{URL}&startno={page*LIMIT}")
        soup = BeautifulSoup(result.text, "html.parser")
        titles = []
        for detail in soup.find_all("p", {"class" : "detail"}):
            titles.append(detail.find("span", {"class" : "rcrtTitle"}).find("a").get_text())
        companies = []
        for heading in soup.find_all("h3"):
            company = heading.find("a")
            if company is None:
                # postings without a company link: use the h3's own text
                companies.append(heading.get_text(strip=True))
            else:
                companies.append(str(company.string))
        # pair each company with its title by position so the print matches them up
        for company, title in zip(companies, titles):
            print(company + ':' + title)
    return jobs
(main.py)
from incruit import extract_incruit_pages, extract_incruit_jobs
last_incruit_page = extract_incruit_pages()
extract_incruit_jobs(last_incruit_page)
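extract_incruit_jobs still returns an empty list; one way to fill it would be to append a dict per posting instead of printing. A sketch with a hypothetical pair_jobs helper:

def pair_jobs(companies, titles):
    # zip the two per-page lists into one dict per posting
    jobs = []
    for company, title in zip(companies, titles):
        jobs.append({"company": company, "title": title})
    return jobs
print(pair_jobs(["ACME Corp"], ["backend developer"]))
# [{'company': 'ACME Corp', 'title': 'backend developer'}]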
Overall: unfortunately, this page is structured differently from the other pages I had practiced on (the listings are not grouped in li tags, for example), so I could apply less of what I had learned than expected (job title, company name). Still, working out the postings in the middle that have no company name (with an if statement) was a good experience, and I also learned to capture text with get_text() in addition to string.