RSeleniumに基づいて中国の裁判文書網の文書データを取得する

2198 ワード

転載:RSeleniumの基本的な使い方のまとめ
PythonとRが共同で「中国裁判文書網」文書内容の取得を完了
# Launch the Selenium standalone server as a background process
# (wait = FALSE) so the R session is not blocked while it runs.
# The chromedriver location must be passed as a -D system property on the
# same `java` command that starts the server jar: a separate
# `java -Dwebdriver.chrome.driver=...` call with no -jar / main class only
# prints the launcher usage text and starts nothing.
system(
  paste(
    "java",
    "-Dwebdriver.chrome.driver=F:/.../SeleniumSever/chromedriver.exe",
    "-jar F:/.../SeleniumSever/selenium-server-standalone-3.8.1.jar"
  ),
  wait = FALSE
)
# NOTE(review): setwd() ties the script to one machine's directory layout;
# kept as-is because later relative paths may depend on it -- confirm.
setwd("F:/...")
library("readxl")
library("magrittr")
library("RSelenium")
library("rvest")
library("dplyr")
#             
law_firm_info% select(c(1,4)) %>% unique()
remDr% '[['(1) %>% read_html() %>% html_nodes(xpath="//*[@id='resultList']/div/div[1]") %>% html_text() #    
document_name% '[['(1) %>% read_html() %>% html_nodes(xpath="//*[@id='resultList']/div/table/tbody/tr[1]/td/div/a[2]") %>% html_text() #    
tmp% '[['(1) %>% read_html() %>% html_nodes(xpath="//*[@id='resultList']/div/table/tbody/tr[2]/td/div") %>% html_text() #    +    +    
tmp_2 div:nth-child(",j,") > table > tbody > tr:nth-child(1) > td > div > a:nth-child(4)")
       btn% '[['(1) %>% read_html() %>% html_nodes("#DivContent div") %>% html_text()
data_frame_tmp1),]
	   data_frame_huizong% '[['(1) %>% read_html() %>% html_nodes(xpath="//*[@id='resultList']") %>% html_text())=="        ...") 
next

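# Page turn: appears to click the "next page" link (`a.next`) -- the original
# comment text was lost in extraction; inferred from the code just below.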
tmp_tian a.next');
          btn$clickElement();1+1
		  },
		  error=function(e) return(paste0(law_firm_info[1,1],"-       ",j," ,           ,    :",e$message))
)
          if (trycatch_value_2 != 2) {print(trycatch_value_2);break} else {
Sys.sleep(runif(1,10,17))
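# Verify the page turn took effect -- does the page number shown in
# `#pageNumber > span` equal j? (original comment text was lost in extraction)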
trycatch_value_3% '[['(1) %>% read_html() %>% html_nodes("#pageNumber > span") %>% html_text()
		  if (as.numeric(page_tmp)==j) break else
		  {Sys.sleep(runif(1,10,17)+m);next}
		  };1+1
		  },
		  error=function(e) return(paste0(law_firm_info[1,1],"-       ",j," ,       ,    :",e$message))
)
          if (trycatch_value_3 != 2) {print(trycatch_value_3);break} else {
	  
trycatch_value_1