Python爬虫ノート(八)——Scrapy公式ドキュメント読書——Scrapy常用コマンド集錦


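# Create a new Scrapy project named project_name under the project_dir directory;
# if project_dir is omitted, it defaults to the same name as project_name
scrapy startproject <project_name> [project_dir]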

 
# Show usage help and the list of available commands
scrapy -h

 
# Show detailed help for a specific command
scrapy <command> -h

 
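# Create a new spider (inside a project it is placed in the project's spiders folder, otherwise in the current folder; this is only a convenience shortcut based on templates, you can also write the spider file by hand)
scrapy genspider [-t template] <name> <domain>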

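# template: the spider template to use; available templates: basic, crawl, csvfeed, xmlfeed
# name: the name of the spider
# domain: used to generate the spider's allowed_domains and start_urls attributes
scrapy genspider [-t template] <name> <domain>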
# Start crawling with the given spider
scrapy crawl <spider>
        

 
# Run the spider's contract checks; -l: only list the contracts, without running them
scrapy check [-l] <spider>
        

 
# List all available spiders in the current project
scrapy list
          

 
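# Edit the given spider using the editor defined in the EDITOR environment variable or setting
scrapy edit <spider>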
          

 
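# Download the given URL using the Scrapy downloader and write the contents to standard output
scrapy fetch <url>
# --headers: print the HTTP response headers instead of the response body
# --no-redirect: do not follow HTTP 3xx redirects
# --spider=SPIDER: bypass spider autodetection and force use of the given spider
scrapy fetch <url>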

 
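# Open the given URL in a browser, as your Scrapy spider would "see" it; spiders can see pages differently from regular users (e.g. content loaded via ajax/js),
# so this is useful for checking what the spider actually sees
scrapy view <url>
# --spider=SPIDER: bypass spider autodetection and force use of the given spider
# --no-redirect: do not follow HTTP 3xx redirects
scrapy view <url>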

 
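# Start the interactive Scrapy shell, for the given URL if one is given, or empty otherwise; useful for debugging and testing extraction code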
scrapy shell [url]

 
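# Fetch the given url and parse it with the spider that handles it, using the method passed with --callback (if not given, the default is parse)
scrapy parse <url> [options]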
        
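Supported options:
--spider=SPIDER: bypass spider autodetection and force use of the given spider
-a NAME=VALUE: set a spider argument (may be repeated)
--callback(-c): spider method to use as the callback for parsing the response
--meta(-m): additional request meta passed to the callback request; must be a valid json string
--pipelines: process the items through the item pipelines
--rules(-r): use CrawlSpider rules to discover the callback used to parse the response
--noitems: do not show scraped items
--nolinks: do not show extracted links
--depth(-d): depth level to which requests are followed recursively (default: 1)
--verbose(-v): display information for each depth level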

  
$ scrapy parse http://www.example.com/ -c parse_item
[ ... scrapy log lines crawling example.com spider ... ]

>>> STATUS DEPTH LEVEL 1 <<<
# Scraped Items  ------------------------------------------------------------
[{'name': u'Example item',
 'category': u'Furniture',
 'length': u'12 cm'}]

# Requests  -----------------------------------------------------------------
[]

 
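scrapy settings [options]
Get the value of a Scrapy setting.
When run inside a project it shows the project's value; otherwise it shows the default Scrapy value for that setting.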

  
$ scrapy settings --get BOT_NAME
scrapybot
$ scrapy settings --get DOWNLOAD_DELAY
0

 
# Run a spider that is self-contained in a Python file, without having to create a project
scrapy runspider <spider_file.py>
  
$ scrapy runspider myspider.py
[ ... spider starts crawling ... ]
# Print the Scrapy version; with -v, also print Python, Twisted and platform information
scrapy version [-v]
# Run a quick benchmark test, to see how fast Scrapy can crawl on this machine
scrapy bench