# Create a new project; project_name is the project name, project_dir the target directory (defaults to project_name)
scrapy startproject <project_name> [project_dir]
# Show global help and the list of available commands
scrapy -h
# Show help for a specific command
scrapy <command> -h
# Create a new spider from a template (run inside a project directory)
scrapy genspider [-t template] <name> <domain>
# template: the spider template to use; built-in templates: basic, crawl, csvfeed, xmlfeed
# name: the spider's name
# domain: used to generate the spider's allowed_domains and start_urls attributes
scrapy genspider [-t template] <name> <domain>
# Run a spider by name
scrapy crawl <spider>
# Run contract checks on the spiders; -l: only list available contracts
scrapy check [-l]
# List all spiders available in the current project
scrapy list
# Open the given spider's source file in the configured editor
scrapy edit <spider>
# Download the given URL with the Scrapy downloader and write the response body to stdout
scrapy fetch <url>
# --headers: print the response's HTTP headers instead of its body
# --no-redirect: do not follow HTTP 3xx redirects
# --spider=SPIDER: bypass spider auto-detection and force use of this spider
scrapy fetch <url>
# Open the given URL in a browser as Scrapy "sees" it; content loaded via AJAX/JS
# will be missing, which helps diagnose why a spider cannot find it
scrapy view <url>
# --spider=SPIDER: bypass spider auto-detection and force use of this spider
# --no-redirect: do not follow HTTP 3xx redirects
scrapy view <url>
# Start an interactive Scrapy shell for the given URL (or no URL), useful for trying out extraction code
scrapy shell [url]
# Fetch the given URL and parse it with the spider, using the method passed via --callback (default: parse)
scrapy parse [options]
Supported options:
--spider=SPIDER: bypass spider auto-detection and force use of this spider
--a NAME=VALUE: set a spider argument (may be repeated)
--callback(-c): spider method to use as callback for parsing the response
--meta(-m): additional request meta passed to the callback request; must be valid JSON
--pipelines: process scraped items through the item pipelines
--rules(-r): use CrawlSpider rules to discover the callback for parsing the response
--noitems: don't show scraped items
--nolinks: don't show extracted links
--depth(-d): depth level for which requests should be followed recursively (default: 1)
--verbose(-v): display information for each depth level
$ scrapy parse http://www.example.com/ -c parse_item
[ ... scrapy log lines crawling example.com spider ... ]
>>> STATUS DEPTH LEVEL 1 <<<
# Scraped Items ------------------------------------------------------------
[{'name': u'Example item',
'category': u'Furniture',
'length': u'12 cm'}]
# Requests -----------------------------------------------------------------
[]
scrapy settings [options]
# Get the value of a Scrapy setting (quote names containing spaces or special characters)
# Inside a project it shows the project's settings; otherwise it shows the Scrapy defaults
$ scrapy settings --get BOT_NAME
scrapybot
$ scrapy settings --get DOWNLOAD_DELAY
0
# Run a self-contained spider from a Python file, without creating a project
scrapy runspider <spider_file.py>
$ scrapy runspider myspider.py
[ ... spider starts crawling ... ]
# Print the Scrapy version; with -v also print Python, Twisted and platform info (useful for bug reports)
scrapy version [-v]
# Run a quick benchmark test to gauge crawl speed on the local hardware
scrapy bench