Pythonで「スマートホーム」に関するデータを取得する

4134 ワード

スマートホームに関するデータを取得
  • 「スマートドアロック」のデータをスクレイピングする
  • データを簡単に処理する
    スマートドアロックのデータをスクレイピングする
    from selenium import webdriver
    # from selenium.webdriver.common.by import By
    # from pyquery import PyQuery as pq
    import time
    import csv
    
    # Search-results page on search.gome.com.cn; the percent-encoded
    # ``question`` parameter decodes to the Chinese term for "smart door lock".
    SEARCH_URL = (
        'http://search.gome.com.cn/search'
        '?intcmp=smart-1000066448-5'
        '&question=%E6%99%BA%E8%83%BD%E9%97%A8%E9%94%81'
        '&deliv=1&market=10&pzpq=0&pzin=v4'
    )

    # One shared Chrome session, used by spider() below as a module-level name.
    browser = webdriver.Chrome()
    browser.get(SEARCH_URL)
    
    
    def spider(page, max_pages=None):
        """Scrape result pages one after another into ``information.csv``.

        Works on the page currently loaded in the module-level ``browser``.
        For every page it scrolls down in 1000-pixel steps (presumably so
        lazily rendered items appear), reads the text of the ``item-price``,
        ``item-name`` and ``comment`` elements, appends one CSV row per item,
        and then jumps to the next page via the ``pNum`` input box and the
        ``btn`` button.

        The loop stops when a page yields no price elements, when the pager
        controls cannot be found, or when ``max_pages`` pages were scraped.

        Args:
            page: Number of the page that is currently displayed.
            max_pages: Optional upper bound on the number of pages scraped by
                this call. ``None`` (the default) means no explicit bound.
        """
        # Local imports: only the selenium package the file already depends on.
        from selenium.common.exceptions import NoSuchElementException
        from selenium.webdriver.common.by import By

        scraped = 0
        # Iterate instead of recursing: one stack frame per page would hit the
        # interpreter's recursion limit on long result lists.
        while True:
            # window.scrollTo takes (x, y); only the vertical offset matters
            # here, so x stays 0 and y grows from 1000 to 6000.
            for offset in range(1000, 7000, 1000):
                browser.execute_script('window.scrollTo(0, arguments[0])', offset)
                time.sleep(2)

            # find_elements(By, value) works on Selenium 3 and 4, whereas the
            # find_elements_by_* helpers were removed in Selenium 4.
            prices = browser.find_elements(By.CLASS_NAME, 'item-price')
            names = browser.find_elements(By.CLASS_NAME, 'item-name')
            comments = browser.find_elements(By.CLASS_NAME, 'comment')
            if not prices:
                # Nothing to record on this page: treat it as the end.
                break

            print('     ', page, ' ')
            # newline='' lets the csv module control line endings (otherwise
            # blank rows appear on Windows).
            with open('information.csv', 'a', encoding='utf-8', newline='') as f:
                writer = csv.writer(f)
                # zip() stops at the shortest list, so a page with a missing
                # field no longer raises IndexError half-way through.
                for i, cells in enumerate(zip(prices, names, comments)):
                    row = [cell.text for cell in cells]
                    writer.writerow(row)
                    print(i)
                    for value in row:
                        print(value)

            scraped += 1
            if max_pages is not None and scraped >= max_pages:
                break

            try:
                page_box = browser.find_element(By.ID, 'pNum')
                go_button = browser.find_element(By.CLASS_NAME, 'btn')
            except NoSuchElementException:
                # No pager on this page: nothing further to visit.
                break

            page += 1
            page_box.clear()
            page_box.send_keys(str(page))
            go_button.click()
    
    
    # items = doc('.item-price-info').items()
    # for item in items:
    #     price = item.find('.item-price').text()
    #     print(price)
    
    
    # Start at page 1. The finally clause guarantees the browser is shut down
    # even if the crawl stops with an exception; quit() ends the whole
    # WebDriver session, whereas close() only closes the current window.
    try:
        spider(1)
    finally:
        browser.quit()
    
    from selenium import webdriver
    import time
    import csv
    
    # Search-results page on b2b.baidu.com; the percent-encoded ``q``
    # parameter decodes to the Chinese term for "smart door lock".
    B2B_SEARCH_URL = 'https://b2b.baidu.com/s?q=%E6%99%BA%E8%83%BD%E9%97%A8%E9%94%81&from=search'

    # One shared Chrome session, used by find() below as a module-level name.
    browser = webdriver.Chrome()
    browser.get(B2B_SEARCH_URL)
    
    
    def find():
        """Scrape every result page into ``Doorlock.csv`` until paging fails.

        Works on the page currently loaded in the module-level ``browser``.
        For each page it collects the text of the ``p-card-price``,
        ``p-card-name``, ``p-card-company``/``p-card-company-vip`` and
        ``p-card-address`` elements, appends one CSV row per card, clicks the
        "next page" item and waits two seconds before reading the next page.

        Scraping ends, and the closing message is printed, as soon as the
        browser reports an error, typically because the "next page" item can
        no longer be located. Errors that do not come from the WebDriver
        (for example file-system errors) propagate to the caller.
        """
        # Local imports: only the selenium package the file already depends on.
        from selenium.common.exceptions import WebDriverException
        from selenium.webdriver.common.by import By

        try:
            # Iterate instead of recursing once per page, which would hit the
            # interpreter's recursion limit on long result lists.
            while True:
                # find_elements(By, value) works on Selenium 3 and 4, whereas
                # the find_elements_by_* helpers were removed in Selenium 4.
                price_list = browser.find_elements(By.XPATH, '//div[@class="p-card-price"]')
                goods_list = browser.find_elements(By.XPATH, '//div[@class="p-card-name"]')
                company_list = browser.find_elements(
                    By.XPATH,
                    '//div[contains(@class,"p-card-company") or '
                    '@class = "p-card-company-vip"]')
                address_list = browser.find_elements(By.XPATH, '//div[@class="p-card-address"]')

                # newline='' lets the csv module control line endings
                # (otherwise blank rows appear on Windows).
                with open('Doorlock.csv', 'a', encoding='utf-8', newline='') as f:
                    writer = csv.writer(f)
                    # zip() stops at the shortest list, so a card with a
                    # missing field cannot trigger an IndexError.
                    cards = zip(price_list, goods_list, company_list, address_list)
                    for i, cells in enumerate(cards):
                        row = [cell.text for cell in cells]
                        writer.writerow(row)
                        print(i)
                        for value in row:
                            print(value)

                button = browser.find_element(
                    By.XPATH, '//li[@class="ivu-page-next ivu-page-custom-text"]')
                button.click()
                # Give the next page time to render before reading it.
                time.sleep(2)
        except WebDriverException:
            # Only browser-side failures end the crawl quietly; a bare
            # ``except`` would also hide KeyboardInterrupt and real bugs.
            print('    ')
    
    
    # The finally clause guarantees the browser is shut down even if the crawl
    # raises; quit() ends the whole WebDriver session, whereas close() only
    # closes the current window.
    try:
        find()
    finally:
        browser.quit()
    

    データを簡単に処理する
    import pandas as pd
    import matplotlib.pyplot as plt
    
    # Column labels applied to the header-less CSV loaded below.
    # NOTE(review): all four labels are the same blank string, which looks like
    # non-ASCII text lost from this listing; restore the real, distinct labels.
    # Duplicate labels are likely to be rejected by read_csv (confirm).
    column_name = ['  ', '  ', '  ', '  ']
    
    # Load the scraped rows, using the list above as the column labels.
    data = pd.read_csv('Doorlock.csv', names=column_name)
    
    # Normalise the selected column: for rows whose value contains the marker
    # substring (missing values count as "no match"), drop the first and last
    # character, parse the remainder as a float and store it back as a string.
    # NOTE(review): presumably the dropped characters are a leading "¥" and a
    # trailing currency-unit character; confirm against the CSV contents.
    yuan = data['  '].str.contains(' ').fillna(False)
    for i, y in data[yuan].iterrows():
        price = float(y['  '][1:-1])
        data.at[i, '  '] = '{}'.format(price)
    
    # Keep only the rows whose column value does NOT contain the second marker.
    # NOTE(review): this marker literal also appears to have lost its original
    # text; confirm which value is meant to be filtered out.
    data = data[~ data['  '].str.contains('  ')]
    
    # Show the cleaned table.
    print(data)