Pythonのmatplotlibモジュールを使用してYahoo Financeの履歴データを取得し、グラフを描画します.

9369 ワード

各社の株価の履歴データを自動的に取得してグラフを描く方法は、金融テキストの感情分析プロジェクトに必要な部分です.確かにこれらのデータはfinance.yahoo.comで簡単に見ることができますが、プログラムで自動的に取得し、リアルタイムで表示する方法が問題になります.これまではクローラーを書いてデータを取得することを考えていましたが、明らかに手間がかかり効率も悪いです.matplotlibにはfinanceモジュールがあり、この機能を簡単に実現できます.
finance.py is a collection of modules for collecting, analyzing and plotting financial data.まず、matplotlibモジュールを使用してfinance.yahoo.comの履歴データを取得し、グラフを描画するexampleを見てみましょう.先にコードを貼ります

from pylab import figure, show
from matplotlib.finance import quotes_historical_yahoo
from matplotlib.dates import YearLocator, MonthLocator, DateFormatter
import datetime
# Date range to query: MSFT prices from 2012-01-01 through 2012-11-11.
date1 = datetime.date(2012, 1, 1)
date2 = datetime.date(2012, 11, 11)

daysFmt = DateFormatter('%m-%d-%Y')

quotes = quotes_historical_yahoo('MSFT', date1, date2)
# quotes_historical_yahoo returns None (not an empty list) when no rows were
# parsed or the read failed, so test truthiness rather than calling len().
if not quotes:
    raise SystemExit

# Each quote is a tuple whose first two fields are the date number and the
# opening price.
dates = [q[0] for q in quotes]
opens = [q[1] for q in quotes]

fig = figure()
ax = fig.add_subplot(111)
ax.plot_date(dates, opens, '-')

# format the ticks
ax.xaxis.set_major_formatter(daysFmt)
ax.autoscale_view()


# format the coords message box
def price(x):
    """Format the y value *x* as a dollar amount with two decimals."""
    return '$%1.2f' % x


ax.fmt_xdata = DateFormatter('%Y-%m-%d')
ax.fmt_ydata = price
ax.grid(True)

fig.autofmt_xdate()
show()

date1、date2はそれぞれデータ検索の開始日と終了日であり、例えばこの例ではマイクロソフトの2012.1.1から2012.11.11までの過去の株価を検索する.
quotes_historical_yahooはyahooの履歴データを取得する関数で、入力として会社のTicker Symbolと検索の開始日・終了日を受け取り、取得したデータはキャッシュファイルに保存されます.具体的なコードは以下の通りです.

def quotes_historical_yahoo(ticker, date1, date2, asobject=False,
                                        adjusted=True, cachename=None):
    """
    Get historical data for ticker between date1 and date2.  date1 and
    date2 are datetime instances or (year, month, day) sequences.

    See :func:`parse_yahoo_historical` for explanation of output formats
    and the *asobject* and *adjusted* kwargs.

    Ex:
    sp = f.quotes_historical_yahoo('^GSPC', d1, d2,
                                asobject=True, adjusted=True)
    returns = (sp.open[1:] - sp.open[:-1])/sp.open[1:]
    [n,bins,patches] = hist(returns, 100)
    mu = mean(returns)
    sigma = std(returns)
    x = normpdf(bins, mu, sigma)
    plot(bins, x, color='red', lw=2)

    cachename is the name of the local file cache.  If None, will
    default to the md5 hash or the url (which incorporates the ticker
    and date range)

    Returns the parsed quotes, or None when parsing yields no rows or an
    IOError is raised while parsing (a warning is issued in that case).
    """
    # Maybe enable a warning later as part of a slow transition
    # to using None instead of False.
    #if asobject is False:
    #    warnings.warn("Recommend changing to asobject=None")

    fh = fetch_historical_yahoo(ticker, date1, date2, cachename)

    try:
        ret = parse_yahoo_historical(fh, asobject=asobject,
                                     adjusted=adjusted)
    except IOError as exc:
        # NOTE(review): strerror[1] is kept from the original; it assumes
        # strerror is indexable with at least two items -- confirm.
        warnings.warn('fh failure\n%s' % (exc.strerror[1]))
        return None
    finally:
        # The parser reads every line up front, so the handle is no longer
        # needed once it returns (or fails).
        fh.close()

    if len(ret) == 0:
        return None

    return ret

fetch_historical_yahoo関数は履歴データファイルfhを返します.もちろん、http://table.finance.yahoo.com/table.csv?a=%d&b=%d&c=%d&d=%d&e=%d&f=%d&s=%s&y=0&g=%s&ignore=.csv から手動でダウンロードすることもできます.具体的なパラメータの計算はコードを参照してください.

def fetch_historical_yahoo(ticker, date1, date2, cachename=None,dividends=False):
    """
    Fetch historical data for ticker between date1 and date2.  date1 and
    date2 are date or datetime instances, or (year, month, day) sequences.

    Ex:
    fh = fetch_historical_yahoo('^GSPC', (2000, 1, 1), (2001, 12, 31))

    cachename is the name of the local file cache.  If None, will
    default to the md5 hash or the url (which incorporates the ticker
    and date range)

    set dividends=True to return dividends instead of price data.  With
    this option set, parse functions will not work

    a file handle is returned; the caller is responsible for closing it
    """

    ticker = ticker.upper()

    # The URL template takes each date as (month - 1, day, year).
    if iterable(date1):
        d1 = (date1[1]-1, date1[2], date1[0])
    else:
        d1 = (date1.month-1, date1.day, date1.year)
    if iterable(date2):
        d2 = (date2[1]-1, date2[2], date2[0])
    else:
        d2 = (date2.month-1, date2.day, date2.year)

    # 'v' requests dividends, 'd' requests prices.
    if dividends:
        g = 'v'
        verbose.report('Retrieving dividends instead of prices')
    else:
        g = 'd'

    urlFmt = 'http://table.finance.yahoo.com/table.csv?a=%d&b=%d&c=%d&d=%d&e=%d&f=%d&s=%s&y=0&g=%s&ignore=.csv'

    url = urlFmt % (d1[0], d1[1], d1[2],
                    d2[0], d2[1], d2[2], ticker, g)

    if cachename is None:
        cachename = os.path.join(cachedir, md5(url).hexdigest())
    if os.path.exists(cachename):
        fh = open(cachename)
        verbose.report('Using cachefile %s for %s'%(cachename, ticker))
    else:
        mkdirs(cachedir)
        urlfh = urlopen(url)
        # Read the whole response before creating the cache file, so a failed
        # download cannot leave an empty file that later looks like a valid
        # cache hit; always release the connection.
        try:
            data = urlfh.read()
        finally:
            urlfh.close()

        with open(cachename, 'wb') as cache:
            cache.write(data)
        verbose.report('Saved %s data to cache file %s'%(ticker, cachename))
        fh = open(cachename, 'r')

    return fh

parse_yahoo_historical関数は履歴データを解析し、ファイルを読み取り、ファイルの一部の内容を操作することができ、コードは以下の通りである.

def parse_yahoo_historical(fh, adjusted=True, asobject=False):
    """
    Parse the historical data in file handle fh from yahoo finance.

    *adjusted*
      If True (default) replace open, close, high, and low prices with
      their adjusted values. The adjustment is by a scale factor, S =
      adjusted_close/close. Adjusted prices are actual prices
      multiplied by S.

      Volume is not adjusted as it is already backward split adjusted
      by Yahoo. If you want to compute dollars traded, multiply volume
      by the adjusted close, regardless of whether you choose adjusted
      = True|False.


    *asobject*
      If False (default for compatibility with earlier versions)
      return a list of tuples containing

        d, open, close, high, low, volume

      If None (preferred alternative to False), return
      a 2-D ndarray corresponding to the list of tuples.

      Otherwise return a numpy recarray with

        date, year, month, day, d, open, close, high, low,
        volume, adjusted_close

      where d is a floating point representation of date,
      as returned by date2num, and date is a python standard
      library datetime.date instance.

      The name of this kwarg is a historical artifact.  Formerly,
      True returned a cbook Bunch
      holding 1-D ndarrays.  The behavior of a numpy recarray is
      very similar to the Bunch.

    """

    lines = fh.readlines()

    results = []

    # The first line is skipped (presumably the CSV header row).
    for line in lines[1:]:

        vals = line.split(',')
        if len(vals) != 7:
            continue      # add warning?
        datestr = vals[0]
        # Using strptime doubles the runtime. With the present
        # 'YYYY-MM-DD' format, splitting on '-' is enough.
        dt = datetime.date(*[int(val) for val in datestr.split('-')])
        dnum = date2num(dt)
        # Columns 1-4 are read as open, high, low, close; the trailing
        # underscore avoids shadowing the builtin open().
        open_, high, low, close = [float(val) for val in vals[1:5]]
        volume = float(vals[5])
        aclose = float(vals[6])

        # Note the record order differs from the column order:
        # open, close, high, low.
        results.append((dt, dt.year, dt.month, dt.day,
                        dnum, open_, close, high, low, volume, aclose))
    # Reverse the row order of the file before building the array.
    results.reverse()
    d = np.array(results, dtype=stock_dt)
    if adjusted:
        scale = d['aclose'] / d['close']
        # A zero close gives an infinite factor; mark those rows as NaN.
        scale[np.isinf(scale)] = np.nan
        d['open'] *= scale
        d['close'] *= scale
        d['high'] *= scale
        d['low'] *= scale

    if not asobject:
        # 2-D sequence; formerly list of tuples, now ndarray.
        # Builtin float replaces the np.float alias, which newer NumPy
        # releases no longer provide; the resulting dtype is the same.
        ret = np.zeros((len(d), 6), dtype=float)
        ret[:,0] = d['d']
        ret[:,1] = d['open']
        ret[:,2] = d['close']
        ret[:,3] = d['high']
        ret[:,4] = d['low']
        ret[:,5] = d['volume']
        if asobject is None:
            return ret
        return [tuple(row) for row in ret]

    return d.view(np.recarray)  # Close enough to former Bunch return

また、履歴データを操作する必要がない場合は、ローカルファイルにダウンロードして保存するだけで、次のコードを参照できます.

#this example can download the data in finance.yahoo and put in our computers

import os,urllib2,urllib

ticker = 'MSFT'           # the ticker symbol
date1 = (2012, 1, 1)      # beginning date as (year, month, day)
date2 = (2012, 11, 11)    # ending date as (year, month, day)

# Reorder each date to (month - 1, day, year), the order the URL template
# below consumes them in.
d1 = (date1[1]-1, date1[2], date1[0])
d2 = (date2[1]-1, date2[2], date2[0])

g = 'd'  # same request-type code fetch_historical_yahoo uses for price data

urlFmt = 'http://table.finance.yahoo.com/table.csv?a=%d&b=%d&c=%d&d=%d&e=%d&f=%d&s=%s&y=0&g=%s&ignore=.csv'
url = urlFmt % (d1[0], d1[1], d1[2],
                d2[0], d2[1], d2[2], ticker, g)  # the url of historical data
print(url)

path = r'C:\Users\yinyao\Desktop\Python code'  # directory to save into
# No leading backslash here: on Windows os.path.join treats a component that
# starts with a separator as rooted and drops the directory part of `path`,
# which would save the file as C:\ticker.csv instead.
file_name = 'ticker.csv'                   # file name
dest_dir = os.path.join(path, file_name)   # full path of the target file
urllib.urlretrieve(url, dest_dir)          # download the data to the target file

C++中値転送、参照転送、ポインタ転送

「HeadFirstデザインパターン」と「Rubyによるデザインパターン」を読んで Iterator パターン