# Gnocchi, part 6: a demo implementation of the time-series aggregation
# algorithm used by gnocchi.
#
# (about 4887 words)

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @File    : scipy_demo.py
# @Software: PyCharm

'''
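Reference:
https://github.com/gnocchixyz/gnocchi/tree/3.1.4

A demo of the gnocchi time-series aggregation algorithm.
How gnocchi aggregates a time series:
  1: Take the time series ts and round every timestamp in ts.index down to
     a multiple of the granularity; the result is the group labels, indexes
  2: De-duplicate indexes with numpy.unique to obtain uniqueIndexes
  3: Call ndimage.mean to aggregate the values group by group, i.e.
     ndimage.mean(ts.values, labels=indexes, index=uniqueIndexes)
     The result is aggregatedValues
  4: Convert uniqueIndexes into a numpy array of dtype datetime64[ns];
     the result is timestamps
  5: Combine the aggregatedValues from step 3 with the timestamps from
     step 4 into a new, aggregated time series, newTimeSerie
  6: If only the most recent n points should be kept, take
     newTimeSerie[-n:] (this last step is not part of the demo code below)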


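About the aggregation function:
scipy.ndimage.measurements.mean(input, labels=None, index=None)[source]
Purpose: compute the mean of the values that share each label.
Parameters:
input: array of values over which the means are computed.
labels: array of labels, with the same shape as input (or broadcastable
        to it). All elements sharing the same label form one region,
        and the mean is computed over each such region.
index: the labels for which a mean should be returned.
Returns: a sequence of means, one per entry of index.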




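The key trick of the algorithm:
 (a // b) * b: the largest multiple of b that does not exceed a
 (numpy.array(ts.index, 'float') // freq) * freq:
 rounds every timestamp down to a multiple of freq, so all timestamps that
 fall in the same freq-wide window end up with the same value.
 Example:
 1,2,3,4,5,6,7,8,9
 freq=3
 After the computation the values become:
 0 0 3 3 3 6 6 6 9
 In other words the points have been grouped into windows of width freq.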
'''

'''
ref:
https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.measurements.mean.html
scipy.ndimage.measurements.mean

scipy.ndimage.measurements.mean(input, labels=None, index=None)[source]
Calculate the mean of the values of an array at labels.

Parameters:	
input : array_like
Array on which to compute the mean of elements over distinct regions.
labels : array_like, optional
Array of labels of same shape, or broadcastable to the same shape as input. All elements sharing the same label form one region over which the mean of the elements is computed.
index : int or sequence of ints, optional
Labels of the objects over which the mean is to be computed. Default is None, in which case the mean for all values where label is greater than 0 is calculated.
Returns:	
out : list
Sequence of same length as index, with the mean of the different regions labeled by the labels in index.
See also
ndimage.variance, ndimage.standard_deviation, ndimage.minimum, ndimage.maximum, ndimage.sum, ndimage.label

scipy.ndimage.measurements.mean(input, labels=None, index=None)[source]
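Purpose: compute the mean of the values that share each label.
Parameters:
input: array of values over which the means are computed.
labels: array of labels, with the same shape as input (or broadcastable
        to it). All elements sharing the same label form one region,
        and the mean is computed over each such region.
index: the labels for which a mean should be returned.
Returns: a sequence of means, one per entry of index.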

Examples

>>>
>>> a = np.arange(25).reshape((5,5))
>>> labels = np.zeros_like(a)
>>> labels[3:5,3:5] = 1
>>> index = np.unique(labels)
>>> labels
array([[0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0],
       [0, 0, 0, 1, 1],
       [0, 0, 0, 1, 1]])
>>> index
array([0, 1])
>>> ndimage.mean(a, labels=labels, index=index)
[10.285714285714286, 21.0]

'''

import numpy as np
from scipy import ndimage
import numpy
import pandas as pd
from scipy import ndimage


def aggregateGnocchiTimeSerie(granularity=300.0):
    """Demonstrate gnocchi-style mean aggregation on a sample time series.

    A fixed series of 12 points (one per minute, values 0..11) is grouped
    into windows of ``granularity`` seconds and the mean of each window is
    computed with ``scipy.ndimage.mean``. Every intermediate result is
    printed so the individual steps of the algorithm can be followed.

    Args:
        granularity: Width of an aggregation window in seconds. Must be
            positive. Defaults to 300 seconds.

    Returns:
        A ``pandas.Series`` holding one mean value per window, indexed by
        the start timestamp of the window.

    Raises:
        ValueError: If ``granularity`` is not positive.
    """
    if granularity <= 0:
        raise ValueError("granularity must be positive")

    # Step 0: build the sample time series.
    dates = pd.DatetimeIndex(['2018-04-18 11:20:30', '2018-04-18 11:21:30',
                              '2018-04-18 11:22:30', '2018-04-18 11:23:30',
                              '2018-04-18 11:24:30', '2018-04-18 11:25:30',
                              '2018-04-18 11:26:30', '2018-04-18 11:27:30',
                              '2018-04-18 11:28:30', '2018-04-18 11:29:30',
                              '2018-04-18 11:30:30', '2018-04-18 11:31:30'])
    print(dates)
    ts = pd.Series(np.arange(12), index=dates)
    print("step 0 ############ time series:")
    print(ts)

    # The timestamps are handled as nanoseconds since the epoch, so the
    # window width has to be expressed in nanoseconds too (1e9 ns per s).
    freq = granularity * 1e9
    # Normalise the index to nanosecond resolution before converting, so the
    # numbers really are nanoseconds whatever resolution the index carries;
    # the int64 hop keeps the datetime -> float conversion explicit.
    nanoseconds = ts.index.values.astype('datetime64[ns]').astype('int64')
    floatIndexes = nanoseconds.astype('float')
    print("############ float indexes:")
    print(floatIndexes)

    # Step 1: round every timestamp down to the start of its window; points
    # in the same window end up with the same label.
    indexes = (floatIndexes // freq) * freq
    print("step 1 ############ group indexes:")
    print(indexes)

    # Step 2: de-duplicate the labels; numpy.unique also returns them sorted.
    uniqueIndexes = numpy.unique(indexes)
    print("step 2############ unique indexes:")
    print(uniqueIndexes)
    print("############ values")
    print(ts.values)

    # Step 3: mean of the values of each window, in uniqueIndexes order.
    values = ndimage.mean(ts.values, labels=indexes, index=uniqueIndexes)
    print("step 3 ############ gnocchi mean aggregated result")
    print(values)

    # Step 4: turn the window starts back into datetime64[ns] timestamps.
    timestamps = uniqueIndexes.astype('int64').astype('datetime64[ns]')
    print("step 4 ############ recover unique indexes")
    print(timestamps)

    # Step 5: pair the aggregated values with their window timestamps.
    timestamps = pd.to_datetime(timestamps)
    print(timestamps)
    newTimeSerie = pd.Series(values, timestamps)
    print("step 5 ############ get aggregated time serie")
    print(newTimeSerie)
    return newTimeSerie

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    aggregateGnocchiTimeSerie()