Gnocchi:6、gnocchiに基づく時系列アルゴリズムdemo実現
4887 ワード
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @File : scipy_demo.py
# @Software: PyCharm
'''
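Reference:
https://github.com/gnocchixyz/gnocchi/tree/3.1.4
Demo implementation of the gnocchi aggregation algorithm.
How gnocchi aggregates a time series:
Step 1: Group the time index ts.index of the time series ts by rounding every
timestamp down to the start of its bucket, giving the group labels indexes.
Step 2: Apply numpy.unique to indexes to get the distinct labels uniqueIndexes.
Step 3: Call ndimage.mean,
ndimage.mean(ts.values, labels=indexes, index=uniqueIndexes)
to get the per-bucket means aggregatedValues.
Step 4: Convert uniqueIndexes back into a datetime64[ns] numpy array
to get timestamps.
Step 5: Combine aggregatedValues from step 3 with timestamps from step 4
to build the new time series newTimeSerie.
Step 6: If only n points must be kept, truncate the series: newTimeSerie[-n:]
(the demo below stops after step 5).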
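Key function:
scipy.ndimage.measurements.mean(input, labels=None, index=None)
Purpose: calculate the mean of the values of an array at labels.
Parameters:
input: array_like, the array whose elements are averaged over distinct regions.
labels: array_like, optional. Array of labels with the same shape as input, or
broadcastable to that shape. All elements sharing the same label form one
region over which the mean is computed.
index: int or sequence of ints, optional. The labels whose means are computed.
Returns:
A sequence of the same length as index, holding the mean of each labelled region.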
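Grouping trick:
(a // b) * b: the largest multiple of b that is not greater than a.
(numpy.array(ts.index, 'float') // freq) * freq:
rounds every timestamp down to a multiple of freq, so all timestamps that
fall inside the same freq-wide window receive the same label.
Example:
1,2,3,4,5,6,7,8,9
freq=3
0 0 3 3 3 6 6 6 9
Each value is mapped to the start of its bucket of width freq.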
'''
'''
ref:
https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.measurements.mean.html
scipy.ndimage.measurements.mean
scipy.ndimage.measurements.mean(input, labels=None, index=None)[source]
Calculate the mean of the values of an array at labels.
Parameters:
input : array_like
Array on which to compute the mean of elements over distinct regions.
labels : array_like, optional
Array of labels of same shape, or broadcastable to the same shape as input. All elements sharing the same label form one region over which the mean of the elements is computed.
index : int or sequence of ints, optional
Labels of the objects over which the mean is to be computed. Default is None, in which case the mean for all values where label is greater than 0 is calculated.
Returns:
out : list
Sequence of same length as index, with the mean of the different regions labeled by the labels in index.
See also
ndimage.variance, ndimage.standard_deviation, ndimage.minimum, ndimage.maximum, ndimage.sum, ndimage.label
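Summary of scipy.ndimage.measurements.mean(input, labels=None, index=None):
Purpose: calculate the mean of the array values at each label.
Parameters:
input: array_like, the data to average.
labels: array_like, optional. Same shape as input, or broadcastable to it.
Elements sharing the same label form one region,
and the mean is computed per region.
index: int or sequence of ints, optional. The labels to compute the mean for.
Returns: one mean per label listed in index.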
Examples
>>>
>>> a = np.arange(25).reshape((5,5))
>>> labels = np.zeros_like(a)
>>> labels[3:5,3:5] = 1
>>> index = np.unique(labels)
>>> labels
array([[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 1, 1],
[0, 0, 0, 1, 1]])
>>> index
array([0, 1])
>>> ndimage.mean(a, labels=labels, index=index)
[10.285714285714286, 21.0]
'''
import numpy as np
from scipy import ndimage
import numpy
import pandas as pd
from scipy import ndimage
def aggregateGnocchiTimeSerie(ts=None, granularity=300.0):
    '''Demonstrate gnocchi-style mean aggregation of a time series.

    Every timestamp is rounded down to the start of its ``granularity``-wide
    bucket, the values sharing a bucket are averaged with ``ndimage.mean``,
    and the bucket starts are turned back into timestamps. Each intermediate
    result is printed so the steps can be followed on the console.

    Args:
        ts: pandas Series indexed by a DatetimeIndex. When None, a built-in
            sample of 12 one-minute points with values 0..11 is used.
        granularity: bucket width in seconds; must be positive.

    Returns:
        A pandas Series holding one mean per non-empty bucket, indexed by the
        bucket start time.

    Raises:
        ValueError: if ``granularity`` is not positive.
    '''
    if granularity <= 0:
        raise ValueError("granularity must be positive")

    if ts is None:
        # step 0: build the sample series, one point per minute
        dates = pd.DatetimeIndex(['2018-04-18 11:20:30', '2018-04-18 11:21:30',
                                  '2018-04-18 11:22:30', '2018-04-18 11:23:30',
                                  '2018-04-18 11:24:30', '2018-04-18 11:25:30',
                                  '2018-04-18 11:26:30', '2018-04-18 11:27:30',
                                  '2018-04-18 11:28:30', '2018-04-18 11:29:30',
                                  '2018-04-18 11:30:30', '2018-04-18 11:31:30'])
        print(dates)
        ts = pd.Series(numpy.arange(12), index=dates)
    print("step 0 ############ time series:")
    print(ts)

    # Bucket width in nanoseconds (1 s == 1e9 ns), the unit of the index below.
    freq = granularity * 1e9
    # Express the index as float nanoseconds since the epoch. Going through
    # int64 at an explicit ns unit avoids depending on implicit
    # datetime -> float casting or on the index's stored resolution.
    nanoseconds = ts.index.values.astype('datetime64[ns]').astype('int64')
    floatIndexes = nanoseconds.astype('float')
    print("############ float indexes:")
    print(floatIndexes)

    # step 1: round every timestamp down to the start of its bucket;
    # (a // b) * b is the largest multiple of b that is not greater than a
    indexes = (floatIndexes // freq) * freq
    print("step 1 ############ group indexes:")
    print(indexes)

    # step 2: the distinct bucket starts, sorted ascending
    uniqueIndexes = numpy.unique(indexes)
    print("step 2############ unique indexes:")
    print(uniqueIndexes)
    print("############ values")
    print(ts.values)

    # step 3: average the values bucket by bucket, using the bucket start
    # as the label of every point
    values = ndimage.mean(ts.values, labels=indexes, index=uniqueIndexes)
    print("step 3 ############ gnocchi mean aggregated result")
    print(values)

    # step 4: turn the bucket starts back into datetime64[ns] timestamps
    timestamps = uniqueIndexes.astype('int64').astype('datetime64[ns]')
    print("step 4 ############ recover unique indexes")
    print(timestamps)

    # step 5: pair the means with their bucket timestamps
    timestamps = pd.to_datetime(timestamps)
    print(timestamps)
    newTimeSerie = pd.Series(values, timestamps)
    print("step 5 ############ get aggregated time serie")
    print(newTimeSerie)
    return newTimeSerie
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    aggregateGnocchiTimeSerie()