## Data-processing class TimeSeriesDiff: applies data.diff(k) / data.shift(k) to the data
class TimeSeriesDiff(BaseEstimator, TransformerMixin):
    """Stateless transformer turning pandas data into k-period relative changes.

    Each value is replaced by ``(value - value_k_rows_earlier) /
    value_k_rows_earlier``, computed as ``X.diff(k) / X.shift(k)``.

    Args:
        k: Number of rows to look back when differencing and shifting.
    """

    def __init__(self, k=1):
        self.k = k

    def fit(self, X, y=None):
        """Do nothing and return ``self``; the transformer learns no state."""
        return self

    def transform(self, X, y=None):
        """Return the k-period relative change of ``X``.

        Args:
            X: A pandas ``DataFrame`` or ``Series`` (subclasses included).
            y: Ignored; accepted for pipeline compatibility.

        Returns:
            ``X.diff(self.k) / X.shift(self.k)``, the same pandas type as
            ``X``. For positive ``k`` the first ``k`` rows have no earlier
            row to compare against and come out as NaN.

        Raises:
            TypeError: If ``X`` is not a pandas ``DataFrame`` or ``Series``.
        """
        # isinstance (rather than an exact type identity check) also accepts
        # DataFrame/Series subclasses, which support diff/shift just as well.
        if not isinstance(X, (pd.DataFrame, pd.Series)):
            raise TypeError("Have to be a pandas data frame or Series object!")
        return X.diff(self.k) / X.shift(self.k)
## Data-processing class TimeSeriesEmbedder: every k consecutive values form one new sample, N-k samples in total
class TimeSeriesEmbedder(BaseEstimator, TransformerMixin):
    """Stateless transformer that embeds a series into windows of length ``k``.

    The work is delegated to ``embed_time_series``; this class only stores
    the window length so the embedding can be used as a pipeline step.

    Args:
        k: Length of each window.
    """

    def __init__(self, k):
        self.k = k

    def fit(self, X, y=None):
        """Do nothing and return ``self``; the transformer learns no state."""
        return self

    def transform(self, X, y=None):
        """Return ``embed_time_series(X, self.k)``; ``y`` is ignored."""
        window_length = self.k
        embedded = embed_time_series(X, window_length)
        return embedded
def embed_time_series(x, k):
    """Build the sliding windows of length ``k`` over ``x``.

    Window ``i`` is ``x[i:i + k]`` for ``i`` in ``0 .. len(x) - k - 1``, so
    exactly ``len(x) - k`` windows are produced. The window that would end on
    the last element of ``x`` is deliberately not emitted.

    Args:
        x: A sized, sliceable sequence of values.
        k: Window length; must satisfy ``1 <= k < len(x)``.

    Returns:
        A NumPy array built from the list of windows, one window per row.

    Raises:
        ValueError: If ``k`` is smaller than 1, or if ``k`` is greater than
            or equal to ``len(x)``.
    """
    n = len(x)
    # A zero-length window carries no data and a negative k would slice
    # ragged, meaningless windows, so reject both up front.
    if k < 1:
        raise ValueError("k must be a positive integer")
    if k >= n:
        raise ValueError("Can not deal with k greater than the length of x")
    windows = [list(x[start:start + k]) for start in range(n - k)]
    return np.array(windows)