Bei der Analyse von Zeitreihendaten ist es manchmal erforderlich, einen Teilabschnitt der Zeitreihe herauszuschneiden und daraus Merkmale zu extrahieren. Mit dem folgenden Code können die folgenden Merkmale extrahiert werden.
from scipy import fftpack, signal
import scipy
import numpy as np
import matplotlib.pyplot as plt
def pentropy(y, fs):
    """Return the spectral entropy of a signal.

    The power spectral density of ``y`` is estimated with
    ``scipy.signal.periodogram``, normalised so that it sums to one, and
    fed to ``scipy.stats.entropy`` (Shannon entropy, natural logarithm).

    Parameters
    ----------
    y : array_like
        Signal samples.
    fs : float
        Sampling frequency of ``y``.

    Returns
    -------
    float
        Spectral entropy. NaN is returned when the periodogram carries no
        power at all (e.g. a constant signal, whose mean is removed by the
        periodogram's default detrending), because no probability
        distribution can be formed in that case.
    """
    # Imported explicitly: a bare `import scipy` does not guarantee that the
    # `scipy.stats` submodule has been loaded.
    from scipy import stats

    _, pxx = signal.periodogram(y, fs)

    total_power = np.sum(pxx)
    # `not (... > 0)` also catches a NaN total and avoids a 0/0 division.
    if not total_power > 0:
        return np.nan

    # stats.entropy would re-normalise anyway; doing it here makes the
    # "spectrum as probability distribution" step explicit.
    return stats.entropy(pxx / total_power)
def _reduce_or_nan(reducer, values):
    """Apply ``reducer`` to ``values``; return NaN when ``values`` is empty."""
    return reducer(values) if np.size(values) else np.nan


def generate_features(y, x, fs=1, order=5):
    """Compute a fixed-length feature vector for a 1-D waveform.

    Parameters
    ----------
    y : array_like
        Waveform samples (1-D).
    x : array_like
        Sample positions (time etc.); indexed with the same positions as
        ``y`` to locate the detected extrema.
    fs : float, optional
        Sampling frequency, forwarded to ``pentropy``.
    order : int, optional
        Peak-detection interval: number of neighbouring samples on each
        side a sample is compared with in ``scipy.signal.argrelmax`` /
        ``argrelmin``.

    Returns
    -------
    np.ndarray
        1-D vector of 27 features, in this order:

        0  L1 norm of ``y``
        1  L2 norm of ``y``
        2  mean
        3  standard deviation
        4  skewness
        5  kurtosis
        6  sum of absolute deviations from the mean (not divided by the
           number of samples)
        7  value range (max - min)
        8  asymmetry: (max - mean) + (min - mean)
        9  highest local maximum
        10 lowest local minimum
        11 difference between 9 and 10
        12 mean ``x`` position of all detected extrema
        13 number of detected extrema (maxima + minima)
        14 largest spacing between neighbouring extrema of the same kind
        15 smallest such spacing
        16 difference between 14 and 15
        17 mean spacing
        18 median spacing
        19 spectral entropy
        20-26 percentiles (0, 1, 25, 50, 75, 99, 100) minus the mean

        Peak features that cannot be computed because too few extrema were
        found are NaN.
    """
    # Imported explicitly: a bare `import scipy` does not guarantee that the
    # `scipy.stats` submodule has been loaded.
    from scipy import stats

    # Accept lists as well as arrays; index-array lookups below need ndarrays.
    y = np.asarray(y)
    x = np.asarray(x)

    # Norms
    l1_norm = np.linalg.norm(y, ord=1)
    l2_norm = np.linalg.norm(y, ord=2)

    # Moments
    mean = np.mean(y)
    std = np.std(y)
    skew = stats.skew(y)
    kurtosis = stats.kurtosis(y)

    # Sum of absolute deviations from the mean
    mad = np.linalg.norm(y - mean, ord=1)

    # Percentiles, absolute and relative to the mean
    percentiles = np.percentile(y, [0, 1, 25, 50, 75, 99, 100])
    relative_percentile = percentiles - mean

    # Value range and asymmetry of the extremes around the mean
    max_range = percentiles[-1] - percentiles[0]
    asymmetry = relative_percentile[-1] + relative_percentile[0]

    # Indices of local maxima / minima
    max_idx = signal.argrelmax(y, order=order)[0]
    min_idx = signal.argrelmin(y, order=order)[0]

    # Peak heights (NaN when no extremum of that kind was detected)
    max_height = _reduce_or_nan(np.max, y[max_idx])
    min_height = _reduce_or_nan(np.min, y[min_idx])
    peak_width = max_height - min_height

    # Positions of all extrema: maxima first, then minima
    x_max = x[max_idx]
    x_min = x[min_idx]
    x_p = np.append(x_max, x_min)
    mean_width_all = _reduce_or_nan(np.mean, x_p)
    num_peak = len(x_p)

    # Spacing between neighbouring maxima and between neighbouring minima.
    # The two groups are differenced separately: differencing the
    # concatenated positions would add a meaningless jump from the last
    # maximum to the first minimum.
    spacings = np.append(np.diff(x_max), np.diff(x_min))
    width_max = _reduce_or_nan(np.max, spacings)
    width_min = _reduce_or_nan(np.min, spacings)
    width_diff = width_max - width_min
    width_mean = _reduce_or_nan(np.mean, spacings)
    width_median = _reduce_or_nan(np.median, spacings)

    # Spectral entropy
    se = pentropy(y, fs)

    # Assemble the vector: 20 scalar features followed by 7 relative percentiles
    scalar_features = np.asarray([
        l1_norm, l2_norm, mean, std, skew, kurtosis, mad, max_range,
        asymmetry, max_height, min_height, peak_width,
        mean_width_all, num_peak, width_max, width_min, width_diff,
        width_mean, width_median, se,
    ])
    return np.concatenate([scalar_features, relative_percentile])
# Demo: a sine wave sampled at 100 points on [0, 10] with Gaussian noise
# (standard deviation 0.5) added, then converted into a feature vector.
x = np.linspace(0, 10, num=100)
yorg = np.sin(x)
y = yorg + 0.5 * np.random.randn(yorg.size)
features = generate_features(y, x, order=5)