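When analyzing time series data, it is often necessary to cut out a segment (window) of the series and compute features from it. The code below extracts summary statistics, percentile-based features, peak (local extremum) features, and the spectral entropy of such a segment.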
from scipy import fftpack, signal
import scipy
import numpy as np
import matplotlib.pyplot as plt
def pentropy(y, fs):
    """Return the spectral entropy of a signal.

    The power spectral density is estimated with ``signal.periodogram``
    (which removes the mean of ``y`` by default), normalised so that it sums
    to one, and treated as a probability distribution whose Shannon entropy
    (natural logarithm, i.e. in nats) is returned.

    Parameters
    ----------
    y : np.ndarray
        One-dimensional wave data.
    fs : int or float
        Sample frequency. It only rescales the spectrum uniformly, so it
        does not influence the returned entropy.

    Returns
    ----------
    float
        Spectral entropy of ``y``; ``nan`` when the spectrum carries no
        power (e.g. a constant or empty signal), where it is undefined.
    """
    _, pxx = signal.periodogram(y, fs)
    total_power = np.sum(pxx)
    # A constant window has an all-zero spectrum after mean removal; dividing
    # by zero would only produce NaN through a runtime warning, so return NaN
    # explicitly. ``not (... > 0)`` also catches a NaN total.
    if not total_power > 0:
        return np.nan
    # Normalise to a probability distribution over frequency bins.
    pk = pxx / total_power
    return scipy.stats.entropy(pk)
def generate_features(y, x, fs=1, order=5):
    """Build a fixed-length feature vector from one segment of a time series.

    Parameters
    ----------
    y : np.ndarray
        Wave data (one-dimensional).
    x : np.ndarray
        Time etc.; sample positions matching ``y`` element for element.
    fs : int
        Sample frequency, forwarded to the spectral entropy.
    order : int
        Number of neighbouring samples on each side that a sample is
        compared against when detecting local maxima / minima.

    Returns
    ----------
    np.ndarray
        Feature vector with 27 entries, in this order:
        L1 norm, L2 norm, mean, standard deviation, skewness, kurtosis,
        mean absolute deviation, value range, asymmetry,
        highest local maximum, lowest local minimum, their difference,
        mean position of all extrema, number of extrema,
        max / min / (max - min) / mean / median spacing between
        neighbouring extrema, spectral entropy,
        and the 0, 1, 25, 50, 75, 99, 100 percentiles relative to the mean.
        Peak features that cannot be computed (no extrema found, or fewer
        than two extrema for the spacing features) are ``nan``.
    """
    y = np.asarray(y)
    x = np.asarray(x)

    # norm
    L1 = np.linalg.norm(y, ord=1)
    L2 = np.linalg.norm(y, ord=2)
    # mean
    mean = np.mean(y)
    # Standard deviation
    std = np.std(y)
    # skewness
    skew = scipy.stats.skew(y)
    # kurtosis
    kurtosis = scipy.stats.kurtosis(y)
    # mean absolute deviation (an L1 norm would give the sum, which grows
    # with the segment length instead of describing the spread)
    mad = np.mean(np.abs(y - mean))
    # percentile
    percentil_calc = np.percentile(y, [0, 1, 25, 50, 75, 99, 100])
    # relative_percentile
    relative_percentile = percentil_calc - mean
    # value range
    max_range = percentil_calc[-1] - percentil_calc[0]
    # asymmetry: (max - mean) + (min - mean), zero for a symmetric range
    v_max = relative_percentile[-1]
    v_min = relative_percentile[0]
    asymmetry = v_max + v_min

    # Get index of peak value
    maxid = signal.argrelmax(y, order=order)  # local maxima
    minid = signal.argrelmin(y, order=order)  # local minima
    peak_tops = y[maxid]
    peak_bottoms = y[minid]
    # max height of peaks / min height of peaks; NaN when none were detected
    # (np.max / np.min raise on empty input)
    max_height = np.max(peak_tops) if peak_tops.size else np.nan
    min_height = np.min(peak_bottoms) if peak_bottoms.size else np.nan
    # peak height diff
    peak_width = max_height - min_height

    # Positions of all extrema. They must be sorted: maxima and minima are
    # found separately, and differencing the raw concatenation would include
    # a meaningless jump from the last maximum back to the first minimum.
    x_p = np.sort(np.append(x[maxid], x[minid]))
    # number of peaks
    num_peak = len(x_p)
    # mean position of the extrema
    mean_width_all = np.mean(x_p) if num_peak else np.nan

    # max, min and their diff, mean, median of the spacing between
    # neighbouring extrema
    diff1 = np.diff(x_p)
    if diff1.size:
        width_max = np.max(diff1)
        width_min = np.min(diff1)
        width_diff = width_max - width_min
        width_mean = np.mean(diff1)
        width_median = np.median(diff1)
    else:
        # Fewer than two extrema: no spacing is defined.
        width_max = width_min = width_diff = np.nan
        width_mean = width_median = np.nan

    # spectral entropy
    se = pentropy(y, fs)

    # merge to features (20 scalar features + 7 relative percentiles = 27)
    features = np.concatenate([np.asarray(
        [L1, L2, mean, std, skew, kurtosis, mad, max_range,
         asymmetry, max_height, min_height, peak_width,
         mean_width_all, num_peak, width_max, width_min, width_diff,
         width_mean, width_median, se]
    ), relative_percentile])
    return features
# Demo input: a sine wave sampled at 100 points on [0, 10] with additive
# Gaussian noise (standard deviation 0.5), then its feature vector.
x = np.linspace(0, 10, num=100)
yorg = np.sin(x)
y = yorg + 0.5 * np.random.randn(x.size)
features = generate_features(y, x, order=5)
Recommended Posts