Generate a kernel density function in Jupyter and visualize it. For a description of kernel density estimation, see (https://www.ie-kau.net/entry/kernel_density_est).
The kernel density function has a slightly different shape depending on whether SciPy's gaussian_kde function or seaborn's kdeplot function is used.
/home/sampletest/sample.py
from numpy.random import randn
import seaborn as sns
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
sns.set_style("whitegrid")
from scipy.integrate import cumtrapz #Library for integrating pdf in all intervals
# Fit a Gaussian kernel density estimate to a dataset, plot it, and return the
# x positions at which the estimated cumulative distribution reaches the
# `down` and `up` levels.
def pdf_kernel(dataset, down, up):
    """Plot a Gaussian KDE of ``dataset`` and locate two cumulative levels.

    The density is evaluated on 1000 evenly spaced points spanning
    ``[min(dataset) - 0.2, max(dataset) + 0.2]`` and integrated with the
    trapezoidal rule to obtain a cumulative distribution on that grid. The
    probability mass the KDE places below the first grid point is added as an
    offset, so the cumulative values are true CDF values rather than starting
    from zero at the left edge of the grid.

    Side effects: draws on the current matplotlib axes -- the KDE curve
    (label "KDE"), the x-limits, a red shaded region for the part of the curve
    up to the ``down`` position, a red shaded region from the ``up`` position
    onward, and a legend in the upper right.

    Args:
        dataset: One-dimensional sample passed to ``gaussian_kde``.
        down: Lower cumulative level (for example 0.05).
        up: Upper cumulative level (for example 0.9).

    Returns:
        dict with keys ``"down"`` and ``"up"`` holding the first grid x value
        whose cumulative value is at least ``down`` / ``up``. When a level is
        not reached inside the grid, the last grid point is returned.
    """
    # `cumtrapz` was renamed to `cumulative_trapezoid` and later removed from
    # SciPy; prefer the new name and fall back for old installations.
    try:
        from scipy.integrate import cumulative_trapezoid
    except ImportError:
        from scipy.integrate import cumtrapz as cumulative_trapezoid

    d_min = np.min(dataset) - 0.2
    d_max = np.max(dataset) + 0.2

    # Bandwidth is chosen automatically by gaussian_kde.
    d_kernel = gaussian_kde(dataset)

    # Grid on which the density is evaluated and integrated.
    d_xs = np.linspace(d_min, d_max, num=1000)
    d_ys = d_kernel(d_xs)

    # Mass of the KDE to the left of the grid; without it the cumulative curve
    # would wrongly start at 0 at d_min.
    left_tail = d_kernel.integrate_box_1d(-np.inf, d_min)

    # initial=0 keeps the result the same length as d_xs, so d_cdf[i] is the
    # cumulative value at d_xs[i] (without it every index is shifted by one).
    d_cdf = left_tail + cumulative_trapezoid(d_ys, d_xs, initial=0)

    # First grid index whose cumulative value reaches each level, clamped so a
    # level beyond the grid maps to the last grid point instead of overflowing.
    last = len(d_xs) - 1
    idx_d = min(int(np.searchsorted(d_cdf, down)), last)
    idx_u = min(int(np.searchsorted(d_cdf, up)), last)

    # Draw the density curve.
    plt.plot(d_xs, d_ys, label="KDE")
    plt.xlim(d_min - 1, d_max + 1)
    # Shade the lower tail up to (and including) the `down` position.
    plt.fill_between(d_xs[:idx_d + 1], 0, d_ys[:idx_d + 1], facecolor="r", alpha=0.5)
    # Shade the upper tail starting at the `up` position.
    plt.fill_between(d_xs[idx_u:], 0, d_ys[idx_u:], facecolor="r", alpha=0.5)
    # Legend displayed in the upper right.
    plt.legend(loc="upper right")

    return {"down": d_xs[idx_d], "up": d_xs[idx_u]}
# --- Main script ---
# Cumulative levels to mark: the lower 5% and everything above 90%.
down, up = 0.05, 0.9
# Draw 50 samples from the standard normal distribution (randn).
dataset = randn(50)
# Build and plot the gaussian_kde density, and get back the x positions at which
# the cumulative value reaches `down` and `up`.
val = pdf_kernel(dataset, down, up)
# Draw seaborn's kernel density estimate of the same data for comparison.
sns.kdeplot(dataset, label="from seaborn")
Execution result: the range of cumulative value from 0 to down (5% in the illustrated example) and the range from up (90% in the illustrated example) to 100% are shaded.
Recommended Posts