――Je souhaite connaître la taille de l'échantillon et la capacité de vérification lors de la conception d'un test de conformité.
Donc, à titre d'exercice, je l'ai implémenté en python sur la base du code source de la fonction power.prop.test de R. Veuillez vous référer à la documentation officielle R pour la signification des arguments. Le code est à la fin de l'article.
Trouvez la taille d'échantillon n pour satisfaire les conditions ci-dessus.
# python
> params_dict = power_prop_test(p1=0.01, p2=0.03, power=0.8, sig_level=0.05, alternative="one_sided")
Sample size: n = 604.845426012357
Probability: p1 = 0.01, p2 = 0.03
sig_level = 0.05,
power = 0.8,
alternative = one_sided
NOTE: n is number in "each" group
> print(params_dict)
{'n': 604.845426012357, 'p1': 0.01, 'p2': 0.03, 'sig_level': 0.05, 'power': 0.8, 'alternative': 'one_sided'}
C'est presque le même que le résultat de la transmission de la même valeur dans R.
# R
> power.prop.test(p1=0.01, p2=0.03, power=0.8, sig.level=0.05, alternative="one.sided")
Two-sample comparison of proportions power calculation
n = 604.8434
p1 = 0.01
p2 = 0.03
sig.level = 0.05
power = 0.8
alternative = one.sided
NOTE: n is number in *each* group
Ensuite, trouvons la puissance de détection (puissance) lorsque la taille de l'échantillon est n = 1000 de la même manière.
#python
> power_prop_test(n=1000, p1=0.01, p2=0.03, sig_level=0.05, alternative="one_sided")
Sample size: n = 1000
Probability: p1 = 0.01, p2 = 0.03
sig_level = 0.05,
power = 0.9398478094069961,
alternative = one_sided
NOTE: n is number in "each" group
#R
> power.prop.test(n=1000, p1=0.01, p2=0.03, sig.level=0.05, alternative="one.sided")
Two-sample comparison of proportions power calculation
n = 1000
p1 = 0.01
p2 = 0.03
sig.level = 0.05
power = 0.9398478
alternative = one.sided
NOTE: n is number in *each* group
Cela prend également presque la même valeur.
Merci pour la lecture.
#Réglez le paramètre souhaité sur Aucun
#l'alternative est"one_sided" "two_sided"Sélectionnez soit
#Renvoie chaque paramètre calculé sous forme de dictionnaire
def power_prop_test(n=None, p1=None, p2=None, sig_level=0.05, power=None,
alternative="one_sided", strict=False,
tol=2.220446049250313e-16**0.25):
from math import sqrt
from scipy.stats import norm
from scipy.optimize import brentq
missing_params = [arg for arg in [n, p1, p2, sig_level, power] if not arg]
num_none = len(missing_params)
if num_none != 1:
raise ValueError("exactly one of 'n', 'p1', 'p2', 'power' and 'sig.level'must be None")
if sig_level is not None:
if sig_level < 0 or sig_level > 1:
raise ValueError("\"sig_level\" must be between 0 and 1")
if power is not None:
if power < 0 or power > 1:
raise ValueError("\"power\" must be between 0 and 1")
if alternative not in ["two_sided", "one_sided"]:
raise ValueError("alternative must be \"two_sided\" or \"one_sided\"")
t_side_dict = {"two_sided":2, "one_sided":1}
t_side = t_side_dict[alternative]
# compute power
def p_body(p1=p1, p2=p2, sig_level=sig_level, n=n, t_side=t_side, strict=strict):
if strict and t_side==2:
qu = norm.ppf(1-sig_level/t_side)
d = abs(p1-p2)
q1 = 1-p1
q2 = 1-p2
pbar = (p1 + p2) / 2
qbar = 1 - pbar
v1 = p1 * q1
v2 = p2 * q2
vbar = pbar * qbar
power_value = (norm.cdf((sqrt(n) * d - qu * sqrt(2 * vbar))/sqrt(v1 + v2))
+ norm.cdf(-(sqrt(n) * d + qu * sqrt(2 * vbar))/sqrt(v1 + v2)))
return power_value
else:
power_value = norm.cdf((sqrt(n) * abs(p1 - p2) - (-norm.ppf(sig_level / t_side)
* sqrt((p1 + p2) * (1 - (p1 + p2)/2)))) / sqrt(p1 *
(1 - p1) + p2 * (1 - p2)))
return power_value
if power is None:
power = p_body()
# solve the equation for the None value argument
elif n is None:
def n_solver(x, power=power):
return p_body(n=x) - power
n = brentq(f=n_solver, a=1, b=1e+07, rtol=tol, maxiter=1000000)
elif p1 is None:
def p1_solver(x, power=power):
return p_body(p1=x) - power
try:
p1 = brentq(f=p1_solver, a=0, b=p2, rtol=tol, maxiter=1000000)
except:
ValueError("No p1 in [0, p2] can be found to achive the desired power")
elif p2 is None:
def p2_solver(x, power=power):
return p_body(p2=x) - power
try:
p2 = brentq(f=p2_solver, a=p1, b=1, rtol=tol, maxiter=1000000)
except:
VealueError("No p2 in [p1, 1] can be found to achive the desired power")
elif sig_level is None:
def sig_level_solver(x, power=power):
return p_body(sig_level=x) - power
try:
sig_level = brentq(f=sig_level_solver, a=1e-10, b=1-1e-10, rtol=tol, maxiter=1000000)
except:
ValueError("No significance level [0,1] can be found to achieve the desired power")
print("""
Sample size: n = {0}
Probability: p1 = {1}, p2 = {2}
sig_level = {3},
power = {4},
alternative = {5}
NOTE: n is number in "each" group
""".format(n, p1, p2, sig_level, power, alternative))
#Chaque paramètre est renvoyé dans un dictionnaire avec le nom de l'argument comme clé
params_dict = {"n":n, "p1":p1, "p2":p2,
"sig_level":sig_level, "power":power, "alternative":alternative}
return params_dict
Recommended Posts