균등분포,F-분포 헷갈림★PDF,PPF,CDF★표준정규확률 구할땐 CDF★기초통계학-[Chapter06 - 연습문제-09]
1. 표준정규확률변수 Z에 대한 확률 구하기(CDF!!!)
1> P(Z>=2.05)
# P(Z >= 2.05): the upper-tail probability is the complement of the CDF at z.
a = 1 - scipy.stats.norm.cdf(2.05)
# Print explicitly, like the other snippets, so the value also shows outside a notebook.
print(a)
0.02018
2> P(Z<1.11)
# P(Z < 1.11): the standard normal CDF at the cut-off is the left-tail area.
z_cut = 1.11
a = scipy.stats.norm.cdf(z_cut)
print(a)
0.8665
3> P(Z>-1.27)
# P(Z > -1.27) = 1 - P(Z <= -1.27)
z_cut = -1.27
left_tail = scipy.stats.norm.cdf(z_cut)
a = 1 - left_tail
print(a)
0.8979
4> P(-1.02<=Z<=1.02)
# P(-1.02 <= Z <= 1.02): by symmetry about 0, twice the area between 0 and 1.02.
z_cut = 1.02
half_area = scipy.stats.norm.cdf(z_cut) - scipy.stats.norm.cdf(0)
a = half_area * 2
print(a)
0.6922
2. X ~ N(5, 4) ==> 정규분포(평균 5, 분산 4)
https://knowallworld.tistory.com/254
1> P(X>=4.5)
# P(X >= 4.5) for X ~ N(5, 4): standardize, then take the upper tail.
mu = 5
sigma = 2  # square root of the variance 4
z = (4.5 - mu) / sigma  # -0.25
a = 1 - scipy.stats.norm.cdf(z)
print(a)
==> P(Z >= (4.5 - 5) / 루트(4)) = P(Z >= -0.25) = 0.5987
2> P(X<6.5)
# P(X < 6.5) for X ~ N(5, 4): standardize, then read the left-tail area.
mu = 5
sigma = 2  # square root of the variance 4
z = (6.5 - mu) / sigma  # 0.75
a = scipy.stats.norm.cdf(z)
print(a)
==> P(Z < (6.5 - 5) / 루트(4)) = P(Z < 0.75) = 0.7733
3>P(X<=2.5)
# P(X <= 2.5) for X ~ N(5, 4): standardize, then read the left-tail area.
mu = 5
sigma = 2  # square root of the variance 4
z = (2.5 - mu) / sigma  # -1.25
a = scipy.stats.norm.cdf(z)
print(a)
==> P(Z <= (2.5 - 5) / 루트(4)) = P(Z <= -1.25) = 0.1056
4>P(3<=X<=7)
# P(3 <= X <= 7) for X ~ N(5, 4): the interval standardizes to [-1, 1],
# so by symmetry the answer is twice the area between 0 and the upper z.
mu = 5
sigma = 2  # square root of the variance 4
z_hi = (7 - mu) / sigma  # 1.0
half_area = scipy.stats.norm.cdf(z_hi) - scipy.stats.norm.cdf(0)
a = half_area * 2
print(a)
P( (3-5)/루트(4) <= Z <= (7-5)/루트(4) ) = P(-1 <= Z<= 1) = 0.6826
3. V ~ χ²(10) ==> 카이제곱 분포(자유도 10)
https://knowallworld.tistory.com/258
1> 97.5% 백분위수 x**2_0.025
# Chi-square(10): draw the density, shade both 2.5% tails and mark the two cut-offs.
X = np.arange(0, 30, .01)
fig = plt.figure(figsize=(15, 8))
dof = 10
chi2_dist = scipy.stats.chi2(dof)  # frozen distribution, built once and reused below
density = chi2_dist.pdf(X)
# Pass the data as keywords: seaborn 0.12+ no longer accepts x/y positionally.
ax = sns.lineplot(x=X, y=density)

# ppf maps a cumulative probability to the matching quantile.
X_l = chi2_dist.ppf(0.025)      # lower cut-off, about 3.25
X_r = chi2_dist.ppf(1 - 0.025)  # upper cut-off (97.5th percentile), about 20.48
ax.fill_between(X, density, 0, where=(X <= X_l) | (X >= X_r))

# Left tail: cdf gives the area to the left of X_l.
area = chi2_dist.cdf(X_l)
print(area)
# In upper-tail notation the lower cut-off is chi^2_{0.975}, not chi^2_{0.025}.
ax.text(4, .015, 'P(X <' + r'$\chi^2_{0.975}$)' + f"= {round(area,4)}", fontsize=15)
plt.annotate('', xy=(2, .002), xytext=(4, .014), arrowprops=dict(facecolor='black'))
ax.vlines(x=X_l, ymin=0, ymax=chi2_dist.pdf(X_l), color='black', linestyle='solid', label='2')
ax.text(X_l + 1, .002, r'$\chi^2_L= {}$'.format(round(X_l, 2)), fontsize=15)

# Right tail: the complement of the cdf gives the area to the right of X_r.
area = 1 - chi2_dist.cdf(X_r)
print(area)
ax.text(24, .015, 'P(X >' + r'$\chi^2_{0.025}$)' + f"= {round(area,4)}", fontsize=15)
plt.annotate('', xy=(22, .002), xytext=(24, .014), arrowprops=dict(facecolor='black'))
ax.vlines(x=X_r, ymin=0, ymax=chi2_dist.pdf(X_r), color='black', linestyle='solid', label='2')
ax.text(X_r - 4, .002, r'$\chi^2_R= {}$'.format(round(X_r, 2)), fontsize=15)
==> CDF는 넓이 구하기!
==> PPF를 통하여 Z값(원하는 분포값) 얻을 수 있다.
2> P(V > x_0) = 0.995를 만족
P(V<= x_0) = 0.005 (넓이)
==> ppf를 통하여 P(V < X**2_0.995) = P(V < 2.1558) = 0.005 (오른쪽 꼬리 확률 표기 기준으로 x_0 = X**2_0.995)
==> 넓이(확률)는 cdf로 구하고, ppf에는 누적확률(넓이)을 넣어 Z값, V값, x_0 등 분위수를 얻어내자!
x_0 = 2.16
4. T~ t(10) ==> t 분포(자유도)
https://knowallworld.tistory.com/259
1> 95%백분위수 t_0.05
# t(10): draw the density, shade both 5% tails and mark the two cut-offs.
X = np.arange(-5, 5, .01)
fig = plt.figure(figsize=(15, 8))
dof = 10
t_dist = scipy.stats.t(dof)  # frozen distribution, built once and reused below
density = t_dist.pdf(X)
# Pass the data as keywords: seaborn 0.12+ no longer accepts x/y positionally.
ax = sns.lineplot(x=X, y=density)

# ppf maps a cumulative probability to the matching quantile.
X_l = t_dist.ppf(0.05)      # lower cut-off, about -1.81
X_r = t_dist.ppf(1 - 0.05)  # upper cut-off (95th percentile), about 1.81
ax.fill_between(X, density, 0, where=(X <= X_l) | (X >= X_r))

# Left tail: cdf gives the area to the left of X_l.
# The labels name T and t (not V and chi) because this is a t distribution.
area = t_dist.cdf(X_l)
print(area)
ax.text(-4, .05, 'P(T <' + r'$-t_{0.05}$)' + f"= {round(area,4)}", fontsize=15)
plt.annotate('', xy=(-2.6, .004), xytext=(-3, .03), arrowprops=dict(facecolor='black'))
ax.vlines(x=X_l, ymin=0, ymax=t_dist.pdf(X_l), color='black', linestyle='solid', label='2')
ax.text(X_l + 0.4, .004, r'$t_L= {}$'.format(round(X_l, 2)), fontsize=15)

# Right tail: the complement of the cdf gives the area to the right of X_r.
area = 1 - t_dist.cdf(X_r)
print(area)
ax.text(4, .02, 'P(T >' + r'$t_{0.05}$)' + f"= {round(area,4)}", fontsize=15)
plt.annotate('', xy=(3.5, .002), xytext=(4, .014), arrowprops=dict(facecolor='black'))
ax.vlines(x=X_r, ymin=0, ymax=t_dist.pdf(X_r), color='black', linestyle='solid', label='2')
ax.text(X_r - 1, .002, r'$t_R= {}$'.format(round(X_r, 2)), fontsize=15)
X_R = 1.81
2> P(T<=t_0) = 0.995
# t(10): find t_0 with P(T <= t_0) = 0.995 and shade everything to its left.
X = np.arange(-5, 5, .01)
fig = plt.figure(figsize=(15, 8))
dof = 10
t_dist = scipy.stats.t(dof)  # frozen distribution, built once and reused below
density = t_dist.pdf(X)
# Pass the data as keywords: seaborn 0.12+ no longer accepts x/y positionally.
ax = sns.lineplot(x=X, y=density)

# ppf maps the cumulative probability 0.995 to the cut-off t_0 (about 3.17).
X_l = t_dist.ppf(.995)
ax.fill_between(X, density, 0, where=(X <= X_l))

# cdf gives the shaded area to the left of the cut-off.
# The labels name t (not chi) because this is a t distribution.
area = t_dist.cdf(X_l)
print(area)
ax.text(-4, .05, 'P(T <' + r'$t_{0}$)' + f"= {round(area,4)}", fontsize=15)
plt.annotate('', xy=(-2.6, .004), xytext=(-3, .03), arrowprops=dict(facecolor='black'))
ax.vlines(x=X_l, ymin=0, ymax=t_dist.pdf(X_l), color='black', linestyle='solid', label='2')
ax.text(X_l + 0.4, .004, r'$t_0= {}$'.format(round(X_l, 2)), fontsize=15)
t_0 = 3.17
5. F~ F(8 , 6) ==> F 분포(자유도 1 , 자유도 2)
https://knowallworld.tistory.com/260
1> f_0.01 , 8 , 6
# F(8, 6): mark f_{0.01, 8, 6}, the cut-off with 1% of the area to its right.
X = np.arange(0, 10, .01)
fig = plt.figure(figsize=(15, 8))
dof = [8, 6]
f_dist = scipy.stats.f(dof[0], dof[1])  # frozen distribution, built once and reused below
density = f_dist.pdf(X)
# Pass the data as keywords: seaborn 0.12+ no longer accepts x/y positionally.
ax = sns.lineplot(x=X, y=density)

# ppf maps the cumulative probability 0.99 to the upper 1% cut-off (about 8.10).
X_r = f_dist.ppf(1 - 0.01)
ax.fill_between(X, density, 0, where=(X >= X_r))

# Report the shaded right-tail area, so the printed value and the label match
# the shaded region and the arrow.
area = 1 - f_dist.cdf(X_r)
print(area)
ax.text(7, .12, 'P(F >' + r'$f_{0.01 , 8 , 6}$)' + f"= {round(area,4)}", fontsize=15)
plt.annotate('', xy=(8.1, .005), xytext=(7.1, .1), arrowprops=dict(facecolor='black'))
ax.vlines(x=X_r, ymin=0, ymax=f_dist.pdf(X_r), color='black', linestyle='solid', label='2')
ax.text(X_r + 0.4, .004, r'$f_{0.01, 8 , 6}' + '= {}$'.format(round(X_r, 2)), fontsize=15)
b = ['F(8,6)']
plt.legend(b, fontsize=13)
f_0.01,8,6 = 8.1
2>f_0.05,8,6
# F(8, 6): mark f_{0.05, 8, 6}, the cut-off with 5% of the area to its right.
X = np.arange(0, 10, .001)
fig = plt.figure(figsize=(15, 8))
dof = [8, 6]
f_dist = scipy.stats.f(dof[0], dof[1])  # frozen distribution, built once and reused below
density = f_dist.pdf(X)
# Pass the data as keywords: seaborn 0.12+ no longer accepts x/y positionally.
ax = sns.lineplot(x=X, y=density)

# ppf maps the cumulative probability 0.95 to the upper 5% cut-off (about 4.15).
X_r = f_dist.ppf(1 - 0.05)
ax.fill_between(X, density, 0, where=(X >= X_r))

# The complement of the cdf gives the shaded area to the right of X_r.
area = 1 - f_dist.cdf(X_r)
print(area)
ax.text(7, .12, 'P(F >' + r'$f_{0.05 , 8 , 6}$)' + f"= {round(area,4)}", fontsize=15)
plt.annotate('', xy=(8.1, .005), xytext=(7.1, .1), arrowprops=dict(facecolor='black'))
ax.vlines(x=X_r, ymin=0, ymax=f_dist.pdf(X_r), color='black', linestyle='solid', label='2')
ax.text(X_r - 1.4, .004, r'$f_{0.05, 8 , 6}' + '= {}$'.format(round(X_r, 2)), fontsize=15)
b = ['F(8,6)']
plt.legend(b, fontsize=13)
f_0.05,8,6 = 4.15
3>f_0.90,8,6
# F(8, 6): mark f_{0.90, 8, 6}, the cut-off with 90% of the area to its right.
X = np.arange(0, 10, .001)
fig = plt.figure(figsize=(15, 8))
dof = [8, 6]
f_dist = scipy.stats.f(dof[0], dof[1])  # frozen distribution, built once and reused below
density = f_dist.pdf(X)
# Pass the data as keywords: seaborn 0.12+ no longer accepts x/y positionally.
ax = sns.lineplot(x=X, y=density)

# ppf maps the cumulative probability 0.10 to the upper 90% cut-off (about 0.37).
X_r = f_dist.ppf(1 - 0.9)
ax.fill_between(X, density, 0, where=(X >= X_r))

# The complement of the cdf gives the shaded area to the right of X_r.
area = 1 - f_dist.cdf(X_r)
print(area)
ax.text(7, .12, 'P(F >' + r'$f_{0.9 , 8 , 6}$)' + f"= {round(area,4)}", fontsize=15)
plt.annotate('', xy=(8.1, .005), xytext=(7.1, .1), arrowprops=dict(facecolor='black'))
ax.vlines(x=X_r, ymin=0, ymax=f_dist.pdf(X_r), color='black', linestyle='solid', label='2')
ax.text(X_r + 0.04, .05, r'$f_{0.9, 8 , 6}' + '= {}$'.format(round(X_r, 2)), fontsize=15)
b = ['F(8,6)']
plt.legend(b, fontsize=13)
f_0.90,8,6 = 0.37
4>f_0.99,8,6
# F(8, 6): mark f_{0.99, 8, 6}, the cut-off with 99% of the area to its right.
X = np.arange(0, 10, .001)
fig = plt.figure(figsize=(15, 8))
dof = [8, 6]
f_dist = scipy.stats.f(dof[0], dof[1])  # frozen distribution, built once and reused below
density = f_dist.pdf(X)
# Pass the data as keywords: seaborn 0.12+ no longer accepts x/y positionally.
ax = sns.lineplot(x=X, y=density)

# ppf maps the cumulative probability 0.01 to the upper 99% cut-off (about 0.16).
X_r = f_dist.ppf(1 - 0.99)
ax.fill_between(X, density, 0, where=(X >= X_r))

# The complement of the cdf gives the shaded area to the right of X_r.
area = 1 - f_dist.cdf(X_r)
print(area)
ax.text(7, .12, 'P(F >' + r'$f_{0.99 , 8 , 6}$)' + f"= {round(area,4)}", fontsize=15)
plt.annotate('', xy=(8.1, .005), xytext=(7.1, .1), arrowprops=dict(facecolor='black'))
ax.vlines(x=X_r, ymin=0, ymax=f_dist.pdf(X_r), color='black', linestyle='solid', label='2')
ax.text(X_r + .2, .05, r'$f_{0.99, 8 , 6}' + '= {}$'.format(round(X_r, 2)), fontsize=15)
b = ['F(8,6)']
plt.legend(b, fontsize=13)
f_0.99,8,6 = 0.16
==> 답지가 틀렸다 100%
6. X~ U(-2 , 2) ==> 균등분포
https://knowallworld.tistory.com/252
1> X의 확률밀도함수
-2<=X<=2 ==> 1/4
otherwise ==> 0
2> X의 평균과 분산
3> X가 (평균 - 표준편차) 이상 (평균 + 표준편차) 이하일 확률
# Uniform(-2, 2): mean, variance and P(mean - sd <= X <= mean + sd),
# each obtained by integrating the density symbolically.
x = Symbol('x')
f_x = 1/4  # density on [-2, 2]: 1 / (2 - (-2))
f_x_m = f_x * x  # integrand of E[X]
print(f_x_m)
# Use the plain (symbol, lower, upper) limit form in every integral.
mean_1 = Integral(f_x_m, (x, -2, 2)).doit()
print("평균 : {}".format(mean_1))
f_x_v = f_x_m * x  # integrand of E[X^2]
print(f_x_v)
vars_1 = Integral(f_x_v, (x, -2, 2)).doit() - mean_1**2  # Var[X] = E[X^2] - E[X]^2
print("분산 : {}".format(vars_1))
std_1 = math.sqrt(vars_1)  # standard deviation, computed once for both limits
ratio = Integral(f_x, (x, mean_1 - std_1, mean_1 + std_1)).doit()
print("확률 평균-표준편차 이상 평균+표준편차 이하는 {}이다.".format(ratio))
0.25*x
평균 : 0
0.25*x**2
분산 : 1.33333333333333
확률 평균-표준편차 이상 평균+표준편차 이하는 0.577350269189626이다.
출처 : [쉽게 배우는 생활속의 통계학] [북스힐 , 이재원]
※혼자 공부 정리용