1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134
|
############################################################
# #
# Virtual Laboratory of Statistics in Python #
# #
# Inferential statistics with one population (01.06.2017) #
# #
# Complutense University of Madrid, Spain #
# #
# THIS SCRIPT IS PROVIDED BY THE AUTHORS "AS IS" AND #
# CAN BE USED BY ANYONE FOR THE PURPOSES OF EDUCATION #
# AND RESEARCH. #
# #
############################################################
import math import numpy as np import scipy.stats as s import matplotlib.pyplot as plt import pylab
# ONE POPULATION
# Declare here the name of the data file
data=np.loadtxt('datafile.dat', skiprows=1) print(data)
# Statistical summary
n, min_max, mean, var, skew, kurt = s.describe(data) std=math.sqrt(var) print() print("==============================================================") print("n = ",n) print('Average = ',mean) print('Median = ',np.median(data)) print('Variance =',var) print('Stand. dev. = ',std) print('Stand. error of the mean =',std/math.sqrt(n)) print("==============================================================") print()
# Box-and-Whisker plot
plt.figure() plt.boxplot(data, 1, 'rs', 0)
# Scatter plot
y_data=[np.random.random() for x in range (0, len(data))] plt.figure() plt.scatter(data, y_data, color="red", marker="^")
# Normal probability plot
plt.figure() s.probplot(data, dist="norm", plot=pylab) pylab.show()
# Gaussian histogram
plt.hist(data) plt.title("Gaussian Histogram") plt.xlabel("Value") plt.ylabel("Frequency") plt.show()
# Histogram
# histtype= normed=0 1 'bar' 'step', cumulative=0 1
bins=round(1+3.222*math.log10(len(data))) plt.figure() plt.hist(data,bins,normed=0,color='green',alpha=0.8, histtype='bar', cumulative=0) pylab.show()
# Normality test
print() print("Normality tests: ") print() normed_data=(data-mean)/std kolmogorov_results=s.kstest(normed_data,'norm') print("Kolmogorov = ",kolmogorov_results) shapiro_results=s.shapiro(data) print("Shapiro-Wilks = ",shapiro_results) agostino_results=s.mstats.normaltest(data) print("D’Agostino = ",agostino_results) anderson_results=s.anderson(normed_data,'norm') print("Anderson-Darling = ",anderson_results) print()
# t-test one population
# Set up H0: Mean
H0=1.045
print("H0 = ",mean," :") print() # test
t_stat, p_value_t = s.ttest_1samp(data,H0) print("t-test : ",t_stat," p_value : ",p_value_t) if p_value_t>=0.05: print("ACCEPT H0: Mean = ",mean) else: print("REJECT H0: Mean = ",mean) print()
# Wilcoxon Signed-Rank Test
# H0: Median
print("H0 = ",np.median(data)," :") print() # test
w_stat, p_value_w = s.wilcoxon(data-np.median(data)) print("Wilcoxon test : ",w_stat," p_value : ",p_value_w) if p_value_w>=0.05: print("ACCEPT H0: Median = ",np.median(data)) else: print("REJECT H0: Median = :",np.median(data)) print()
# Confidence Intervals
def sdev_interval(alpha, std, data): df = n-1
chi2upper = math.sqrt(s.chi2.ppf(1-alpha/2.0, df)) chi2lower = math.sqrt(s.chi2.ppf(alpha/2.0, df)) LRL = std * math.sqrt(n)/ chi2upper URL = std * math.sqrt(n)/ chi2lower return (LRL, URL) def var_interval(alpha, var, data): LRL, URL = sdev_interval(alpha, math.sqrt(var), data) return (LRL*LRL, URL*URL)
# Choose level of significance
alpha=0.05
ci_mean = s.norm.interval(1-alpha,loc=mean,scale=std/math.sqrt(n)) print("Confidence intervals. Level of significance = ",alpha," :") print() print(" Mean : ",ci_mean)
ci_sdev = sdev_interval(alpha, std, data) print(" Stand. dev. : ",ci_sdev) ci_var = var_interval(alpha,var,data) print(" Variance : ",ci_var) print()
|