1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93
|
############################################################
# #
# Virtual Laboratory of Statistics in Python #
# #
# Univariate descriptive statistics (01.06.2017) #
# #
# Complutense University of Madrid, Spain #
# #
# THIS SCRIPT IS PROVIDED BY THE AUTHORS "AS IS" AND #
# CAN BE USED BY ANYONE FOR THE PURPOSES OF EDUCATION #
# AND RESEARCH. #
# #
############################################################
import math
import numpy as np # importing numpy
import scipy.stats as s # importing scipy.stats
import statistics as ss # importing statistics
# Embedded graphics
import matplotlib.pyplot as plt # importing matplotlib
import seaborn as sns # importing seaborn
import pylab
# Aesthetic parameters of seaborn
sns.set_palette("deep", desat=.6)
# The default figure size is set on matplotlib directly: seaborn's
# set_context() only applies rc keys that belong to its plotting context
# (font sizes, line widths, ...), so a "figure.figsize" entry passed through
# it is silently dropped.
plt.rcParams["figure.figsize"] = (8, 4)
# Declare here the name of the data file
# (np.loadtxt skips no header rows by default, i.e. skiprows=0)
data = np.loadtxt('datafile.dat')
print(data)
# Measures of centralization, dispersion and form
def quadratic_mean(num):
    """Return the quadratic mean (root mean square) of *num*.

    Parameters
    ----------
    num : sequence of numbers
        Any iterable that also supports ``len()``.

    Returns
    -------
    float
        ``sqrt(sum(x * x for x in num) / len(num))``.

    Raises
    ------
    ZeroDivisionError
        If *num* is empty.
    """
    return math.sqrt(sum(n * n for n in num) / len(num))


# Values that are reported more than once are computed a single time.
n_obs = len(data)
minimum = min(data)
maximum = max(data)
mean = ss.mean(data)
std_dev = ss.stdev(data)
q1, q2, q3 = np.percentile(data, [25, 50, 75])

# The harmonic mean is only defined for strictly positive data (some SciPy
# releases raise ValueError otherwise), so report NaN in that case instead
# of aborting the whole report.
harmonic_mean = s.hmean(data) if np.all(data > 0) else float('nan')

print('n = ', n_obs)
print('Minimum = ', minimum)
print('Maximum = ', maximum)
print('Range = ', maximum - minimum)
print('Average = ', mean)
print('Geometric mean= ', s.gmean(data))
print('Harmonic mean= ', harmonic_mean)
print('Quadratic mean=', quadratic_mean(data))
print('Mode = ', s.mode(data))
print('Median = ', np.median(data))
print('Q1 = ', q1)
print('Q2 = ', q2)
print('Q3 = ', q3)
print('Variance =', ss.variance(data))
print('Standard deviation = ', std_dev)
print('Standard error of the mean =', std_dev / math.sqrt(n_obs))
print('Interquartile range = ', q3 - q1)
print('Coefficient of variation = ', (std_dev / mean) * 100)
print()
# NOTE: skewtest/kurtosistest/normaltest are hypothesis tests; each prints a
# (statistic, p-value) result rather than the raw skewness/kurtosis value.
print('Skewness =', s.skewtest(data))
print()
print('Kurtosis = ', s.kurtosistest(data))
print()
print('Normality test (D’Agostino and Pearson’s ) = ', s.normaltest(data))
print()
# Descriptive statistics
# Box-and-Whisker plot
# Every option is passed by keyword: recent matplotlib releases deprecate
# passing notch/sym/vert/whis positionally, and keywords keep each call
# unambiguous regardless of the parameter order of the installed version.

# basic plot
plt.boxplot(data, notch=False, sym=' ')

# notched plot
plt.figure()
plt.boxplot(data, notch=True, sym=' ')

# change outliers symbols
plt.figure()
plt.boxplot(data, notch=True, sym='gD')

# horizontal plot
plt.figure()
plt.boxplot(data, notch=False, sym='rs', vert=False)

# whisker length change
plt.figure()
plt.boxplot(data, notch=False, sym='rs', vert=False, whis=0.75)
# Histogram
# Options: histtype='bar' or 'step'; density=False or True;
# cumulative=False or True
plt.figure()
# Number of bins from Sturges' rule: k = 1 + log2(n) (= 1 + 3.322*log10(n)).
numBins = round(1 + math.log2(len(data)))
# `density` replaces the `normed` keyword, which current matplotlib no longer
# accepts.
plt.hist(data, numBins, density=False, color='green', alpha=0.8,
         histtype='bar', cumulative=False)

# Scatter plot
# One uniform random y value in [0, 1) is drawn per observation, so the plot
# shows the data along the x axis against random heights.
y_data = np.random.random(len(data))
plt.figure()
plt.scatter(data, y_data, color="red", marker="^")
# Normality probability plot
# Drawn on a fresh figure; pyplot provides the same plotting functions that
# the pylab namespace re-exports.
plt.figure()
s.probplot(data, dist="norm", plot=plt)
plt.show()
|