1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
############################################################
#                                                          #
#         Virtual Laboratory of Statistics in Python       #
#                                                          #
#       Univariate descriptive statistics  (01.06.2017)    #
#                                                          #                
#         Complutense University of Madrid, Spain          #
#                                                          #
#   THIS SCRIPT IS PROVIDED BY THE AUTHORS "AS IS" AND     #
#   CAN BE USED BY ANYONE FOR THE PURPOSES OF EDUCATION    #
#   AND RESEARCH.                                          #
#                                                          #
############################################################
import math
import numpy as np # importing numpy
import scipy.stats as s # importing scipy.stats
import statistics as ss # importing statistics  

# Embedded graphics
import matplotlib.pyplot as plt # importing matplotlib
import seaborn as sns # importing seaborn

import pylab

# Aesthetic parameters of seaborn
sns.set_palette("deep", desat=.6)
sns.set_context(rc={"figure.figsize": (8, 4)})

# Declare here the name of the data file.
# The file is read with numpy.loadtxt (whitespace-separated numbers); increase
# `skiprows` to skip header lines at the top of the file.
data = np.loadtxt('datafile.dat', skiprows=0)
print(data)

# Measures of centralization, dispersion and form
def quadratic_mean(num):
    """Return the quadratic mean (root mean square) of the values in *num*.

    The result is ``sqrt(sum(x**2 for x in num) / len(num))``.

    Parameters
    ----------
    num : sized iterable of numbers
        The sample; it must support both iteration and ``len()``.

    Returns
    -------
    float
        The square root of the mean of the squared values.

    Raises
    ------
    ZeroDivisionError
        If *num* is an empty built-in sequence (division by ``len(num) == 0``).
    """
    return math.sqrt(sum(n * n for n in num) / len(num))
# Summary of the sample held in `data` (expected to be a 1-D numeric sample
# defined earlier in the script).
# Values that are reused in several lines are computed once here.
data_min, data_max = min(data), max(data)
sample_mean = ss.mean(data)
sample_std = ss.stdev(data)  # sample standard deviation (n - 1 denominator)
q1, q2, q3 = np.percentile(data, [25, 50, 75])

print('n = ', len(data))
print('Minimum = ', data_min)
print('Maximum = ', data_max)
print('Range = ', data_max - data_min)
print('Average = ', sample_mean)
# NOTE: the geometric and harmonic means are only meaningful for positive data.
print('Geometric mean= ', s.gmean(data))
print('Harmonic mean= ', s.hmean(data))
print('Quadratic mean=', quadratic_mean(data))
print('Mode = ', s.mode(data))
print('Median = ', np.median(data))
print('Q1 = ', q1)
print('Q2 = ', q2)
print('Q3 = ', q3)
print('Variance =', ss.variance(data))
print('Standard deviation = ', sample_std)
print('Standard error of the mean =', sample_std / math.sqrt(len(data)))
print('Interquartile range = ', q3 - q1)
# Coefficient of variation expressed as a percentage of the mean.
print('Coefficient of variation = ', (sample_std / sample_mean) * 100)
print()
# s.skew / s.kurtosis return the coefficients themselves (kurtosis is the
# excess kurtosis, i.e. 0 for a normal distribution); s.skewtest and
# s.kurtosistest are hypothesis tests returning (statistic, p-value).
print('Skewness =', s.skew(data))
print('Skewness test =', s.skewtest(data))
print()
print('Kurtosis = ', s.kurtosis(data))
print('Kurtosis test = ', s.kurtosistest(data))
print()
print('Normality test (D’Agostino and Pearson’s ) = ', s.normaltest(data))
print()
# Descriptive statistics
# Box-and-Whisker plot
# Options are passed by keyword (notch, sym, vert, whis) so that each call
# states what it changes; every variant after the first gets its own figure.

# basic plot
plt.boxplot(data, notch=False, sym=' ')
# notched plot
plt.figure()
plt.boxplot(data, notch=True, sym=' ')
# change outliers symbols
plt.figure()
plt.boxplot(data, notch=True, sym='gD')
# horizontal plot
plt.figure()
plt.boxplot(data, notch=False, sym='rs', vert=False)
# whisker length change
plt.figure()
plt.boxplot(data, notch=False, sym='rs', vert=False, whis=0.75)

# Histogram
# Options: histtype='bar' or 'step'; density=False or True;
#          cumulative=False or True
plt.figure()
# Number of bins from Sturges' rule: 1 + log2(n) = 1 + 3.322*log10(n).
numBins = int(round(1 + 3.322 * math.log10(len(data))))
# `density` replaces the `normed` keyword, which Matplotlib no longer accepts.
plt.hist(data, numBins, density=False, color='green', alpha=0.8,
         histtype='bar', cumulative=False)

# Scatter plot
# Each observation is drawn at a random height in [0, 1), one per data point.
y_data = np.random.random(len(data))
plt.figure()
plt.scatter(data, y_data, color="red", marker="^")

# Normality probability plot
# Drawn on its own figure; sample quantiles are plotted against the quantiles
# of a normal distribution.
plt.figure()
s.probplot(data, dist="norm", plot=plt)
# Display all figures created so far.
plt.show()