1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
############################################################
#                                                          #
#         Virtual Laboratory of Statistics in Python       #
#                                                          #
#    General Linear Models: Linear Regression (01.08.2017) #
#                                                          #                
#         Complutense University of Madrid, Spain          #
#                                                          #
#   THIS SCRIPT IS PROVIDED BY THE AUTHORS "AS IS" AND     #
#   CAN BE USED BY ANYONE FOR THE PURPOSES OF EDUCATION    #
#   AND RESEARCH.                                          #
#                                                          #
############################################################
import math
import numpy as np 
import scipy.stats as s
import statistics as ss
import statsmodels.api as sm import matplotlib.pyplot as plt

import pylab # LINEAR REGRESSION # Declare here the name of the data file col1, col2 = np.loadtxt('datafile.dat', unpack=True)

# Box-and-Whisker plot plt.figure()
plt.boxplot([col1,col2], 1,' ')

# Scatter plot y_data=[np.random.random() for x in range (0, len(col1))]
plt.figure()
plt.scatter(col1, y_data, color="red", marker="^")

y_data=[np.random.random() for x in range (0, len(col2))]
plt.figure()
plt.scatter(col2, y_data, color="red", marker="+")

# Normal probability plot plt.figure()
s.probplot(col1, dist="norm", plot=pylab)
pylab.show()

plt.figure()
s.probplot(col2, dist="norm", plot=pylab)
pylab.show()

# Gaussian histogram plt.hist(col1)
plt.title("Gaussian Histogram")
plt.xlabel("Value")
plt.ylabel("Frequency")
plt.show()

plt.hist(col2)
plt.title("Gaussian Histogram")
plt.xlabel("Value")
plt.ylabel("Frequency")
plt.show()

# Histogram # histtype= normed=0 1 'bar' 'step', cumulative=0 1 bins1=round(1+3.222*math.log10(len(col1)))
bins2=round(1+3.222*math.log10(len(col2)))
plt.figure()
plt.hist(col1,bins1,normed=0,color='green',alpha=0.8, histtype='bar', cumulative=0)
plt.hist(col2,bins2,normed=0,color='yellow',alpha=0.8, histtype='bar', cumulative=0)
pylab.show()


# Normality test print()
print("Normality tests (Variable 1): ")
print()
normed_datos=(col1-ss.mean(col1))/ss.stdev(col1)
kolmogorov_results=s.kstest(normed_datos,'norm')
print("Kolmogorov = ",kolmogorov_results)
shapiro_results=s.shapiro(col1)
print("Shapiro-Wilks = ",shapiro_results)
agostino_results=s.mstats.normaltest(col1)
print("D’Agostino = ",agostino_results)
anderson_results=s.anderson(normed_datos,'norm')
print("Anderson-Darling = ",anderson_results)
print()
print()
print("Normality tests (Variable 2): ")
print()
normed_datos=(col2-ss.mean(col2))/ss.stdev(col2)
kolmogorov_results=s.kstest(normed_datos,'norm')
print("Kolmogorov = ",kolmogorov_results)
shapiro_results=s.shapiro(col2)
print("Shapiro-Wilks = ",shapiro_results)
agostino_results=s.mstats.normaltest(col2)
print("D’Agostino = ",agostino_results)
anderson_results=s.anderson(normed_datos,'norm')
print("Anderson-Darling = ",anderson_results)
print()
print()

# Simple linear regression print()
# Method 1 slope_result, intercept_result, r_value, p_value, std_err_result=s.linregress(col1,col2)
print ('Linear regression ')
print ('===============================')
print ('slope:', slope_result)
print ('intercept:', intercept_result)
print ('correlation coefficient:', r_value)
print ('p-value:', p_value)
print ('Standard error of the estimate:',std_err_result , '\n')
print()
print ('===============================')
print()
# Method 2: Note that X=col1 e Y=col2 print()
results = sm.OLS(col2,sm.add_constant(col1)).fit()
print (results.summary())
plt.scatter(col1,col2)
m, b = np.polyfit(col1, col2, deg=1)
plt.plot(col1, col2, '.')
plt.plot(col1, m*col1 + b, 'blue')
plt.show()