1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
############################################################
#                                                          #
#         Virtual Laboratory of Statistics in Python       #
#                                                          #
#        Bivariate descriptive statistics  (01.06.2017)    #
#                                                          #                
#         Complutense University of Madrid, Spain          #
#                                                          #
#   THIS SCRIPT IS PROVIDED BY THE AUTHORS "AS IS" AND     #
#   CAN BE USED BY ANYONE FOR THE PURPOSES OF EDUCATION    #
#   AND RESEARCH.                                          #
#                                                          #
############################################################

import numpy as np 
import scipy.stats as s
import statistics as ss
import matplotlib.pyplot as plt

# pandas is used only for the scatter-matrix section below.
# `pandas.tools.plotting` was removed from pandas; `scatter_matrix` is
# provided by the `pandas.plotting` module instead.
import pandas
from pandas import plotting

import pylab
import statsmodels.api as sm

# Declare here the name of the data file

# Read data with numpy: two whitespace-separated columns, unpacked into
# one array per variable (X -> col1, Y -> col2).
col1, col2 = np.loadtxt('datafile.dat', unpack=True)

# Statistical summary: one row per statistic, X value first, then Y value.
print(' X Y ')
print('================================')
print('n =', len(col1), ' ', len(col2))
print('Minimum = %.2f' % min(col1), ' %.2f' % min(col2))
print('Maximum = %.2f' % max(col1), ' %.2f' % max(col2))
# NOTE: the value printed here is the range (max - min); the label text is
# kept exactly as in the original script.
print('Rank = %.2f' % (max(col1) - min(col1)), ' %.2f' % (max(col2) - min(col2)))
print('Average = %.2f' % ss.mean(col1), ' %.2f' % ss.mean(col2))
print('Median = %.2f' % ss.median(col1), ' %.2f' % ss.median(col2))
print('Q1 = %.2f' % np.percentile(col1, 25), ' %.2f' % np.percentile(col2, 25))
print('Q2 = %.2f' % np.percentile(col1, 50), ' %.2f' % np.percentile(col2, 50))
print('Q3 = %.2f' % np.percentile(col1, 75), ' %.2f' % np.percentile(col2, 75))
# statistics.variance / statistics.stdev are the *sample* estimators.
print('Variance = %.2f' % ss.variance(col1), ' %.2f' % ss.variance(col2))
print('Stand. dev. = %.2f' % ss.stdev(col1), ' %.2f' % ss.stdev(col2))
print()
print('================================')
print("Correlation matrix: ")
print(np.corrcoef(col1, col2))
print()
print('================================')
print("Matrix of variance-covariance: ")
print(np.cov(col1, col2))
print()
print('================================')
print()

# Box-and-Whisker plot
# basic plot (no notch); sym=' ' is the flier-symbol argument, as in the
# original positional call
plt.boxplot([col1, col2], notch=False, sym=' ')
# notched plot, drawn on its own figure
plt.figure()
plt.boxplot([col1, col2], notch=True, sym=' ')

# Scatter diagram with pandas
# Read data with pandas (note: this section uses its own CSV file, not the
# numpy data loaded above)
data = pandas.read_csv('spiders.csv')
plotting.scatter_matrix(data, marker='o')

# Ordinary least squares fit of Y on X with statsmodels. add_constant
# prepends a column of ones so the model includes an intercept term.
COL1 = sm.add_constant(col1)
mod = sm.OLS(col2, COL1)
res = mod.fit()
print(res.summary())
print()

# Method 1. Regression table and scatter diagram
regresionlineal = s.linregress(col1, col2)
print('Linear regression = ', regresionlineal)
plt.figure()
plt.scatter(col1, col2, alpha=0.3)

# Method 2. Draw regression line (degree-1 polynomial: slope m, intercept b)
m, b = np.polyfit(col1, col2, deg=1)
plt.plot(col1, col2, '.')
plt.plot(col1, m*col1 + b, 'blue')

plt.ylim(0, 12)  # Axis limits y
plt.xlabel("X")  # X axis legend
plt.ylabel("Y")  # Y axis legend
plt.title("Regression adjustment")  # Title of the graph

# Display every figure created above.
pylab.show()