############################################################
# #
# Virtual Laboratory of Statistics in Python #
# #
# Bivariate descriptive statistics (01.06.2017) #
# #
# Complutense University of Madrid, Spain #
# #
# THIS SCRIPT IS PROVIDED BY THE AUTHORS "AS IS" AND #
# CAN BE USED BY ANYONE FOR THE PURPOSES OF EDUCATION #
# AND RESEARCH. #
# #
############################################################
import statistics as ss

import matplotlib.pyplot as plt
import numpy as np
# importing pandas
import pandas
import pylab
import scipy.stats as s

# pandas moved its plotting helpers from `pandas.tools.plotting` to
# `pandas.plotting` in version 0.20 and later removed the old module.
# Prefer the current location and fall back only for old installations.
try:
    from pandas import plotting
except ImportError:
    from pandas.tools import plotting
# Declare here the name of the data file. np.loadtxt (default settings)
# expects whitespace-separated numeric columns; exactly two columns are
# required because the result is unpacked into two names below.
DATA_FILE = 'datafile.dat'

# Read data with numpy. unpack=True transposes the table so that each file
# column comes back as its own 1-D array (col1 = X, col2 = Y).
col1, col2 = np.loadtxt(DATA_FILE, unpack=True)

# Statistical summary
# Print a two-column table (X, then Y) of descriptive statistics for col1 and
# col2, followed by their correlation and variance-covariance matrices.
# ss.variance, ss.stdev and np.cov all use the sample (n - 1) denominator.
print(' X Y ')
print('================================')
print('n =', len(col1), ' ', len(col2))

# (label, statistic) pairs. Each statistic is applied to both columns in the
# loop below, so X and Y are always summarised with exactly the same function.
summary_rows = [
    ('Minimum', min),
    ('Maximum', max),
    # max - min is the range; the previous label "Rank" was a mistranslation.
    ('Range', lambda values: max(values) - min(values)),
    ('Average', ss.mean),
    ('Median', ss.median),
    ('Q1', lambda values: np.percentile(values, 25)),
    ('Q2', lambda values: np.percentile(values, 50)),
    ('Q3', lambda values: np.percentile(values, 75)),
    ('Variance', ss.variance),
    ('Stand. dev.', ss.stdev),
]
for label, statistic in summary_rows:
    print('%s = %.2f' % (label, statistic(col1)), ' %.2f' % statistic(col2))
print()

print('================================')
print("Correlation matrix: ")
# 2x2 matrix of Pearson correlation coefficients between col1 and col2.
print(np.corrcoef(col1, col2))
print()

print('================================')
print("Matrix of variance-covariance: ")
# 2x2 matrix: variances on the diagonal, covariance off the diagonal.
print(np.cov(col1, col2))
print()
print('================================')
print()
# Box-and-Whisker plot
# Basic plot: one box per variable (X, then Y), drawn on the current figure.
# notch/sym are passed by keyword rather than by position so the call does
# not depend on the parameter order of plt.boxplot.
# NOTE(review): sym=' ' is kept exactly as in the original; presumably it is
# meant to suppress the outlier (flier) markers - confirm.
plt.boxplot([col1, col2], notch=False, sym=' ')

# Notched plot, opened in a new figure so it does not overlay the basic one.
plt.figure()
plt.boxplot([col1, col2], notch=True, sym=' ')
# Scatter diagram with pandas
# Read data with pandas.
# NOTE(review): this loads 'spiders.csv', which is a different file from the
# 'datafile.dat' used for col1/col2 - confirm that this is intentional.
data = pandas.read_csv('spiders.csv')

# Matrix of pairwise scatter plots for the columns of `data`, using circular
# markers for the points.
plotting.scatter_matrix(data, marker='o')
# Ordinary least squares regression of Y (col2) on X (col1) with statsmodels.
import statsmodels.api as sm

# add_constant adds a column of ones to the regressors so that the fitted
# model includes an intercept term as well as the slope.
COL1 = sm.add_constant(col1)
mod = sm.OLS(col2, COL1)  # OLS(endog, exog): response first, regressors second
res = mod.fit()
print(res.summary())
print()
# Method 1. Regression table and scatter diagram
# scipy's linregress fits Y = slope * X + intercept by least squares; the
# returned result object is printed as-is.
regresionlineal = s.linregress(col1, col2)
print('Linear regression = ', regresionlineal)

# Start a new figure for the regression plot. alpha=0.3 makes the points
# translucent so overlapping observations remain visible.
plt.figure()
plt.scatter(col1, col2, alpha=0.3)
# Method 2. Draw regression line
# Degree-1 least-squares polynomial fit; np.polyfit returns coefficients from
# the highest power down, so m is the slope and b is the intercept.
m, b = np.polyfit(col1, col2, deg=1)
plt.plot(col1, col2, '.')           # observations as dots
plt.plot(col1, m*col1 + b, 'blue')  # fitted line evaluated at each X

# NOTE(review): the y limits are hard-coded; data outside 0..12 will be
# clipped from view - adjust for other data sets.
plt.ylim(0, 12)                      # Axis limits y
plt.xlabel("X")                      # X axis legend
plt.ylabel("Y")                      # Y axis legend
plt.title("Regression adjustment")   # Title of the graph

# Display every figure created above. plt.show is the same function that
# pylab.show re-exports; pyplot is the recommended interface.
plt.show()