############################################################
#                                                          #
#    Virtual Laboratory of Geomathematics in Python        #
#                                                          #
#    Bivariate descriptive statistics (01.06.2017)         #
#                                                          #
#    VLG Team, Complutense University of Madrid, Spain     #
#                                                          #
#    THIS SCRIPT IS PROVIDED BY THE AUTHORS "AS IS" AND    #
#    CAN BE USED BY ANYONE FOR THE PURPOSES OF EDUCATION   #
#    AND RESEARCH.                                         #
#                                                          #
############################################################

import statistics as ss

import matplotlib.pyplot as plt
import numpy as np
import pandas
import pylab
import scipy.stats as s
import statsmodels.api as sm

try:
    # scatter_matrix is exposed by pandas.plotting since pandas 0.20.
    from pandas import plotting
except ImportError:
    # Fallback for older pandas releases that only ship pandas.tools.
    from pandas.tools import plotting

# Example of measurements of the tarsus and femur in fossil spiders.
# The script reads two columns (X, Y), prints a statistical summary,
# draws box plots and a scatter matrix, and fits a linear regression
# of Y on X with three different tools (statsmodels, scipy, numpy).

# Read data with numpy: each column of the file becomes one array.
col1, col2 = np.loadtxt('spiders.dat', unpack=True)

# Statistical summary
# Each row is (label, function); the function is applied to both
# columns and the two results are printed side by side.
summary_rows = [
    ('Minimum', min),
    ('Maximum', max),
    # Range = max - min (the original label "Rank" was a mistranslation).
    ('Range', lambda column: max(column) - min(column)),
    ('Average', ss.mean),
    ('Median', ss.median),
    ('Q1', lambda column: np.percentile(column, 25)),
    ('Q2', lambda column: np.percentile(column, 50)),
    ('Q3', lambda column: np.percentile(column, 75)),
    # statistics.variance / statistics.stdev are the sample (n - 1) versions.
    ('Variance', ss.variance),
    ('Stand. dev.', ss.stdev),
]

print(' X Y ')
print('================================')
print('n =', len(col1), ' ', len(col2))
for label, statistic in summary_rows:
    print('%s = %.2f' % (label, statistic(col1)), ' %.2f' % statistic(col2))
print()
print('================================')
print("Correlation matrix: ")
print(np.corrcoef(col1, col2))
print()
print('================================')
print("Matrix of variance-covariance: ")
print(np.cov(col1, col2))
print()
print('================================')
print()

# Box-and-Whisker plot
# basic plot (keyword arguments make the notch / flier-symbol intent explicit)
plt.boxplot([col1, col2], notch=False, sym=' ')
# notched plot, drawn on its own figure
plt.figure()
plt.boxplot([col1, col2], notch=True, sym=' ')

# Scatter diagram with pandas
# Read data with pandas
data = pandas.read_csv('spiders.csv')
plotting.scatter_matrix(data, marker='o')

# Ordinary least squares with statsmodels.
# add_constant prepends a column of ones so the model has an intercept.
design_matrix = sm.add_constant(col1)
mod = sm.OLS(col2, design_matrix)
res = mod.fit()
print(res.summary())
print()

# Method 1. Regression table and scatter diagram
linear_regression = s.linregress(col1, col2)
print('Linear regression = ', linear_regression)
plt.figure()
plt.scatter(col1, col2, alpha=0.3)

# Method 2. Draw regression line
# A degree-1 polynomial fit returns slope (m) and intercept (b).
m, b = np.polyfit(col1, col2, deg=1)
plt.plot(col1, col2, '.')
plt.plot(col1, m * col1 + b, color='blue')
plt.ylim(0, 12)                     # Axis limits y
plt.xlabel("X")                     # X axis legend
plt.ylabel("Y")                     # Y axis legend
plt.title("Regression adjustment")  # Title of the graph

# Display every figure created above.
pylab.show()