############################################################
#                                                          #
#         Virtual Laboratory of Statistics in Python       #
#                                                          #
#        Bivariate descriptive statistics  (01.06.2017)    #
#                                                          #                
#         Complutense University of Madrid, Spain          #
#                                                          #
#   THIS SCRIPT IS PROVIDED BY THE AUTHORS "AS IS" AND     #
#   CAN BE USED BY ANYONE FOR THE PURPOSES OF EDUCATION    #
#   AND RESEARCH.                                          #
#                                                          #
############################################################

import statistics as ss

import matplotlib.pyplot as plt
import numpy as np
# importing pandas
import pandas
import pylab
import scipy.stats as s

# The pandas plotting helpers live in ``pandas.plotting`` on current
# releases; older releases only provide them under ``pandas.tools``.
try:
    from pandas import plotting
except ImportError:
    from pandas.tools import plotting

# Declare here the name of the data file (two whitespace-separated columns).
DATA_FILE = 'datafile.dat'

# Read data with numpy: unpack=True returns one array per column (X, Y).
col1, col2 = np.loadtxt(DATA_FILE, unpack=True)


def _describe(values):
    """Return the (label, value) rows of the summary table for one variable.

    The rows are listed in the order in which they are printed. Variance
    and standard deviation come from the ``statistics`` module
    (``variance`` / ``stdev``).
    """
    lowest, highest = min(values), max(values)
    return [
        ('Minimum', lowest),
        ('Maximum', highest),
        # Range = maximum - minimum of the sample.
        ('Range', highest - lowest),
        ('Average', ss.mean(values)),
        ('Median', ss.median(values)),
        ('Q1', np.percentile(values, 25)),
        ('Q2', np.percentile(values, 50)),
        ('Q3', np.percentile(values, 75)),
        ('Variance', ss.variance(values)),
        ('Stand. dev.', ss.stdev(values)),
    ]


# Statistical summary: one row per statistic, X first and Y second.
print('                 X         Y           ')
print('================================')
print('n            =', len(col1), '     ', len(col2))
for (label, x_value), (_, y_value) in zip(_describe(col1), _describe(col2)):
    # Labels are left-justified to 13 characters so the '=' signs line up.
    print('%-13s= %.2f' % (label, x_value), '     %.2f' % y_value)
print()
print('================================')
print("Correlation matrix: ")
print(np.corrcoef(col1, col2))
print()
print('================================')
print("Matrix of variance-covariance: ")
print(np.cov(col1, col2))
print()
print('================================')
print()

# Box-and-whisker plots of X and Y, drawn side by side.
# notch and sym are passed by keyword: this is clearer than a bare 0/1 and
# does not depend on the positional order of boxplot's parameters.
# sym=' ' sets a blank marker for the flier (outlier) points.
# basic plot
plt.boxplot([col1, col2], notch=False, sym=' ')
# notched plot, drawn in a new figure
plt.figure()
plt.boxplot([col1, col2], notch=True, sym=' ')

# Scatter-plot matrix drawn with pandas
# Load the CSV data set into a DataFrame
data = pandas.read_csv(filepath_or_buffer='spiders.csv')
# One scatter panel per pair of columns, using circular markers
plotting.scatter_matrix(frame=data, marker='o')

# Ordinary least squares regression of Y (col2) on X (col1) with statsmodels
import statsmodels.api as sm

# add_constant supplies the constant column, so the model includes an
# intercept term alongside the slope for X.
res = sm.OLS(col2, sm.add_constant(col1)).fit()
print(res.summary())
print()

# Method 1. Regression table (scipy.stats.linregress) and scatter diagram
regresionlineal = s.linregress(col1, col2)
print('Linear regression = ', regresionlineal)
plt.figure()
plt.scatter(col1, col2, alpha=0.3)

# Method 2. Fit a degree-1 polynomial and draw the regression line
# polyfit returns the coefficients highest power first: slope, then intercept.
m, b = np.polyfit(col1, col2, deg=1)
plt.plot(col1, col2, '.')
plt.plot(col1, m*col1 + b, 'blue')

plt.ylim(0, 12)  # Y axis limits (hard-coded; adjust to the range of the data)
plt.xlabel("X")  # X axis label
plt.ylabel("Y")  # Y axis label
plt.title("Regression adjustment")  # Title of the graph
# Display every figure created by the script. pyplot's show() is used
# rather than the pylab interface, which Matplotlib discourages.
plt.show()