############################################################ # # # Virtual Laboratory of Geomathematics in Python # # # # General Linear Models: One way ANOVA (27.07.2017) # # # # VLG Team, Complutense University of Madrid, Spain # # # # THIS SCRIPT IS PROVIDED BY THE AUTHORS "AS IS" AND # # CAN BE USED BY ANYONE FOR THE PURPOSES OF EDUCATION # # AND RESEARCH. # # # ############################################################ import math import numpy as np import scipy.stats as s import statistics as ss import matplotlib.pyplot as plt import pylab #from statsmodels.stats.multicomp import pairwise_tukeyhsd from statsmodels.stats.multicomp import MultiComparison # ANOVA ONE-WAY # Example Presence of copper in a species of plant cultivated in three # different soils col1, col2, col3 = np.loadtxt('copper.dat', unpack=True) col_Cu, col_Group = np.loadtxt('copper2.dat', unpack=True) # Box-and-Whisker plot plt.figure() plt.boxplot([col1,col2, col3], 1,' ') # Scatter plot y_data=[np.random.random() for x in range (0, len(col1))] plt.figure() plt.scatter(col1, y_data, color="red", marker="^") y_data=[np.random.random() for x in range (0, len(col2))] plt.figure() plt.scatter(col2, y_data, color="red", marker="+") y_data=[np.random.random() for x in range (0, len(col3))] plt.figure() plt.scatter(col2, y_data, color="red", marker="+") # Normal probability plot plt.figure() s.probplot(col1, dist="norm", plot=pylab) pylab.show() plt.figure() s.probplot(col2, dist="norm", plot=pylab) pylab.show() plt.figure() s.probplot(col3, dist="norm", plot=pylab) pylab.show() # Histogram # histtype= normed=0 1 'bar' 'step', cumulative=0 1 bins1=round(1+3.222*math.log10(len(col1))) bins2=round(1+3.222*math.log10(len(col2))) bins3=round(1+3.222*math.log10(len(col3))) plt.figure() plt.hist(col1,bins1,normed=0,color='green',alpha=0.8, histtype='bar', cumulative=0) plt.hist(col2,bins2,normed=0,color='yellow',alpha=0.8, histtype='bar', cumulative=0) plt.hist(col3,bins3,normed=0,color='blue',alpha=0.8, histtype='bar', cumulative=0) pylab.show() # Normality tests print() print("Normality tests (Sample 1): ") print() normed_datos=(col1-ss.mean(col1))/ss.stdev(col1) kolmogorov_results=s.kstest(normed_datos,'norm') print("Kolmogorov = ",kolmogorov_results) shapiro_results=s.shapiro(col1) print("Shapiro-Wilks = ",shapiro_results) agostino_results=s.mstats.normaltest(col1) print("D’Agostino = ",agostino_results) anderson_results=s.anderson(normed_datos,'norm') print("Anderson-Darling = ",anderson_results) print() print() print("Normality tests (Sample 2): ") print() normed_datos=(col2-ss.mean(col2))/ss.stdev(col2) kolmogorov_results=s.kstest(normed_datos,'norm') print("Kolmogorov = ",kolmogorov_results) shapiro_results=s.shapiro(col2) print("Shapiro-Wilks = ",shapiro_results) agostino_results=s.mstats.normaltest(col2) print("D’Agostino = ",agostino_results) anderson_results=s.anderson(normed_datos,'norm') print("Anderson-Darling = ",anderson_results) print() print() print("Normality tests (Sample 3): ") print() normed_datos=(col3-ss.mean(col3))/ss.stdev(col3) kolmogorov_results=s.kstest(normed_datos,'norm') print("Kolmogorov = ",kolmogorov_results) shapiro_results=s.shapiro(col3) print("Shapiro-Wilks = ",shapiro_results) agostino_results=s.mstats.normaltest(col2) print("D’Agostino = ",agostino_results) anderson_results=s.anderson(normed_datos,'norm') print("Anderson-Darling = ",anderson_results) print() print() print("Test for homogeneity of variance: ") print() bart_result,p_value=s.bartlett(col1,col2,col3) print('Bartlett test: ',bart_result,' p-value = ',p_value) if p_value<0.05: print("homoscedasticity is not met in the ANOVA") print() levene_result,p_value=s.levene(col1,col2,col3) print('Levene test: ',levene_result,' p-value = ',p_value) if p_value<0.05: print("homoscedasticity is not met in the ANOVA") print() # ANOVA one-way print() k=3 # Define the number of treated groups N=33 # Total data or individuals F_result, p_value=s.f_oneway(col1,col2,col3) print ('One-way ANOVA') print ('===============================') print ('F value:', F_result) print ('p-value:', p_value, '\n') print() print('Between Groups df = ', k-1) print('Within Groups df = ',N-k) print() print('Total df = ', N - 1) print ('===============================') print() mc = MultiComparison(col_Cu,col_Group) result = mc.tukeyhsd() print(result) print(mc.groupsunique) print() print() # Kruskal-Wallis print() H_result, p_value=s.kruskal(col1,col2,col3) print ('Kruskal-Wallis H-test') print ('===============================') print ('H value:', H_result) print ('p-value:', p_value, '\n')