# One-way ANOVA is a technique for comparing the population means of two or
# more normal distributions.
#
# Assumptions:
#   * Response variable residuals are normally distributed (or approximately
#     normally distributed).
#   * Variances of the populations are equal.
#   * Responses for a given group are independent and identically distributed
#     normal random variables (not a simple random sample (SRS)).
#
# http://web.utah.edu/stat/introstats/anovaflash.html
# http://courses.statistics.com/software/R/R1way.htm

# Donuts example ----

# The data set is fetched over the network. If the download fails, warn and
# skip only this example so the offline case study below still runs.
donuts <- tryCatch(
  read.table(
    "http://courses.statistics.com/software/data/donuts.txt",
    header = TRUE
  ),
  error = function(e) {
    warning(
      "Skipping the donuts example; could not read the data: ",
      conditionMessage(e),
      call. = FALSE
    )
    NULL
  }
)

if (!is.null(donuts)) {
  # Results are printed explicitly because expressions inside a braced block
  # are not auto-printed the way top-level expressions are.
  print(donuts)
  print(names(donuts))

  # Reshape from one column per group (wide) to a `values`/`ind` pair (long).
  sdonuts <- stack(donuts)
  print(summary(donuts))
  print(summary(sdonuts))

  # The boxplots suggest there is a difference among the mean values.
  # Columns are supplied through `data =` instead of attach(), which would
  # leave `values` and `ind` on the search path for the rest of the session.
  boxplot(values ~ ind, data = sdonuts)
  print(oneway.test(values ~ ind, data = sdonuts, var.equal = TRUE))

  # The test below does the same thing as oneway.test.
  print(anova(lm(values ~ ind, data = sdonuts)))
}

# Case Study 12.2.1 from handout ----
# Following Ch 18 example from 'Statistics with R by GH'

ns <- c(69, 52, 71, 58, 59, 65)
ls <- c(55, 60, 78, 58, 62, 66)
ms <- c(66, 81, 70, 77, 57, 79)
hs <- c(91, 72, 81, 67, 95, 84)

# One numeric treatment code per observation, in the same order as `y`.
treatment <- c(
  rep(1, length(ns)),
  rep(2, length(ls)),
  rep(3, length(ms)),
  rep(4, length(hs))
)
treatmentfactor <- factor(treatment, labels = c(1, 2, 3, 4))
y <- c(ns, ls, ms, hs)
y

g <- lm(y ~ treatmentfactor)
anova(g)

# The test below does the same thing as anova(g).
oneway.test(y ~ treatmentfactor, var.equal = TRUE)

# Alternatively one can do the following.
df <- cbind(ns, ls, ms, hs)
class(df) # df is a matrix, this is why stack was not working in class.
df <- as.data.frame(df)
class(df) # data.frame
sdf <- stack(df)
summary(sdf)
anova(lm(values ~ ind, data = sdf))
# Analysis of Variance Table
#
# Response: values
#           Df Sum Sq Mean Sq F value   Pr(>F)
# ind        3 1464.1  488.04  6.1203 0.003979 **
# Residuals 20 1594.8   79.74
# ---
# Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Built-in dataset `trees` ----

# From the built-in R dataset trees, the following two are the same:
mylm <- lm(Girth ~ Height + Volume, data = trees)
anova(mylm)
# and
anova(aov(Girth ~ Height + Volume, data = trees))