# One Way ANOVA is a technique to compare the population means from two or more normal distributions.
# Assumptions:
# Response variable residuals are normally distributed (or approximately normally distributed).
# Variances of populations are equal.
# Responses for a given group are independent and identically distributed normal random variables (not a simple random sample (SRS)).

# http://web.utah.edu/stat/introstats/anovaflash.html
# http://courses.statistics.com/software/R/R1way.htm
# Download the donuts data set from the course site; the first line of the
# file holds the column names.
donuts <- read.table(
  "http://courses.statistics.com/software/data/donuts.txt",
  header = TRUE
)
donuts
names(donuts)
# Reshape to long format: stack() returns a data frame whose `values` column
# holds the observations and whose `ind` factor records the source column
# (presumably one column per group -- confirm against the data file).
sdonuts <- stack(donuts)
summary(donuts)
summary(sdonuts)
# attach() is intentionally avoided: the columns are supplied through the
# `data` argument, so nothing is added to the search path and no existing
# object can be masked.
# The boxplots suggest there is a difference among the mean values.
boxplot(values ~ ind, data = sdonuts)
oneway.test(values ~ ind, data = sdonuts, var.equal = TRUE)
# The test below does the same thing as oneway.test
anova(lm(values ~ ind, data = sdonuts))

# Case Study 12.2.1 from handout
# Following Ch 18 example from 'Statistics with R by GH'
# Six observations for each of the four treatment groups.
ns <- c(69, 52, 71, 58, 59, 65)
ls <- c(55, 60, 78, 58, 62, 66)
ms <- c(66, 81, 70, 77, 57, 79)
hs <- c(91, 72, 81, 67, 95, 84)
# Numeric group code (1-4) for every observation, repeated once per element
# of the matching group so that it lines up with `y` below.
treatment <- rep(
  c(1, 2, 3, 4),
  times = c(length(ns), length(ls), length(ms), length(hs))
)
# lm() and oneway.test() need a factor, otherwise the codes would be treated
# as a numeric covariate.
treatmentfactor <- factor(treatment, labels = c(1, 2, 3, 4))
# Response vector: the four groups concatenated in the same order as the codes.
y <- c(ns, ls, ms, hs)
y
# One-way ANOVA expressed as a linear model with the treatment factor as the
# only predictor.
g <- lm(y ~ treatmentfactor)
anova(g)

# the test below does the same thing as anova(g)
# (TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.)
oneway.test(y ~ treatmentfactor, var.equal = TRUE)

# An equivalent route: bind the four groups side by side and let stack()
# produce the long-format table.
df <- cbind(ns, ls, ms, hs)
class(df)
# cbind() returns a matrix here, which is why stack was not working in class.
df <- as.data.frame(df)
class(df)
# now a data.frame
sdf <- stack(df)
summary(sdf)
# `values` and `ind` are the columns created by stack().
anova(lm(values ~ ind, data = sdf))

# Analysis of Variance Table
# 
# Response: values
#            Df Sum Sq Mean Sq F value   Pr(>F)   
# ind        3 1464.1  488.04  6.1203 0.003979 **
# Residuals 20 1594.8   79.74                    
# ---
# Signif. codes:  0 '***' 0.001 '**'  0.01 '*' 0.05 '.'  0.1 ' '  1

# With the built-in `trees` data set, anova() applied to an lm() fit and
# anova() applied to the matching aov() fit give the same table.
mylm <- lm(Girth ~ Height + Volume, data = trees)
anova(mylm)
# ... and the aov() version of the same model:
anova(aov(Girth ~ Height + Volume, data = trees))