Anova for Simple Linear Regression

t_cars <- t(cars) # transpose cars

grand_mean <- mean(t_cars[2,]) # Grand mean

xbar <- mean(t_cars[1,]) # mean of x

ybar <- mean(t_cars[2,]) # mean of y



xxbar <- rep(0, length(t_cars[1,]))

yybar <- rep(0, length(t_cars[2,]))

for(i in 1:length(t_cars[1,])){  #calculate x-xbar and y-ybar
  
  xxbar[i] <- (t_cars[1,][i] - xbar)
  
  yybar[i] <- t_cars[2,][i] - ybar
}

ssxx <- sum(xxbar^2) # V(X)

ssyx <- sum((yybar)*(xxbar)) # COV(Y,X)

b1 <- ssyx/ssxx  # slope 

b0 <- ybar - b1*xbar  # intercept

yhat <- rep(0, length(t_cars[1,])) # predicited values (yhat)

for(i in 1:length(t_cars[1,])){
  
  yhat[i] <- b0 + b1*t_cars[1,][i]
}

                           ###_____ Errors______ ###

error_within <- rep(0, length(t_cars[1,])) #  error within (e)

for(i in 1:length(t_cars[1,])){
  
  error_within[i] <-  t_cars[2,][i] - yhat[i]
}


error_between <- rep(0, length(t_cars[1,])) # errors between groups

for(i in 1:length(t_cars[2,])){
  
  error_between[i] <- yhat[i] - grand_mean
}

error_total <-  rep(0, length(t_cars[1,])) # errors total

for(i in  1:length(t_cars[2,])){
  
  error_total[i] <- grand_mean - t_cars[2,][i]
}
           
                         ###____Sum of Squares_____###

ssb <- sum(t(error_between)*error_between) # sum of squares between 

ssw <- sum(t(error_within)*error_within) # sum of squares within

sst <- sum(t(error_total)*error_total) # Total sum of squares

                      ###___Degrees of freedom____###

anova_df <- function(n, constraints) { # degrees of freedom
  
    degreesoffreedom <- n - constraints
    
    return(degreesoffreedom)
}

df_ssw <- anova_df(50,2) # df ssw
df_ssb <- anova_df(2,1) # df ssb
df_sst <- anova_df(50,1) # df sst

                     ###_____Mean Square Errors_____###

mse_between <- ssb/df_ssb # mean square error between
mse_within <- ssw/df_ssw  # mean square error within

                      ###______F Statistic_________###

fcalc <- mse_between/mse_within # f statistic ( fcalc)

ftab <- qf(.95, df1 = 1, df2 = 48) # f tabulated value (ftab)

decision <- if(fcalc > ftab) print("there is linear relationship")

## [1] "there is linear relationship"

                     ###_______standard errors_______###

stderror_residuals <- sqrt(mse_within)
stderror_intercept <- sqrt(mse_within/ssxx)
stderror_slope <- sqrt(mse_within*(1/50 + xbar^2/ssxx))

                         ###____results_____###
cat("b0:", b0, "\nb1", b1,"\nregression:", ssb,"\nResiduals:", ssw, "\nmse_between:",
    mse_between, "\nmse_within:", mse_within, "\nfcalc", fcalc, "\nDecision:", decision)

## b0: -17.57909 
## b1 3.932409 
## regression: 21185.46 
## Residuals: 11353.52 
## mse_between: 21185.46 
## mse_within: 236.5317 
## fcalc 89.56711 
## Decision: there is linear relationship

                     ###______comparision with lm() & anova(lm)_____###

lm(cars$dist ~ cars$speed)

## 
## Call:
## lm(formula = cars$dist ~ cars$speed)
## 
## Coefficients:
## (Intercept)   cars$speed  
##     -17.579        3.932

anova(lm(cars$dist ~ cars$speed))

## Analysis of Variance Table
## 
## Response: cars$dist
##            Df Sum Sq Mean Sq F value   Pr(>F)    
## cars$speed  1  21186 21185.5  89.567 1.49e-12 ***
## Residuals  48  11354   236.5                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Anova for Simple Linear Regression

karthik

May 28, 2017