t_cars <- t(cars) # transpose cars
grand_mean <- mean(t_cars[2,]) # Grand mean
xbar <- mean(t_cars[1,]) # mean of x
ybar <- mean(t_cars[2,]) # mean of y
xxbar <- rep(0, length(t_cars[1,]))
yybar <- rep(0, length(t_cars[2,]))
for(i in 1:length(t_cars[1,])){ #calculate x-xbar and y-ybar
xxbar[i] <- (t_cars[1,][i] - xbar)
yybar[i] <- t_cars[2,][i] - ybar
}
ssxx <- sum(xxbar^2) # V(X)
ssyx <- sum((yybar)*(xxbar)) # COV(Y,X)
b1 <- ssyx/ssxx # slope
b0 <- ybar - b1*xbar # intercept
yhat <- rep(0, length(t_cars[1,])) # predicited values (yhat)
for(i in 1:length(t_cars[1,])){
yhat[i] <- b0 + b1*t_cars[1,][i]
}
###_____ Errors______ ###
error_within <- rep(0, length(t_cars[1,])) # error within (e)
for(i in 1:length(t_cars[1,])){
error_within[i] <- t_cars[2,][i] - yhat[i]
}
error_between <- rep(0, length(t_cars[1,])) # errors between groups
for(i in 1:length(t_cars[2,])){
error_between[i] <- yhat[i] - grand_mean
}
error_total <- rep(0, length(t_cars[1,])) # errors total
for(i in 1:length(t_cars[2,])){
error_total[i] <- grand_mean - t_cars[2,][i]
}
###____Sum of Squares_____###
ssb <- sum(t(error_between)*error_between) # sum of squares between
ssw <- sum(t(error_within)*error_within) # sum of squares within
sst <- sum(t(error_total)*error_total) # Total sum of squares
###___Degrees of freedom____###
anova_df <- function(n, constraints) { # degrees of freedom
degreesoffreedom <- n - constraints
return(degreesoffreedom)
}
df_ssw <- anova_df(50,2) # df ssw
df_ssb <- anova_df(2,1) # df ssb
df_sst <- anova_df(50,1) # df sst
###_____Mean Square Errors_____###
mse_between <- ssb/df_ssb # mean square error between
mse_within <- ssw/df_ssw # mean square error within
###______F Statistic_________###
fcalc <- mse_between/mse_within # f statistic ( fcalc)
ftab <- qf(.95, df1 = 1, df2 = 48) # f tabulated value (ftab)
decision <- if(fcalc > ftab) print("there is linear relationship")
## [1] "there is linear relationship"
###_______standard errors_______###
stderror_residuals <- sqrt(mse_within)
stderror_intercept <- sqrt(mse_within/ssxx)
stderror_slope <- sqrt(mse_within*(1/50 + xbar^2/ssxx))
###____results_____###
cat("b0:", b0, "\nb1", b1,"\nregression:", ssb,"\nResiduals:", ssw, "\nmse_between:",
mse_between, "\nmse_within:", mse_within, "\nfcalc", fcalc, "\nDecision:", decision)
## b0: -17.57909
## b1 3.932409
## regression: 21185.46
## Residuals: 11353.52
## mse_between: 21185.46
## mse_within: 236.5317
## fcalc 89.56711
## Decision: there is linear relationship
###______comparision with lm() & anova(lm)_____###
lm(cars$dist ~ cars$speed)
##
## Call:
## lm(formula = cars$dist ~ cars$speed)
##
## Coefficients:
## (Intercept) cars$speed
## -17.579 3.932
anova(lm(cars$dist ~ cars$speed))
## Analysis of Variance Table
##
## Response: cars$dist
## Df Sum Sq Mean Sq F value Pr(>F)
## cars$speed 1 21186 21185.5 89.567 1.49e-12 ***
## Residuals 48 11354 236.5
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1