# Workspace reset intentionally omitted: rm(list = ls()) only removes visible
# objects (not files, loaded packages, or options) and would wipe the caller's
# global environment if this script is sourced. Run it in a fresh R session.
gc() # trigger garbage collection and report memory usage
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 525900 28.1 1167871 62.4 NA 669400 35.8
## Vcells 968912 7.4 8388608 64.0 16384 1851644 14.2
# Work on a copy of the built-in 'sleep' data set
df <- datasets::sleep
# Preview the first six rows
head(df, n = 6L)
## extra group ID
## 1 0.7 1 1
## 2 -1.6 1 2
## 3 -0.2 1 3
## 4 -1.2 1 4
## 5 -0.1 1 5
## 6 3.4 1 6
About the data:
extra : increase in hours of sleep
group : drug given
ID : patient ID
# Column-wise summary: quartiles for 'extra', level counts for the factors
summary(object = df)
## extra group ID
## Min. :-1.600 1:10 1 :2
## 1st Qu.:-0.025 2:10 2 :2
## Median : 0.950 3 :2
## Mean : 1.540 4 :2
## 3rd Qu.: 3.400 5 :2
## Max. : 5.500 6 :2
## (Other):8
The dependent variable is extra (Y) and the independent variable is the drug given, group (X).
# Fit a simple linear regression of extra sleep on the drug group
model <- lm(formula = extra ~ group, data = df)
# Show the fitted call and coefficients only
# (summary(model) would give the full table with standard errors)
print(model)
##
## Call:
## lm(formula = extra ~ group, data = df)
##
## Coefficients:
## (Intercept) group2
## 0.75 1.58
# Box plot of 'extra' for each level of 'group'. Because 'group' is a factor,
# plot() draws a box plot with the two boxes centred at x = 1 and x = 2.
plot(extra ~ group,
     data = df,
     main = "Increase in Hours of Sleep vs. Drug Given",
     xlab = "Drug Given", ylab = "Increase in Hours of Sleep")
# Overlay the single fitted regression line. lm(extra ~ group) dummy-codes the
# factor as 0/1, so drawing its coefficients on this 1/2 axis would place the
# line one slope unit too high at each box. Fitting on the numeric level codes
# (1/2) makes the line pass through the two group means at the box positions.
abline(lm(extra ~ as.numeric(group), data = df), col = "blue")
When group = 1, the estimated mean increase in sleep is 0.75 hours; when group = 2, the estimated increase is 1.58 hours higher (0.75 + 1.58 = 2.33 hours).
Slope: \[\beta_1 = \frac{\text{Cov}(X, Y)}{\text{Var}(X)}\]
# Predictor: numeric level codes of the 'group' factor; response: 'extra'
X <- as.numeric(df[["group"]])
Y <- df[["extra"]]
# Slope = Cov(X, Y) / Var(X)
covariance <- cov(x = X, y = Y)
variance <- var(x = X)
slope <- covariance / variance
print(slope)
## [1] 1.58
Intercept:
\[\beta_0 = \bar{Y} - \beta_1 \bar{X}\]
# Sample means of the predictor and the response
mean_X <- mean(X)
mean_Y <- mean(Y)
# Intercept = mean(Y) - slope * mean(X).
# X holds the factor level codes (1/2), so this is the fitted value at X = 0.
# lm() dummy-codes 'group' as 0/1 and therefore reports the group-1 mean (0.75)
# as its intercept; the two agree because intercept + slope = 0.75.
intercept <- mean_Y - slope * mean_X
print(intercept)
## [1] -0.83