# Observations and the weights attached to each of them.
x <- c(0.22, -2.54, 0.52, 0.75, 25)
w <- c(2, 1, 3, 1, 2)
# Give the value of mu that minimizes the least squares equation
# sum(w * (x - mu)^2). Setting the derivative -2 * sum(w * (x - mu)) to
# zero gives the weighted mean: the factor of 2 cancels, so the
# denominator is sum(w), not sum(2 * w).
mu <- sum(w * x) / sum(w)
mu
## [1] 5.578889
# Paired observations: x is the regressor, y is the outcome.
x <- c(1.8, 1.47, 1.51, 1.73, 1.36, 1.58, 1.57, 1.85, 1.44, 1.42)
y <- c(2.39, 1.72, 2.55, 1.48, 2.19, 0.59, 2.23, 1.65, 2.49, 1.05)
# Fit the regression through the origin and get the slope treating y as
# the outcome and x as the regressor. (Hint: do not center the data, since
# we want regression through the origin, not through the means of the
# data.) `0 + x` drops the intercept term, exactly like `x - 1`.
regression <- lm(y ~ 0 + x)
slope <- coef(regression)
slope
## x
## 1.151408
# Regress fuel economy (mpg) on rear axle ratio (drat) using mtcars.
data(mtcars)
model <- lm(mpg ~ drat, data = mtcars)
# Slope coefficient of drat.
slope <- coef(model)["drat"]
slope
## drat
## 7.678233
# t statistic reported by summary() for the drat coefficient.
hypothesis_result <- coef(summary(model))["drat", "t value"]
hypothesis_result
## [1] 5.096042
# Slope of the regression of the outcome on the predictor, derived from
# the correlation and the two standard deviations alone.
correlation <- 0.7
std_dev_outcome <- 1
# The predictor's standard deviation is one third of the outcome's.
std_dev_predictor <- std_dev_outcome / 3
# slope = cor(Y, X) * sd(Y) / sd(X): the outcome's standard deviation goes
# in the numerator. The inverse ratio would be the slope of the predictor
# regressed on the outcome.
slope <- correlation * (std_dev_outcome / std_dev_predictor)
slope
## [1] 2.1
# Predict the wife's value from the husband's value using the regression
# slope cor * sd(wife) / sd(husband).
# NOTE(review): no means are added or subtracted, so this is only the
# regression prediction if both variables are centred at zero -- confirm
# against the question being answered.
correlation <- 0.6
std_dev_wife <- 10
std_dev_husband <- 14
husband_guess <- 40
wife_guess <- husband_guess * (correlation * (std_dev_wife / std_dev_husband))
wife_guess
## [1] 17.14286
x <- c(10.45, 9.45, 12.41, 14.46, 15.26)
# What is the value of the first measurement if x were normalized (to have
# mean 0 and variance 1)? Subtract the sample mean and divide by the sample
# standard deviation, i.e. the square root of the sample variance.
x_normalized <- (x - mean(x)) / sqrt(var(x))
x_normalized[[1]]
## [1] -0.7835272
# Same paired data as before, now fitted with an intercept term.
x <- c(1.8, 1.47, 1.51, 1.73, 1.36, 1.58, 1.57, 1.85, 1.44, 1.42)
y <- c(2.39, 1.72, 2.55, 1.48, 2.19, 0.59, 2.23, 1.65, 2.49, 1.05)
model <- lm(y ~ 1 + x)
# The intercept is the first coefficient; select it by name.
intercept <- coef(model)["(Intercept)"]
intercept
## (Intercept)
## 2.247175
x <- c(1.8, 1.47, 1.51, 1.73, 1.36, 1.58, 1.57, 1.85, 1.44, 1.42)
# What value minimizes the sum of the squared distances between these
# points and itself? The minimizer of sum((x - mu)^2) is the sample mean.
mean_value <- mean(x)
mean_value
## [1] 1.573
# Fit mpg on drat and inspect the fit graphically.
fit <- lm(mpg ~ drat, data = mtcars)
# Standard lm diagnostic plots.
plot(fit)
# Residuals against the predictor, with a horizontal reference line at a
# residual of zero. A vertical line at drat = 0 (abline(v = 0)) would fall
# outside the plotted drat range and never be visible.
plot(mtcars$drat, resid(fit))
abline(h = 0, col = "red")
# Residual diagnostics for the regression of mpg on drat.
fit <- lm(mpg ~ drat, data = mtcars)
# With an intercept in the model the residuals sum to zero (up to
# floating-point error).
sum(residuals(fit))
## [1] 5.107026e-15
# Residual variance by hand: residual sum of squares over the residual
# degrees of freedom (n - 2 = 30 for one slope plus an intercept).
sum(residuals(fit)^2) / df.residual(fit)
## [1] 20.11889
# The same quantity as reported by summary().
summary(fit)[["sigma"]]^2
## [1] 20.11889
summary(fit)
##
## Call:
## lm(formula = mpg ~ drat, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.0775 -2.6803 -0.2095 2.2976 9.0225
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.525 5.477 -1.374 0.18
## drat 7.678 1.507 5.096 1.78e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.485 on 30 degrees of freedom
## Multiple R-squared: 0.464, Adjusted R-squared: 0.4461
## F-statistic: 25.97 on 1 and 30 DF, p-value: 1.776e-05
# Proportion of the variance in mpg explained by drat.
summary(fit)[["r.squared"]]
## [1] 0.4639952
library(ggplot2)
# Residuals of mpg regressed on hp, plotted against hp.
fit <- lm(mpg ~ hp, data = mtcars)
# Work on a copy of mtcars so the residuals can be attached as a column.
temp <- mtcars
temp$resid <- resid(fit)
# Red horizontal reference line at zero, drawn underneath large
# semi-transparent points. `colour` and `size` are ggplot2's own names for
# the base-R style `col` and `cex` arguments.
g <- ggplot(temp, aes(x = hp, y = resid)) +
  geom_hline(yintercept = 0, colour = "red") +
  geom_point(alpha = 0.5, size = 5)
g
# Residual diagnostics for the regression of mpg on hp.
fit <- lm(mpg ~ hp, data = mtcars)
# Residual variance by hand: residual sum of squares over the residual
# degrees of freedom (n - 2 = 30 for one slope plus an intercept).
sum(residuals(fit)^2) / df.residual(fit)
## [1] 14.92248
# The same quantity as reported by summary().
summary(fit)[["sigma"]]^2
## [1] 14.92248
summary(fit)
##
## Call:
## lm(formula = mpg ~ hp, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.7121 -2.1122 -0.8854 1.5819 8.2360
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 30.09886 1.63392 18.421 < 2e-16 ***
## hp -0.06823 0.01012 -6.742 1.79e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.863 on 30 degrees of freedom
## Multiple R-squared: 0.6024, Adjusted R-squared: 0.5892
## F-statistic: 45.46 on 1 and 30 DF, p-value: 1.788e-07
# Proportion of the variance in mpg explained by hp.
summary(fit)[["r.squared"]]
## [1] 0.6024373