# Keep the mtcars data frame under the name df1 as well.
df1 <- mtcars
# Pearson correlation between fuel economy (mpg) and vehicle weight (wt).
cormt <- with(mtcars, cor(x = mpg, y = wt))
cormt
## [1] -0.8676594
# r = -0.8676594: a strong negative correlation between mpg and wt.
# Simple linear regression of mpg on wt using the mtcars data.
model1 <- lm(
  formula = mpg ~ wt,
  data = mtcars
)
# Auto-print the coefficient table and fit statistics.
summary(model1)
##
## Call:
## lm(formula = mpg ~ wt, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.5432 -2.3647 -0.1252 1.4096 6.8727
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 37.2851 1.8776 19.858 < 2e-16 ***
## wt -5.3445 0.5591 -9.559 1.29e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.046 on 30 degrees of freedom
## Multiple R-squared: 0.7528, Adjusted R-squared: 0.7446
## F-statistic: 91.38 on 1 and 30 DF, p-value: 1.294e-10
# Built-in diagnostic plots for the fitted lm object.
plot(model1)
# Hand-drawn residuals-vs-fitted plot. The extractor functions fitted() and
# resid() are used instead of reaching into the model object with `$`, and the
# axes are labelled explicitly so they do not show the raw R expressions.
plot(
  fitted(model1), resid(model1),
  xlab = "Fitted mpg",
  ylab = "Residuals",
  main = "model1: residuals vs fitted values"
)
# Dashed zero line: residuals should scatter evenly around it if the linear
# model is adequate. plot() alone never draws a reference line.
abline(h = 0, lty = 2)
# Keep the airquality data frame under the name df2 as well.
df2 <- airquality
# Pearson correlation between Ozone and Temp; rows with a missing value in
# either variable are dropped via use = "complete.obs".
corair <- with(airquality, cor(x = Ozone, y = Temp, use = "complete.obs"))
corair # moderate positive correlation
## [1] 0.6983603
# Simple linear regression of Ozone on Temp using the airquality data.
model2 <- lm(
  formula = Ozone ~ Temp,
  data = airquality
)
# Auto-print the coefficient table and fit statistics.
summary(model2)
##
## Call:
## lm(formula = Ozone ~ Temp, data = airquality)
##
## Residuals:
## Min 1Q Median 3Q Max
## -40.729 -17.409 -0.587 11.306 118.271
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -146.9955 18.2872 -8.038 9.37e-13 ***
## Temp 2.4287 0.2331 10.418 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 23.71 on 114 degrees of freedom
## (37 observations deleted due to missingness)
## Multiple R-squared: 0.4877, Adjusted R-squared: 0.4832
## F-statistic: 108.5 on 1 and 114 DF, p-value: < 2.2e-16
# Built-in diagnostic plots for the fitted lm object.
plot(model2)
# Hand-drawn residuals-vs-fitted plot. The extractor functions fitted() and
# resid() are used instead of reaching into the model object with `$`, and the
# axes are labelled explicitly so they do not show the raw R expressions.
plot(
  fitted(model2), resid(model2),
  xlab = "Fitted Ozone",
  ylab = "Residuals",
  main = "model2: residuals vs fitted values"
)
# Dashed zero line: residuals should scatter evenly around it if the linear
# model is adequate. plot() alone never draws a reference line.
abline(h = 0, lty = 2)
# In both cases, the residual plots show no clear patterns or trends, so the
# assumptions of linearity, homoscedasticity, and independence appear to be met.
# Both data sets show significant relationships between the variables. The
# regression models provide slope estimates that quantify the strength of these
# relationships and can be used to make predictions about the dependent
# variable based on the independent variable.
# The Gauss-Markov assumptions hold because:
## 1. There is a linear relationship between the two variables in each of the
##    two data sets (a strong negative correlation and a moderate positive
##    correlation, respectively).
## 2. Not all of the residuals lie on the trend line, and when I plotted the
##    residual plots there was no trend line in them. (I am not sure why.)
## 3. The variables are clearly independent.
## 4. The residuals are normally distributed.