This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the **Knit** button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
summary(cars)
##      speed           dist
##  Min.   : 4.0   Min.   :  2.00
##  1st Qu.:12.0   1st Qu.: 26.00
##  Median :15.0   Median : 36.00
##  Mean   :15.4   Mean   : 42.98
##  3rd Qu.:19.0   3rd Qu.: 56.00
##  Max.   :25.0   Max.   :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the plot. #
# Lab 3.6.1 to 3.6.6 ----
# Simple linear regression of medv on lstat using the Boston data, followed by
# exercise 8 (mpg on horsepower using the Auto data).

library(MASS)
library(ISLR)

# Load the Boston dataset once; naming the package makes its origin explicit.
data("Boston", package = "MASS")

# Fit and inspect the simple regression. Columns are resolved through
# `data = Boston`, so the data frame is never attach()ed to the search path.
lm.fit <- lm(medv ~ lstat, data = Boston)
lm.fit
summary(lm.fit)

# Fit a multiple linear regression model
lm.multiple <- lm(medv ~ lstat + age + rm, data = Boston)
summary(lm.multiple)

# Coefficients, their confidence intervals, and interval predictions at
# lstat = 5, 10 and 15.
coef(lm.fit)
confint(lm.fit)
new_lstat <- data.frame(lstat = c(5, 10, 15))
predict(lm.fit, new_lstat, interval = "confidence")
predict(lm.fit, new_lstat, interval = "prediction")

# Scatterplots of medv against lstat with the fitted line, then a few plotting
# symbol variations.
plot(medv ~ lstat, data = Boston)
abline(lm.fit)
abline(lm.fit, lwd = 3)
abline(lm.fit, lwd = 3, col = "red")
plot(medv ~ lstat, data = Boston, pch = 20)
plot(medv ~ lstat, data = Boston, pch = "+")
plot(1:20, 1:20, pch = 1:20)

# Diagnostic plots
# Draw the plot.lm panels in a 2 x 2 grid, then restore the previous layout so
# the plots that follow are not squeezed into the same grid.
old_par <- par(mfrow = c(2, 2))
plot(lm.fit)
par(old_par)

# Plot residuals against fitted values and studentized residuals
plot(predict(lm.fit), residuals(lm.fit))
plot(predict(lm.fit), rstudent(lm.fit))

# Compute leverage statistics and plot
plot(hatvalues(lm.fit))
which.max(hatvalues(lm.fit))

# Exercise 8 ----
data("Auto", package = "ISLR")

# (a) Perform simple linear regression with mpg as the response and horsepower
#     as the predictor
lm.fit <- lm(mpg ~ horsepower, data = Auto)
summary(lm.fit)

# (b) Plot the response and the predictor, and display the least squares
#     regression line
plot(
  Auto$horsepower, Auto$mpg,
  xlab = "Horsepower",
  ylab = "MPG",
  main = "Simple Linear Regression: MPG vs Horsepower"
)
abline(lm.fit, col = "red")

# (c) Produce diagnostic plots of the least squares regression fit
old_par <- par(mfrow = c(2, 2))
plot(lm.fit)
par(old_par)

# Exercise 10 ----
# Carseats: regress Sales on Price, Urban and US, then on Price and US only.
data("Carseats", package = "ISLR")

lm.fit <- lm(Sales ~ Price + Urban + US, data = Carseats)
summary(lm.fit)

# (c) Write out the model in equation form
#     Sales = β0 + β1 * Price + β2 * UrbanYes + β3 * USYes + ε

lm.fit_smaller <- lm(Sales ~ Price + US, data = Carseats)

# Print both summaries back to back so the two fits can be compared.
summary(lm.fit)
summary(lm.fit_smaller)

# (g) Obtain 95% confidence intervals for the coefficient(s) in the smaller
#     model
confint(lm.fit_smaller)

# (h) Check for evidence of outliers or high leverage observations
#     Use diagnostic plots such as residuals vs. fitted values plot, Cook's
#     distance, etc.
# Set the 2 x 2 layout explicitly rather than relying on graphics state left
# behind by earlier code, and restore the previous layout afterwards.
old_par <- par(mfrow = c(2, 2))
plot(lm.fit_smaller)
par(old_par)

# Exercise 14 ----

# Generate data
set.seed(1)
x1 <- runif(100)
x2 <- 0.5 * x1 + rnorm(100) / 10

correlation <- cor(x1, x2)
print(correlation)

# Create scatterplot
plot(x1, x2, main = "Scatterplot of x1 vs x2", xlab = "x1", ylab = "x2")

# Generate data
# cor() and plot() draw no random numbers, so y gets the same values as if it
# had been generated immediately after x2.
y <- 2 + 2 * x1 + 0.3 * x2 + rnorm(100)

# Fit least squares regression using only x1
lm.fit_x1 <- lm(y ~ x1)
# Describe the results
summary(lm.fit_x1)

# Fit least squares regression using only x2
lm.fit_x2 <- lm(y ~ x2)
# Describe the results
summary(lm.fit_x2)

# NOTE(review): no y ~ x1 + x2 model is fitted before the extra observation is
# appended, so lm.fit_new below has no pre-change fit to compare against.

# Add the additional observation
x1 <- c(x1, 0.1)
x2 <- c(x2, 0.8)
y <- c(y, 6)

# Refit the linear models
lm.fit_new <- lm(y ~ x1 + x2)
lm.fit_x1_new <- lm(y ~ x1)
lm.fit_x2_new <- lm(y ~ x2)

# Describe the results
summary(lm.fit_new)
summary(lm.fit_x1_new)
summary(lm.fit_x2_new)