R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Summarise the `cars` data frame (columns `speed` and `dist`, as shown in
# the output below). The lines starting with `##` are the knitted console
# output of this call, not code.
summary(cars)

##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

# Note that the echo = FALSE parameter was added to the code chunk to prevent
# printing of the R code that generated the plot.

# ---- Lab 3.6.1 to 3.6.6 ------------------------------------------------------
library(MASS) # Boston housing data
library(ISLR) # Auto data (Exercise 8)

# Load Boston dataset
data("Boston", package = "MASS")

# Fit a simple linear regression model
lm.fit <- lm(medv ~ lstat, data = Boston)
lm.fit

# Print summary of the model
summary(lm.fit)

# Fit a multiple linear regression model
lm.multiple <- lm(medv ~ lstat + age + rm, data = Boston)
summary(lm.multiple)

# Coefficient estimates and their confidence intervals
coef(lm.fit)
confint(lm.fit)

# Confidence and prediction intervals for medv at three lstat values
new_lstat <- data.frame(lstat = c(5, 10, 15))
predict(lm.fit, new_lstat, interval = "confidence")
predict(lm.fit, new_lstat, interval = "prediction")

# Scatterplots with the least squares line. The formula interface with
# `data = Boston` replaces attach(Boston): the axis labels are still
# "lstat"/"medv", but nothing is added to the search path.
plot(medv ~ lstat, data = Boston)
abline(lm.fit)
abline(lm.fit, lwd = 3)
abline(lm.fit, lwd = 3, col = "red")
plot(medv ~ lstat, data = Boston, pch = 20)
plot(medv ~ lstat, data = Boston, pch = "+")
plot(1:20, 1:20, pch = 1:20)

# Diagnostic plots (2 x 2 grid; the previous layout is restored afterwards so
# that later plots are not squeezed into the same grid)
old_par <- par(mfrow = c(2, 2))
plot(lm.fit)
par(old_par)

# Plot residuals against fitted values and studentized residuals
plot(predict(lm.fit), residuals(lm.fit))
plot(predict(lm.fit), rstudent(lm.fit))

# Compute leverage statistics and plot
plot(hatvalues(lm.fit))
which.max(hatvalues(lm.fit))

# ---- Exercise 8 --------------------------------------------------------------
data("Auto", package = "ISLR")

# (a) Perform simple linear regression with mpg as the response and
#     horsepower as the predictor
lm.fit <- lm(mpg ~ horsepower, data = Auto)
summary(lm.fit)

# (b) Plot the response and the predictor, and display the least squares
#     regression line
plot(
  Auto$horsepower, Auto$mpg,
  xlab = "Horsepower",
  ylab = "MPG",
  main = "Simple Linear Regression: MPG vs Horsepower"
)
abline(lm.fit, col = "red")

# (c) Produce diagnostic plots of the least squares regression fit
old_par <- par(mfrow = c(2, 2))
plot(lm.fit)
par(old_par)

# ---- Exercise 10 -------------------------------------------------------------

# Carseats ships with the ISLR package; naming the package makes this section
# work even when ISLR has not been attached earlier in the session.
data("Carseats", package = "ISLR")

# (a) Fit a multiple regression model to predict Sales using Price, Urban,
#     and US
lm.fit <- lm(Sales ~ Price + Urban + US, data = Carseats)

# (b) Interpretation of each coefficient
summary(lm.fit)

# (c) Write out the model in equation form
# Sales = β0 + β1 * Price + β2 * UrbanYes + β3 * USYes + ε

# (d) For which of the predictors can you reject the null hypothesis
#     H0 : βj = 0?
# Examine the p-values in the summary output to determine which coefficients
# are statistically significant

# (e) Fit a smaller model that only uses the predictors for which there is
#     evidence of association with the outcome
lm.fit_smaller <- lm(Sales ~ Price + US, data = Carseats)

# (f) Assess the model fit for both models (a) and (e)
summary(lm.fit)
summary(lm.fit_smaller)

# (g) Obtain 95% confidence intervals for the coefficient(s) in the smaller
#     model
confint(lm.fit_smaller)

# (h) Check for evidence of outliers or high leverage observations
# Use diagnostic plots such as residuals vs. fitted values plot, Cook's
# distance, etc. plot() on an lm object draws four panels, so show them in a
# 2 x 2 grid and restore the previous layout afterwards.
old_par <- par(mfrow = c(2, 2))
plot(lm.fit_smaller)
par(old_par)

# ---- Exercise 14 -------------------------------------------------------------
# Generate data
set.seed(1)
x1 <- runif(100)
x2 <- 0.5 * x1 + rnorm(100) / 10

# Calculate correlation
correlation <- cor(x1, x2)
print(correlation)

# Create scatterplot
plot(x1, x2, main = "Scatterplot of x1 vs x2", xlab = "x1", ylab = "x2")

# Generate data
y <- 2 + 2 * x1 + 0.3 * x2 + rnorm(100)

# Fit least squares regression using only x1
lm.fit_x1 <- lm(y ~ x1)

# Describe the results
summary(lm.fit_x1)

# Fit least squares regression using only x2
lm.fit_x2 <- lm(y ~ x2)

# Describe the results
# (the model must be passed explicitly: a bare `summary` only prints the
# function definition instead of the fit)
summary(lm.fit_x2)

# Add the additional observation
x1 <- c(x1, 0.1)
x2 <- c(x2, 0.8)
y <- c(y, 6)

# Refit the linear models
lm.fit_new <- lm(y ~ x1 + x2)
lm.fit_x1_new <- lm(y ~ x1)
lm.fit_x2_new <- lm(y ~ x2)

# Describe the results
summary(lm.fit_new)
summary(lm.fit_x1_new)
summary(lm.fit_x2_new)

erdenetulga_hw

2024-03-20