The data I’ll use is PlantGrowth, one of R’s built-in datasets. It comes from
an experiment to compare yields (as measured by dried weight of plants)
obtained under a control and two different treatment conditions. It’s a data
frame of 30 cases on 2 variables.
# Load the PlantGrowth dataset into the workspace.
data("PlantGrowth")
# Inspect the structure: which variables are present and what type each one is.
str(PlantGrowth)
## 'data.frame': 30 obs. of 2 variables:
## $ weight: num 4.17 5.58 5.18 6.11 4.5 4.61 5.17 4.53 5.33 5.14 ...
## $ group : Factor w/ 3 levels "ctrl","trt1",..: 1 1 1 1 1 1 1 1 1 1 ...
# Keep the dependent variable (weight) and the independent variable (group)
# that enter the regression model.
plant_data <- PlantGrowth[c("weight", "group")]
# Fit the linear regression of weight on treatment group. Because group is a
# factor, the coefficients are the control mean plus one offset per treatment.
model <- lm(formula = weight ~ group, data = plant_data)
# Print the coefficient table, residual summary and overall fit statistics.
summary(model)
##
## Call:
## lm(formula = weight ~ group, data = plant_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.0710 -0.4180 -0.0060 0.2627 1.3690
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.0320 0.1971 25.527 <2e-16 ***
## grouptrt1 -0.3710 0.2788 -1.331 0.1944
## grouptrt2 0.4940 0.2788 1.772 0.0877 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6234 on 27 degrees of freedom
## Multiple R-squared: 0.2641, Adjusted R-squared: 0.2096
## F-statistic: 4.846 on 2 and 27 DF, p-value: 0.01591
# Plot weight by treatment group and overlay the model's fitted values.
# Because group is a factor, plot() draws one boxplot per level (at x = 1, 2,
# 3) rather than a scatterplot, and the fitted model is one predicted mean per
# group, not a single straight line. abline() would read only the first two
# of the three coefficients as an intercept and slope, which is meaningless
# on this plot, so the fitted group means are drawn as points instead.
plot(
  weight ~ group,
  data = plant_data,
  main = "Plant Growth by Treatment Group",
  xlab = "Treatment Group",
  ylab = "Weight"
)
group_levels <- levels(plant_data$group)
fitted_means <- predict(model, newdata = data.frame(group = group_levels))
points(seq_along(group_levels), fitted_means, col = "red", pch = 19)

# Pull the residuals and fitted values out of the model once, then reuse them
# for every diagnostic plot below.
model_resid <- residuals(model)
model_fit <- fitted(model)

# Histogram of the residuals: a quick look at their distribution.
hist(model_resid, xlab = "Residuals", main = "Histogram of Residuals")

# Normal probability (Q-Q) plot of the residuals with a reference line.
qqnorm(model_resid, main = "Normal Probability Plot of Residuals")
qqline(model_resid)

# Predicted vs. actual weights; the y = x reference line marks perfect
# prediction.
plot(
  x = model_fit,
  y = PlantGrowth$weight,
  xlab = "Predicted Values",
  ylab = "Actual Values",
  main = "Predicted vs. Actual Values"
)
abline(a = 0, b = 1)

# Absolute residuals vs. predicted values, to compare the residual spread
# across the fitted values.
plot(
  x = model_fit,
  y = abs(model_resid),
  xlab = "Predicted Values",
  ylab = "Absolute Residuals",
  main = "Absolute Residuals vs. Predicted Values"
)
