LINEAR MODELS

Author

INUSA YAWUZA MUSA (N1349229)

Loading the mtcars file

library(tidyverse)
library(ggplot2)


mtcars

Summary of data

summary(mtcars)
      mpg             cyl             disp             hp       
 Min.   :10.40   Min.   :4.000   Min.   : 71.1   Min.   : 52.0  
 1st Qu.:15.43   1st Qu.:4.000   1st Qu.:120.8   1st Qu.: 96.5  
 Median :19.20   Median :6.000   Median :196.3   Median :123.0  
 Mean   :20.09   Mean   :6.188   Mean   :230.7   Mean   :146.7  
 3rd Qu.:22.80   3rd Qu.:8.000   3rd Qu.:326.0   3rd Qu.:180.0  
 Max.   :33.90   Max.   :8.000   Max.   :472.0   Max.   :335.0  
      drat             wt             qsec             vs        
 Min.   :2.760   Min.   :1.513   Min.   :14.50   Min.   :0.0000  
 1st Qu.:3.080   1st Qu.:2.581   1st Qu.:16.89   1st Qu.:0.0000  
 Median :3.695   Median :3.325   Median :17.71   Median :0.0000  
 Mean   :3.597   Mean   :3.217   Mean   :17.85   Mean   :0.4375  
 3rd Qu.:3.920   3rd Qu.:3.610   3rd Qu.:18.90   3rd Qu.:1.0000  
 Max.   :4.930   Max.   :5.424   Max.   :22.90   Max.   :1.0000  
       am              gear            carb      
 Min.   :0.0000   Min.   :3.000   Min.   :1.000  
 1st Qu.:0.0000   1st Qu.:3.000   1st Qu.:2.000  
 Median :0.0000   Median :4.000   Median :2.000  
 Mean   :0.4062   Mean   :3.688   Mean   :2.812  
 3rd Qu.:1.0000   3rd Qu.:4.000   3rd Qu.:4.000  
 Max.   :1.0000   Max.   :5.000   Max.   :8.000  

Linear regression

plot(mtcars$wt, mtcars$mpg, 
     xlab = "Weight (1000 lbs)", 
     ylab = "Miles Per Gallon (mpg)", 
     main = "Effect of Weight on MPG",
     pch = 16, col = "blue") %>%
summary()
Length  Class   Mode 
     0   NULL   NULL 
# linear regression model
model <- lm(mpg ~ wt, data = mtcars)
abline(model, col = "red", lwd = 2) 

summary(model)

Call:
lm(formula = mpg ~ wt, data = mtcars)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.5432 -2.3647 -0.1252  1.4096  6.8727 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  37.2851     1.8776  19.858  < 2e-16 ***
wt           -5.3445     0.5591  -9.559 1.29e-10 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 3.046 on 30 degrees of freedom
Multiple R-squared:  0.7528,    Adjusted R-squared:  0.7446 
F-statistic: 91.38 on 1 and 30 DF,  p-value: 1.294e-10
model$residuals
          Mazda RX4       Mazda RX4 Wag          Datsun 710      Hornet 4 Drive 
         -2.2826106          -0.9197704          -2.0859521           1.2973499 
  Hornet Sportabout             Valiant          Duster 360           Merc 240D 
         -0.2001440          -0.6932545          -3.9053627           4.1637381 
           Merc 230            Merc 280           Merc 280C          Merc 450SE 
          2.3499593           0.2998560          -1.1001440           0.8668731 
         Merc 450SL         Merc 450SLC  Cadillac Fleetwood Lincoln Continental 
         -0.0502472          -1.8830236           1.1733496           2.1032876 
  Chrysler Imperial            Fiat 128         Honda Civic      Toyota Corolla 
          5.9810744           6.8727113           1.7461954           6.4219792 
      Toyota Corona    Dodge Challenger         AMC Javelin          Camaro Z28 
         -2.6110037          -2.9725862          -3.7268663          -3.4623553 
   Pontiac Firebird           Fiat X1-9       Porsche 914-2        Lotus Europa 
          2.4643670           0.3564263           0.1520430           1.2010593 
     Ford Pantera L        Ferrari Dino       Maserati Bora          Volvo 142E 
         -4.5431513          -2.7809399          -3.2053627          -1.0274952 
# Calculate the density of residuals
residual_density <- density(model$residuals)

# Plot the density curve
plot(residual_density, 
     main = "Density Plot of Residuals", 
     xlab = "Residuals", 
     ylab = "Density", 
     lwd = 2)

# Fill the area under the curve with red color
polygon(residual_density, col = "red")

# Shapiro-Wilk test on residuals
shapiro.test(model$residuals)

    Shapiro-Wilk normality test

data:  model$residuals
W = 0.94508, p-value = 0.1044

ANOVA

#Anova 
anova(model)
Analysis of Variance Table

Response: mpg
          Df Sum Sq Mean Sq F value    Pr(>F)    
wt         1 847.73  847.73  91.375 1.294e-10 ***
Residuals 30 278.32    9.28                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1