Last Name - Sharma
First Name - Shivankit
M 10625565
# Load the cruise-ship data set. Text columns are converted to factors
# (stringsAsFactors = TRUE). TRUE is spelled out because T is an ordinary
# variable that can be reassigned.
# NOTE(review): the absolute, user-specific path below only resolves on the
# author's machine -- consider a project-relative path.
crs <- read.csv(
  "C:/Users/sharm_000/OneDrive/University/BANA 7038/Mid Term Exam/cruise.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = TRUE
)
# List the column names of the loaded data frame.
names(crs)
## [1] "ShipName" "CruiseLine" "Age"
## [4] "Tonnage" "Passengers" "Length"
## [7] "Cabins" "PassengerDensity" "Crew"
# Additive model: Tonnage explained by PassengerDensity plus CruiseLine.
lmpdencru <- lm(formula = Tonnage ~ PassengerDensity + CruiseLine, data = crs)
\(\text{Tonnage} = \beta_0 + \beta_1\,\text{PassengerDensity} + \beta_2\,\text{CruiseLine} + \epsilon\)
#Tonnage=$\beta$~0~+$\beta$~1~ * PassengerDensity + $\beta$~2~ * CruiseLine+ e
# Split out the Carnival and the Norwegian ships, then regress Tonnage on
# PassengerDensity separately within each of the two cruise lines.
carnvl <- crs[crs$CruiseLine == "Carnival", , drop = FALSE]
norw <- crs[crs$CruiseLine == "Norwegian", , drop = FALSE]

lmcarnvl <- lm(Tonnage ~ PassengerDensity, data = carnvl)
lmnorw <- lm(Tonnage ~ PassengerDensity, data = norw)

# Coefficient table of the Carnival fit.
coef(summary(lmcarnvl))
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -10.979847 45.251116 -0.2426426 0.81075330
## PassengerDensity 2.653077 1.250452 2.1216941 0.04654884
summary(lmnorw)$coefficients
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -64.33533 24.0337436 -2.676875 0.021528030
## PassengerDensity 3.68454 0.6837951 5.388369 0.000220527
summary(carnvl$Tonnage)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 46.05 70.37 87.25 84.65 101.50 110.20
summary(norw$Tonnage)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 28.00 42.00 75.34 63.72 81.00 93.00
# Mean Tonnage of the Carnival ships minus mean Tonnage of the Norwegian ships.
dif <- mean(carnvl$Tonnage) - mean(norw$Tonnage)
dif
## [1] 20.93611
##
## Call:
## lm(formula = Tonnage ~ PassengerDensity, data = carnvl)
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.124 -10.239 -9.627 14.829 42.184
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -10.980 45.251 -0.243 0.8108
## PassengerDensity 2.653 1.250 2.122 0.0465 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 18.78 on 20 degrees of freedom
## Multiple R-squared: 0.1837, Adjusted R-squared: 0.1429
## F-statistic: 4.502 on 1 and 20 DF, p-value: 0.04655
##
## Call:
## lm(formula = Tonnage ~ PassengerDensity, data = norw)
##
## Residuals:
## Min 1Q Median 3Q Max
## -30.2344 -0.4523 3.4261 6.2905 14.1909
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -64.3353 24.0337 -2.677 0.021528 *
## PassengerDensity 3.6845 0.6838 5.388 0.000221 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.93 on 11 degrees of freedom
## Multiple R-squared: 0.7252, Adjusted R-squared: 0.7003
## F-statistic: 29.03 on 1 and 11 DF, p-value: 0.0002205
## [1] 0.8507633
## [1] 0.8994493
library(psych)

# Pairwise panel plot (psych::pairs.panels) of every column except ShipName.
panel_vars <- c(
  "CruiseLine", "Age", "Tonnage", "Passengers",
  "Length", "Cabins", "PassengerDensity", "Crew"
)
pairs.panels(crs[panel_vars], gap = 0)

# Full model: Tonnage regressed on all seven remaining variables.
lmmulti <- lm(
  Tonnage ~ CruiseLine + Age + Passengers + Length + Cabins +
    PassengerDensity + Crew,
  data = crs
)
\(\text{Tonnage} = \beta_0 + \beta_1\,\text{CruiseLine} + \beta_2\,\text{Age} + \beta_3\,\text{Passengers} + \beta_4\,\text{Length} + \beta_5\,\text{Cabins} + \beta_6\,\text{PassengerDensity} + \beta_7\,\text{Crew} + \epsilon\)
# Sums of squares for the full model, computed by hand:
#   SSTmulti   - squared deviations of Tonnage from its mean
#   SSRmulti   - squared deviations of the fitted values from that mean
#   SSResmulti - squared differences between Tonnage and the fitted values
SSTmulti <- sum((crs$Tonnage - mean(crs$Tonnage))^2)
SSRmulti <- sum((lmmulti$fitted.values - mean(crs$Tonnage))^2)
SSResmulti <- sum((crs$Tonnage - lmmulti$fitted.values)^2)
SSTmulti
## [1] 217608.1
SSRmulti
## [1] 212659.9
SSResmulti
## [1] 4948.187
# R-squared of the full model, from the hand-computed sums of squares.
Rsquaremulti <- SSRmulti / SSTmulti

# R-squared of an lm() fit of `model_formula` on the cruise data.
r2_for <- function(model_formula) {
  summary(lm(model_formula, data = crs))$r.squared
}

# R-squared as regressors are added one at a time, starting with CruiseLine.
n1 <- r2_for(Tonnage ~ CruiseLine)
n2 <- r2_for(Tonnage ~ CruiseLine + Age)
n3 <- r2_for(Tonnage ~ CruiseLine + Age + Passengers)
n4 <- r2_for(Tonnage ~ CruiseLine + Age + Passengers + Length)
n5 <- r2_for(Tonnage ~ CruiseLine + Age + Passengers + Length + Cabins)
n6 <- r2_for(
  Tonnage ~ CruiseLine + Age + Passengers + Length + Cabins + PassengerDensity
)
n7 <- r2_for(
  Tonnage ~ CruiseLine + Age + Passengers + Length + Cabins +
    PassengerDensity + Crew
)
Rsquaremulti * 100
## [1] 97.7261
U <- c(n1, n2, n3, n4, n5, n6, n7)
U * 100
## [1] 48.91671 74.38753 93.94032 95.32599 96.32241 97.70225 97.72610
# Same ratio as before; SSRmulti and SSTmulti have not changed in between.
Rsquaremulti <- SSRmulti / SSTmulti

# Adjusted R-squared of an lm() fit of `model_formula` on the cruise data.
adj_r2_for <- function(model_formula) {
  summary(lm(model_formula, data = crs))$adj.r.squared
}

# Adjusted R-squared as regressors are added one at a time, CruiseLine first.
m1 <- adj_r2_for(Tonnage ~ CruiseLine)
m2 <- adj_r2_for(Tonnage ~ CruiseLine + Age)
m3 <- adj_r2_for(Tonnage ~ CruiseLine + Age + Passengers)
m4 <- adj_r2_for(Tonnage ~ CruiseLine + Age + Passengers + Length)
m5 <- adj_r2_for(Tonnage ~ CruiseLine + Age + Passengers + Length + Cabins)
m6 <- adj_r2_for(
  Tonnage ~ CruiseLine + Age + Passengers + Length + Cabins + PassengerDensity
)
m7 <- adj_r2_for(
  Tonnage ~ CruiseLine + Age + Passengers + Length + Cabins +
    PassengerDensity + Crew
)

# Collect the seven values and show them as percentages.
U <- c(m1, m2, m3, m4, m5, m6, m7)
U * 100
## [1] 41.88350 70.64848 93.00464 94.56430 95.69118 97.28762 97.29544
Hence, by gradually adding the regressors, we see the following (each increase is relative to the previous adjusted R squared):
The initial adjusted R squared, when CruiseLine is the only regressor, is 41.88350%
After adding Age, the adjusted R squared is 70.64848% :: Increase of (68.67855)%
After adding Passengers, the adjusted R squared is 93.00464% :: Increase of (31.64421)%
After adding Length, the adjusted R squared is 94.56430% :: Increase of (1.67697)%
After adding Cabins, the adjusted R squared is 95.69118% :: Increase of (1.19166)%
After adding PassengerDensity, the adjusted R squared is 97.28762% :: Increase of (1.668325)%
After adding Crew, the adjusted R squared is 97.29544% :: Increase of (0.008034107)%
## [1] 0.977261
## Analysis of Variance Table
##
## Response: Tonnage
## Df Sum Sq Mean Sq F value Pr(>F)
## CruiseLine 19 106447 5602 149.4536 < 2.2e-16 ***
## Age 1 55427 55427 1478.5831 < 2.2e-16 ***
## Passengers 1 42548 42548 1135.0412 < 2.2e-16 ***
## Length 1 3015 3015 80.4380 2.553e-15 ***
## Cabins 1 2168 2168 57.8424 4.739e-12 ***
## PassengerDensity 1 3003 3003 80.1000 2.840e-15 ***
## Crew 1 52 52 1.3844 0.2415
## Residuals 132 4948 37
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning: not plotting observations with leverage one:
## 91
## Warning: not plotting observations with leverage one:
## 91
# Backward elimination by AIC (MASS::stepAIC), starting from the full model.
library(MASS)
step <- stepAIC(lmmulti, direction = "backward")
## Start: AIC=596.18
## Tonnage ~ CruiseLine + Age + Passengers + Length + Cabins + PassengerDensity +
## Crew
##
## Df Sum of Sq RSS AIC
## - Crew 1 51.89 5000.1 595.83
## <none> 4948.2 596.18
## - Age 1 79.38 5027.6 596.70
## - Length 1 200.71 5148.9 600.46
## - Cabins 1 281.99 5230.2 602.94
## - CruiseLine 19 1812.35 6760.5 607.49
## - Passengers 1 2043.30 6991.5 648.80
## - PassengerDensity 1 2917.63 7865.8 667.41
##
## Step: AIC=595.83
## Tonnage ~ CruiseLine + Age + Passengers + Length + Cabins + PassengerDensity
##
## Df Sum of Sq RSS AIC
## <none> 5000.1 595.83
## - Age 1 75.91 5076.0 596.21
## - Length 1 320.29 5320.4 603.64
## - CruiseLine 19 1815.96 6816.0 606.78
## - Cabins 1 709.43 5709.5 614.79
## - Passengers 1 1992.72 6992.8 646.83
## - PassengerDensity 1 3002.65 8002.7 668.14
step$anova # display results
## Stepwise Model Path
## Analysis of Deviance Table
##
## Initial Model:
## Tonnage ~ CruiseLine + Age + Passengers + Length + Cabins + PassengerDensity +
## Crew
##
## Final Model:
## Tonnage ~ CruiseLine + Age + Passengers + Length + Cabins + PassengerDensity
##
##
## Step Df Deviance Resid. Df Resid. Dev AIC
## 1 132 4948.187 596.1807
## 2 - Crew 1 51.89488 133 5000.082 595.8291
anova(lmmulti)
## Analysis of Variance Table
##
## Response: Tonnage
## Df Sum Sq Mean Sq F value Pr(>F)
## CruiseLine 19 106447 5602 149.4536 < 2.2e-16 ***
## Age 1 55427 55427 1478.5831 < 2.2e-16 ***
## Passengers 1 42548 42548 1135.0412 < 2.2e-16 ***
## Length 1 3015 3015 80.4380 2.553e-15 ***
## Cabins 1 2168 2168 57.8424 4.739e-12 ***
## PassengerDensity 1 3003 3003 80.1000 2.840e-15 ***
## Crew 1 52 52 1.3844 0.2415
## Residuals 132 4948 37
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Let us see the effect of dropping Crew from the model
Removing Crew
# Full model with Crew left out.
lmmultinocrew <- lm(
  Tonnage ~ CruiseLine + Age + Passengers + Length + Cabins +
    PassengerDensity,
  data = crs
)
summary(lmmultinocrew)$r.squared
## [1] 0.9770225
summary(lmmultinocrew)$adj.r.squared
## [1] 0.9728762
anova(lmmultinocrew,lmmulti)
## Analysis of Variance Table
##
## Model 1: Tonnage ~ CruiseLine + Age + Passengers + Length + Cabins + PassengerDensity
## Model 2: Tonnage ~ CruiseLine + Age + Passengers + Length + Cabins + PassengerDensity +
## Crew
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 133 5000.1
## 2 132 4948.2 1 51.895 1.3844 0.2415
# Backward elimination by AIC, this time starting from the model without Crew.
library(MASS)
step <- stepAIC(lmmultinocrew, direction = "backward")
## Start: AIC=595.83
## Tonnage ~ CruiseLine + Age + Passengers + Length + Cabins + PassengerDensity
##
## Df Sum of Sq RSS AIC
## <none> 5000.1 595.83
## - Age 1 75.91 5076.0 596.21
## - Length 1 320.29 5320.4 603.64
## - CruiseLine 19 1815.96 6816.0 606.78
## - Cabins 1 709.43 5709.5 614.79
## - Passengers 1 1992.72 6992.8 646.83
## - PassengerDensity 1 3002.65 8002.7 668.14
step$anova # display results
## Stepwise Model Path
## Analysis of Deviance Table
##
## Initial Model:
## Tonnage ~ CruiseLine + Age + Passengers + Length + Cabins + PassengerDensity
##
## Final Model:
## Tonnage ~ CruiseLine + Age + Passengers + Length + Cabins + PassengerDensity
##
##
## Step Df Deviance Resid. Df Resid. Dev AIC
## 1 133 5000.082 595.8291
anova(lmmultinocrew)
## Analysis of Variance Table
##
## Response: Tonnage
## Df Sum Sq Mean Sq F value Pr(>F)
## CruiseLine 19 106447 5602 149.023 < 2.2e-16 ***
## Age 1 55427 55427 1474.322 < 2.2e-16 ***
## Passengers 1 42548 42548 1131.770 < 2.2e-16 ***
## Length 1 3015 3015 80.206 2.614e-15 ***
## Cabins 1 2168 2168 57.676 4.878e-12 ***
## PassengerDensity 1 3003 3003 79.869 2.907e-15 ***
## Residuals 133 5000 38
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Removing Cabins
# Model with neither Crew nor Cabins.
lmmultinocrewnocabins <- lm(
  Tonnage ~ CruiseLine + Age + Passengers + Length + PassengerDensity,
  data = crs
)

# Backward elimination by AIC from that model.
library(MASS)
step <- stepAIC(lmmultinocrewnocabins, direction = "backward")
## Start: AIC=614.79
## Tonnage ~ CruiseLine + Age + Passengers + Length + PassengerDensity
##
## Df Sum of Sq RSS AIC
## - Age 1 36.7 5746.2 613.81
## <none> 5709.5 614.79
## - CruiseLine 19 1885.9 7595.4 621.89
## - Length 1 449.7 6159.2 624.77
## - PassengerDensity 1 4461.5 10171.0 704.02
## - Passengers 1 17504.3 23213.8 834.41
##
## Step: AIC=613.81
## Tonnage ~ CruiseLine + Passengers + Length + PassengerDensity
##
## Df Sum of Sq RSS AIC
## <none> 5746.2 613.81
## - CruiseLine 19 1875.4 7621.6 620.43
## - Length 1 470.5 6216.7 624.24
## - PassengerDensity 1 5824.0 11570.3 722.39
## - Passengers 1 21217.1 26963.3 856.06
step$anova # display results
## Stepwise Model Path
## Analysis of Deviance Table
##
## Initial Model:
## Tonnage ~ CruiseLine + Age + Passengers + Length + PassengerDensity
##
## Final Model:
## Tonnage ~ CruiseLine + Passengers + Length + PassengerDensity
##
##
## Step Df Deviance Resid. Df Resid. Dev AIC
## 1 134 5709.517 614.7926
## 2 - Age 1 36.716 135 5746.233 613.8054
summary(lmmultinocrewnocabins)$r.squared
## [1] 0.9737624
summary(lmmultinocrewnocabins)$adj.r.squared
## [1] 0.9692589
Removing Age
# Model with Crew, Cabins and Age all removed.
lmmultinocrewnocabinsnoage <- lm(
  Tonnage ~ CruiseLine + Passengers + Length + PassengerDensity,
  data = crs
)

# Backward elimination by AIC from that model.
library(MASS)
step <- stepAIC(lmmultinocrewnocabinsnoage, direction = "backward")
## Start: AIC=613.81
## Tonnage ~ CruiseLine + Passengers + Length + PassengerDensity
##
## Df Sum of Sq RSS AIC
## <none> 5746.2 613.81
## - CruiseLine 19 1875.4 7621.6 620.43
## - Length 1 470.5 6216.7 624.24
## - PassengerDensity 1 5824.0 11570.3 722.39
## - Passengers 1 21217.1 26963.3 856.06
step$anova # display results
## Stepwise Model Path
## Analysis of Deviance Table
##
## Initial Model:
## Tonnage ~ CruiseLine + Passengers + Length + PassengerDensity
##
## Final Model:
## Tonnage ~ CruiseLine + Passengers + Length + PassengerDensity
##
##
## Step Df Deviance Resid. Df Resid. Dev AIC
## 1 135 5746.233 613.8054
summary(lmmultinocrewnocabinsnoage)$r.squared
## [1] 0.9735937
summary(lmmultinocrewnocabinsnoage)$adj.r.squared
## [1] 0.9692904
anova(lmmultinocrewnocabinsnoage)
## Analysis of Variance Table
##
## Response: Tonnage
## Df Sum Sq Mean Sq F value Pr(>F)
## CruiseLine 19 106447 5602 131.62 < 2.2e-16 ***
## Passengers 1 94757 94757 2226.20 < 2.2e-16 ***
## Length 1 4834 4834 113.56 < 2.2e-16 ***
## PassengerDensity 1 5824 5824 136.83 < 2.2e-16 ***
## Residuals 135 5746 43
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Removing Length
# Model keeping only CruiseLine, Passengers and PassengerDensity.
lmmultinocrewnocabinsnolennoage <- lm(
  Tonnage ~ CruiseLine + Passengers + PassengerDensity,
  data = crs
)

# Backward elimination by AIC from that model.
library(MASS)
step <- stepAIC(lmmultinocrewnocabinsnolennoage, direction = "backward")
## Start: AIC=624.24
## Tonnage ~ CruiseLine + Passengers + PassengerDensity
##
## Df Sum of Sq RSS AIC
## <none> 6217 624.24
## - CruiseLine 19 3557 9774 657.73
## - PassengerDensity 1 10187 16404 775.54
## - Passengers 1 88645 94862 1052.82
step$anova # display results
## Stepwise Model Path
## Analysis of Deviance Table
##
## Initial Model:
## Tonnage ~ CruiseLine + Passengers + PassengerDensity
##
## Final Model:
## Tonnage ~ CruiseLine + Passengers + PassengerDensity
##
##
## Step Df Deviance Resid. Df Resid. Dev AIC
## 1 136 6216.736 624.24
summary(lmmultinocrewnocabinsnolennoage)$r.squared
## [1] 0.9714315
summary(lmmultinocrewnocabinsnolennoage)$adj.r.squared
## [1] 0.9670202
anova(lmmultinocrewnocabinsnolennoage)
## Analysis of Variance Table
##
## Response: Tonnage
## Df Sum Sq Mean Sq F value Pr(>F)
## CruiseLine 19 106447 5602 122.56 < 2.2e-16 ***
## Passengers 1 94757 94757 2072.95 < 2.2e-16 ***
## PassengerDensity 1 10187 10187 222.86 < 2.2e-16 ***
## Residuals 136 6217 46
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Trying to remove CruiseLine
# Two-regressor model: CruiseLine dropped as well.
lmmultinocrewnocabinsnolennoagenocru <- lm(
  Tonnage ~ Passengers + PassengerDensity,
  data = crs
)
summary(lmmultinocrewnocabinsnolennoagenocru)$r.squared
## [1] 0.9550859
summary(lmmultinocrewnocabinsnolennoagenocru)$adj.r.squared
## [1] 0.9545064
# Multicollinearity check: variance inflation factors (car::vif) for the
# CruiseLine + Passengers + PassengerDensity model.
library(car, warn.conflicts = FALSE)
vif(lmmultinocrewnocabinsnolennoage)
## GVIF Df GVIF^(1/(2*Df))
## CruiseLine 4.915907 19 1.042798
## Passengers 2.323111 1 1.524176
## PassengerDensity 2.335736 1 1.528312
# Flag every entry of the vif() table that exceeds 4. The comparison is
# applied to all three columns, including Df, so the Df column of the result
# carries no diagnostic meaning.
# NOTE(review): for a multi-df term such as CruiseLine, the car documentation
# appears to recommend judging GVIF^(1/(2*Df)) rather than the raw GVIF --
# confirm which column and threshold are intended.
vif(lmmultinocrewnocabinsnolennoage)>4
## GVIF Df GVIF^(1/(2*Df))
## CruiseLine TRUE TRUE FALSE
## Passengers FALSE FALSE FALSE
## PassengerDensity FALSE FALSE FALSE
## Warning: not plotting observations with leverage one:
## 91
## Warning: not plotting observations with leverage one:
## 91
\(\text{Tonnage} = \beta_0 + \beta_1\,\text{CruiseLine} + \beta_2\,\text{Passengers} + \beta_3\,\text{PassengerDensity} + \epsilon\)
summary(lmmultinocrewnocabinsnolennoage)
##
## Call:
## lm(formula = Tonnage ~ CruiseLine + Passengers + PassengerDensity,
## data = crs)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.6256 -1.3918 0.0071 2.4854 24.3132
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -56.45862 6.27272 -9.001 1.75e-15 ***
## CruiseLineCarnival 1.95214 5.23986 0.373 0.710060
## CruiseLineCelebrity 8.98986 5.33329 1.686 0.094164 .
## CruiseLineCosta 2.52851 5.33375 0.474 0.636219
## CruiseLineCrystal -5.35883 6.93074 -0.773 0.440747
## CruiseLineCunard 12.91039 6.30094 2.049 0.042389 *
## CruiseLineDisney 6.40945 6.83084 0.938 0.349748
## CruiseLineHolland American 2.89559 5.14802 0.562 0.574724
## CruiseLineMSC 2.91425 5.53716 0.526 0.599531
## CruiseLineNorwegian 3.74539 5.28449 0.709 0.479692
## CruiseLineOceania -1.93426 6.17389 -0.313 0.754536
## CruiseLineOrient 9.53468 8.42116 1.132 0.259531
## CruiseLineP&O 1.99782 5.66886 0.352 0.725069
## CruiseLinePrincess 4.34820 5.21047 0.835 0.405456
## CruiseLineRegent Seven Seas -15.31570 5.87986 -2.605 0.010216 *
## CruiseLineRoyal Caribbean 3.23981 5.36858 0.603 0.547198
## CruiseLineSeabourn -9.84022 6.21054 -1.584 0.115417
## CruiseLineSilversea -22.09644 6.12268 -3.609 0.000431 ***
## CruiseLineStar -1.21896 5.54960 -0.220 0.826475
## CruiseLineWindstar 3.00249 6.19930 0.484 0.628933
## Passengers 3.74256 0.08499 44.037 < 2e-16 ***
## PassengerDensity 1.42501 0.09546 14.929 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.761 on 136 degrees of freedom
## Multiple R-squared: 0.9714, Adjusted R-squared: 0.967
## F-statistic: 220.2 on 21 and 136 DF, p-value: < 2.2e-16
anova(lmmultinocrewnocabinsnolennoage)
## Analysis of Variance Table
##
## Response: Tonnage
## Df Sum Sq Mean Sq F value Pr(>F)
## CruiseLine 19 106447 5602 122.56 < 2.2e-16 ***
## Passengers 1 94757 94757 2072.95 < 2.2e-16 ***
## PassengerDensity 1 10187 10187 222.86 < 2.2e-16 ***
## Residuals 136 6217 46
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(lmmultinocrewnocabinsnolennoage)$r.squared
## [1] 0.9714315
summary(lmmultinocrewnocabinsnolennoage)$adj.r.squared
## [1] 0.9670202
## Tonnage CruiseLine Passengers PassengerDensity .hat .sigma
## 1 30.277 Azamara 6.94 42.64 0.50000000 6.786007
## 2 30.277 Azamara 6.94 42.64 0.50000000 6.786007
## 3 47.262 Carnival 14.86 31.80 0.05958741 6.785599
## 4 110.000 Carnival 29.74 36.99 0.05168190 6.785868
## 5 101.353 Carnival 26.42 38.36 0.04769368 6.782927
## 6 70.367 Carnival 20.52 34.29 0.04727181 6.785652
## .cooksd .fitted .resid .stdresid
## 1 3.094530e-28 30.27700 -3.944622e-13 -8.251040e-14
## 2 9.318068e-29 30.27700 2.164567e-13 4.527665e-14
## 3 4.715056e-05 46.42310 8.388953e-01 1.279490e-01
## 4 1.382635e-05 109.50812 4.918828e-01 7.470900e-02
## 5 2.809660e-04 99.03509 2.317908e+00 3.513146e-01
## 6 3.209432e-05 71.15424 -7.872362e-01 -1.192913e-01
## Tonnage CruiseLine Age Passengers Length Cabins PassengerDensity Crew
## 1 30.277 Azamara 6 6.94 5.94 3.55 42.64 3.55
## 2 30.277 Azamara 6 6.94 5.94 3.55 42.64 3.55
## 3 47.262 Carnival 26 14.86 7.22 7.43 31.80 6.70
## 4 110.000 Carnival 11 29.74 9.53 14.88 36.99 19.10
## 5 101.353 Carnival 17 26.42 8.92 13.21 38.36 10.00
## 6 70.367 Carnival 22 20.52 8.55 10.20 34.29 9.20
## .hat .sigma .cooksd .fitted .resid .stdresid
## 1 0.50000000 6.145927 2.399420e-28 30.27700 -3.419487e-13 -7.898413e-14
## 2 0.50000000 6.145927 2.053761e-28 30.27700 3.163609e-13 7.307380e-14
## 3 0.06434814 6.142756 3.601877e-04 45.07659 2.185413e+00 3.690118e-01
## 4 0.47269691 6.118968 3.983958e-02 114.77913 -4.779132e+00 -1.074937e+00
## 5 0.07480117 6.131295 1.952182e-03 96.68680 4.666205e+00 7.923374e-01
## 6 0.05370167 6.145609 2.983478e-05 71.06334 -6.963353e-01 -1.169145e-01
Here the top plot is the Reduced model and the bottom plot is the original full model.
##
## Call:
## lm(formula = Tonnage ~ Passengers + PassengerDensity, data = crs)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.404 -2.599 1.147 3.419 29.597
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -46.06382 3.69698 -12.46 <2e-16 ***
## Passengers 3.93130 0.06854 57.36 <2e-16 ***
## PassengerDensity 1.12245 0.07677 14.62 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.941 on 155 degrees of freedom
## Multiple R-squared: 0.9551, Adjusted R-squared: 0.9545
## F-statistic: 1648 on 2 and 155 DF, p-value: < 2.2e-16
\(\text{Tonnage} = \beta_0 + \beta_1\,\text{Passengers} + \beta_2\,\text{PassengerDensity} + \epsilon\)
\(\widehat{\text{Tonnage}} = -46.06382 + 3.93130\,\text{Passengers} + 1.12245\,\text{PassengerDensity}\)
For example, for the Carnival and Norwegian ships, a model fitted separately to each cruise line using only Passengers and PassengerDensity explains over 99% of the variation in Tonnage. This can be seen from their R squared values below.
# Two-regressor model fitted on the Carnival ships only.
lmredcarnvl <- lm(
  formula = Tonnage ~ Passengers + PassengerDensity,
  data = carnvl
)
summary(lmredcarnvl)
##
## Call:
## lm(formula = Tonnage ~ Passengers + PassengerDensity, data = carnvl)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.1003 -0.3574 -0.3574 1.3633 2.1307
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -77.9884 4.7722 -16.34 1.21e-12 ***
## Passengers 3.3449 0.0752 44.48 < 2e-16 ***
## PassengerDensity 2.3352 0.1253 18.63 1.15e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.879 on 19 degrees of freedom
## Multiple R-squared: 0.9922, Adjusted R-squared: 0.9914
## F-statistic: 1214 on 2 and 19 DF, p-value: < 2.2e-16
\(\text{Tonnage}_{\text{Carnival}} = -77.9884 + 3.3449\,\text{Passengers}_{\text{Carnival}} + 2.3352\,\text{PassengerDensity}_{\text{Carnival}} + \epsilon_{\text{Carnival}}\)
# Two-regressor model fitted on the Norwegian ships only.
lmrednorw <- lm(
  formula = Tonnage ~ Passengers + PassengerDensity,
  data = norw
)
summary(lmrednorw)
##
## Call:
## lm(formula = Tonnage ~ Passengers + PassengerDensity, data = norw)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.3074 -0.5221 -0.3651 0.9195 3.9573
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -56.5263 3.4684 -16.30 1.57e-08 ***
## Passengers 3.5874 0.1568 22.88 5.75e-10 ***
## PassengerDensity 1.6146 0.1335 12.09 2.72e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.858 on 10 degrees of freedom
## Multiple R-squared: 0.9948, Adjusted R-squared: 0.9938
## F-statistic: 965.5 on 2 and 10 DF, p-value: 3.629e-12
\(\text{Tonnage}_{\text{Norwegian}} = -56.5263 + 3.5874\,\text{Passengers}_{\text{Norwegian}} + 1.6146\,\text{PassengerDensity}_{\text{Norwegian}} + \epsilon_{\text{Norwegian}}\)