# Load the vehicle data set from CSV and print its structure.
# read.csv() replaces read.table(sep = ","): read.table()'s defaults treat "'"
# as a quote character and "#" as a comment marker, either of which corrupts
# CSV fields that happen to contain them. read.csv() uses CSV-appropriate
# defaults (quote = "\"", comment.char = "", fill = TRUE).
# NOTE(review): the path is absolute and machine-specific; adjust it before
# running this script on another machine.
data <- read.csv(
  "C:/Users/Mehedi Hassan Galib/Desktop/R/vehicle.csv",
  header = TRUE
)
str(data)
## 'data.frame': 1624 obs. of 7 variables:
## $ Vehicle: int 1 2 3 4 5 6 7 8 9 10 ...
## $ fm : num 0 10 15 0 13 21 11 5 8 1 ...
## $ Mileage: num 863 4644 16330 13 22537 ...
## $ lh : num 1.1 2.4 4.2 1 4.5 3.1 0.7 2.9 3.4 0.7 ...
## $ lc : num 66.3 233 325.1 66.6 328.7 ...
## $ mc : num 697 120 175 0 175 ...
## $ State : chr "MS" "CA" "WI" "OR" ...
# First look at the raw data: leading rows, per-column summaries, structure.
head(data, n = 6L)
## Vehicle fm Mileage lh lc mc State
## 1 1 0 863 1.1 66.30 697.23 MS
## 2 2 10 4644 2.4 233.03 119.66 CA
## 3 3 15 16330 4.2 325.08 175.46 WI
## 4 4 0 13 1.0 66.64 0.00 OR
## 5 5 13 22537 4.5 328.66 175.46 AZ
## 6 6 21 40931 3.1 205.28 175.46 FL
# Note the minimum of 0 for lh and lc, and the minimum of -1 for fm.
summary(object = data)
## Vehicle fm Mileage lh
## Min. : 1.0 Min. :-1.000 Min. : 1 Min. : 0.000
## 1st Qu.: 406.8 1st Qu.: 4.000 1st Qu.: 5819 1st Qu.: 1.500
## Median : 812.5 Median :10.000 Median :17000 Median : 2.600
## Mean : 812.5 Mean : 9.414 Mean :20579 Mean : 3.308
## 3rd Qu.:1218.2 3rd Qu.:14.000 3rd Qu.:30075 3rd Qu.: 4.300
## Max. :1624.0 Max. :23.000 Max. :99983 Max. :35.200
## lc mc State
## Min. : 0.0 Min. : 0.0 Length:1624
## 1st Qu.: 106.4 1st Qu.: 119.7 Class :character
## Median : 195.6 Median : 119.7 Mode :character
## Mean : 242.9 Mean : 179.4
## 3rd Qu.: 317.8 3rd Qu.: 175.5
## Max. :3234.4 Max. :3891.1
# Structure dump of the still-unmodified data frame.
str(object = data)
## 'data.frame': 1624 obs. of 7 variables:
## $ Vehicle: int 1 2 3 4 5 6 7 8 9 10 ...
## $ fm : num 0 10 15 0 13 21 11 5 8 1 ...
## $ Mileage: num 863 4644 16330 13 22537 ...
## $ lh : num 1.1 2.4 4.2 1 4.5 3.1 0.7 2.9 3.4 0.7 ...
## $ lc : num 66.3 233 325.1 66.6 328.7 ...
## $ mc : num 697 120 175 0 175 ...
## $ State : chr "MS" "CA" "WI" "OR" ...
# Zeros in lh and lc are treated as missing and replaced by the column mean.
# Alternative kept for reference: mark the zeros as NA instead.
#data$lh[data$lh == 0] <- NA
#data$lc[data$lc == 0] <- NA
#summary(data)
# The replacement mean is taken over the non-zero entries only. Averaging the
# whole column would count the placeholder zeros themselves and pull the
# imputed value towards zero. The right-hand side is evaluated before the
# assignment, so it sees the column as it was before any replacement.
data$lh[data$lh == 0] <- mean(data$lh[data$lh != 0])
data$lc[data$lc == 0] <- mean(data$lc[data$lc != 0])
summary(data)
# NOTE: the output recorded below, and any later recorded output that depends
# on lh or lc, was produced with the earlier whole-column mean. Expect small
# differences in the lh/lc figures when the script is re-run.
## Vehicle fm Mileage lh
## Min. : 1.0 Min. :-1.000 Min. : 1 Min. : 0.200
## 1st Qu.: 406.8 1st Qu.: 4.000 1st Qu.: 5819 1st Qu.: 1.600
## Median : 812.5 Median :10.000 Median :17000 Median : 2.700
## Mean : 812.5 Mean : 9.414 Mean :20579 Mean : 3.367
## 3rd Qu.:1218.2 3rd Qu.:14.000 3rd Qu.:30075 3rd Qu.: 4.300
## Max. :1624.0 Max. :23.000 Max. :99983 Max. :35.200
## lc mc State
## Min. : 9.45 Min. : 0.0 Length:1624
## 1st Qu.: 113.50 1st Qu.: 119.7 Class :character
## Median : 202.89 Median : 119.7 Mode :character
## Mean : 247.26 Mean : 179.4
## 3rd Qu.: 317.81 3rd Qu.: 175.5
## Max. :3234.41 Max. :3891.1
# Scatterplot matrix and correlation matrix for Mileage, lh and lc
# (columns 3 to 5 of the data frame, selected here by name).
pairs(x = data[c("Mileage", "lh", "lc")])
cor(x = data[c("Mileage", "lh", "lc")])
## Mileage lh lc
## Mileage 1.00000000 0.07922303 0.07011404
## lh 0.07922303 1.00000000 0.97460946
## lc 0.07011404 0.97460946 1.00000000
# Random train/test split with a fixed seed for reproducibility.
# Each row is independently labelled 1 (probability 0.7) or 2 (probability
# 0.3); label 1 rows form the training set, label 2 rows the test set.
set.seed(seed = 1234)
x <- sample(
  x = 2,
  size = nrow(data),
  replace = TRUE,
  prob = c(0.7, 0.3)
)
train <- data[x == 1, ]
test <- data[x == 2, ]
# Side-by-side summary of lc in the training (column 1) and test (column 2)
# sets.
cbind(
  summary(train[["lc"]]),
  summary(test[["lc"]])
)
## [,1] [,2]
## Min. 9.4500 12.7800
## 1st Qu. 111.6225 116.0300
## Median 202.7350 207.6000
## Mean 246.7238 248.5780
## 3rd Qu. 320.6975 302.4825
## Max. 3234.4100 2263.1200
# Linear regression of lc on lh and Mileage, fitted on the training set.
model <- lm(
  formula = lc ~ lh + Mileage,
  data = train
)
model
##
## Call:
## lm(formula = lc ~ lh + Mileage, data = train)
##
## Coefficients:
## (Intercept) lh Mileage
## 2.728e+00 7.287e+01 -5.523e-05
# In the recorded summary below, lh is highly significant while Mileage is not
# (p = 0.506).
summary(object = model)
##
## Call:
## lm(formula = lc ~ lh + Mileage, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -652.11 -14.90 -0.93 13.53 762.36
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.728e+00 2.776e+00 0.983 0.326
## lh 7.287e+01 5.207e-01 139.937 <2e-16 ***
## Mileage -5.523e-05 8.304e-05 -0.665 0.506
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 51.35 on 1155 degrees of freedom
## Multiple R-squared: 0.9446, Adjusted R-squared: 0.9445
## F-statistic: 9852 on 2 and 1155 DF, p-value: < 2.2e-16
# Simple linear regression of lc on lh alone, fitted on the training set.
model1 <- lm(
  formula = lc ~ lh,
  data = train
)
model1
##
## Call:
## lm(formula = lc ~ lh, data = train)
##
## Coefficients:
## (Intercept) lh
## 1.702 72.838
# The recorded multiple R-squared (0.9446) matches that of the model that also
# included Mileage.
summary(object = model1)
##
## Call:
## lm(formula = lc ~ lh, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -650.29 -14.79 -1.00 13.60 763.50
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.7020 2.3069 0.738 0.461
## lh 72.8379 0.5188 140.402 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 51.34 on 1156 degrees of freedom
## Multiple R-squared: 0.9446, Adjusted R-squared: 0.9446
## F-statistic: 1.971e+04 on 1 and 1156 DF, p-value: < 2.2e-16
# Scatterplot of lc against lh on the training set with the model1 fit
# drawn on top in red.
plot(lc ~ lh, data = train)
abline(model1, col = "Red")
# Diagnostic plots for model1 arranged in a 2x2 grid. par() returns the
# previous settings; they are restored afterwards so that later plots are not
# drawn into the same 2x2 layout.
old_par <- par(mfrow = c(2, 2))
plot(model1)
par(old_par)
# Inspect row 1620, which holds the largest lc value in the data (3234.41).
# NOTE(review): presumably a point flagged by the diagnostic plots - confirm.
data[1620, ]
## Vehicle fm Mileage lh lc mc State
## 1620 1620 11 15565 33.9 3234.41 2046.03 HI
# Predicted lc for every row of the test set, using model1.
p <- predict(object = model1, newdata = test)
head(p, n = 6L)
## 5 14 16 26 28 29
## 329.47259 147.37784 227.49953 154.66163 140.09405 38.12099
# Prediction for a single new observation with lh = 10.
predict(object = model1, newdata = data.frame(lh = 10))
## 1
## 730.081
# faraway provides the vif() function used below.
library(faraway)
## Warning: package 'faraway' was built under R version 4.0.2
# Larger model: lc regressed on lh, Mileage, mc and fm (training set).
model2 <- lm(
  formula = lc ~ lh + Mileage + mc + fm,
  data = train
)
model2
##
## Call:
## lm(formula = lc ~ lh + Mileage + mc + fm, data = train)
##
## Coefficients:
## (Intercept) lh Mileage mc fm
## -2.3166193 74.3592629 -0.0003268 -0.0399450 1.3535638
# Variance inflation factors for the predictors of model2; all recorded values
# are below 2.
vif(model2)
## lh Mileage mc fm
## 1.407668 1.883753 1.423232 1.864401