library(readxl)
# Load the Toyota price data from the homework workbook.  The location is an
# absolute path on the C: drive, so the file has to exist there for the script
# to run.
ToyotaPrices <- read_excel(
  path = "C:/RBS/Data Analysis/Homework/Week 4/ToyotaPrices.xlsx"
)

## Convert the guarantee and equipment indicator columns to Factors with
## Yes/No Levels
# 2.2 Convert categorical variables to factors
# Every column listed here is recoded the same way; each one appears once.
binary_cols <- c(
  "Mfr_Guarantee", "BOVAG_Guarantee", "ABS", "Airbag_1", "Airbag_2",
  "Airco", "Automatic_airco", "Boardcomputer", "CD_Player", "Central_Lock",
  "Powered_Windows", "Power_Steering", "Radio", "Mistlamps", "Tow_Bar",
  "Sport_Model", "Backseat_Divider", "Metallic_Rim", "Radio_cassette",
  "Automatic"
)

ToyotaPrices[binary_cols] <- lapply(ToyotaPrices[binary_cols], function(column) {
  encoded <- factor(column)
  # factor() orders its levels by sorted value, so the smaller value becomes
  # "No" and the larger "Yes" (presumably 0 and 1 -- confirm against the
  # workbook).  Relabelling is positional, so stop if a column does not have
  # exactly two distinct values instead of silently mislabelling it.
  stopifnot(nlevels(encoded) == 2L)
  levels(encoded) <- c("No", "Yes")
  encoded
})

# Mark suspicious readings as missing so that the affected rows are dropped by
# na.omit() below.
# NOTE(review): 500 km and 16000 cc look like data-entry screens (the remaining
# cc values top out at 2000) -- confirm the intended thresholds.
ToyotaPrices$KM[ToyotaPrices$KM < 500] <- NA
ToyotaPrices$cc[ToyotaPrices$cc == 16000] <- NA
# Automatic is left untouched: it is a No/Yes factor at this point, so a
# comparison with 0 matches no rows, and blanking every "No" would discard most
# of the data set.

# omit na: drop every row that has a missing value in any column
ToyotaPrices <- na.omit(ToyotaPrices)


summary(ToyotaPrices)
##        Id             Price         Age_08_04       Mfg_Month     
##  Min.   :   1.0   Min.   : 4350   Min.   : 4.00   Min.   : 1.000  
##  1st Qu.: 368.8   1st Qu.: 8450   1st Qu.:44.00   1st Qu.: 3.000  
##  Median : 726.5   Median : 9900   Median :61.00   Median : 5.000  
##  Mean   : 725.1   Mean   :10669   Mean   :56.28   Mean   : 5.537  
##  3rd Qu.:1083.2   3rd Qu.:11900   3rd Qu.:70.00   3rd Qu.: 8.000  
##  Max.   :1441.0   Max.   :31275   Max.   :80.00   Max.   :12.000  
##     Mfg_Year          KM               HP        Automatic        cc      
##  Min.   :1998   Min.   :  1500   Min.   : 69.0   No :1345   Min.   :1300  
##  1st Qu.:1998   1st Qu.: 43427   1st Qu.: 86.0   Yes:  79   1st Qu.:1400  
##  Median :1999   Median : 63831   Median :110.0              Median :1600  
##  Mean   :2000   Mean   : 69096   Mean   :101.5              Mean   :1567  
##  3rd Qu.:2001   3rd Qu.: 87422   3rd Qu.:110.0              3rd Qu.:1600  
##  Max.   :2004   Max.   :243000   Max.   :192.0              Max.   :2000  
##      Doors         Cylinders     Gears       Quarterly_Tax        Weight    
##  Min.   :2.000   Min.   :4   Min.   :3.000   Min.   : 19.00   Min.   :1000  
##  1st Qu.:3.000   1st Qu.:4   1st Qu.:5.000   1st Qu.: 69.00   1st Qu.:1040  
##  Median :4.000   Median :4   Median :5.000   Median : 85.00   Median :1065  
##  Mean   :4.029   Mean   :4   Mean   :5.027   Mean   : 87.24   Mean   :1072  
##  3rd Qu.:5.000   3rd Qu.:4   3rd Qu.:5.000   3rd Qu.: 85.00   3rd Qu.:1085  
##  Max.   :5.000   Max.   :4   Max.   :6.000   Max.   :283.00   Max.   :1615  
##  Mfr_Guarantee BOVAG_Guarantee Guarantee_Period  ABS       Airbag_1  
##  No :839       No : 143        Min.   : 3.00    No : 267   No :  42  
##  Yes:585       Yes:1281        1st Qu.: 3.00    Yes:1157   Yes:1382  
##                                Median : 3.00                         
##                                Mean   : 3.73                         
##                                3rd Qu.: 3.00                         
##                                Max.   :28.00                         
##  Airbag_2   Airco     Automatic_airco Boardcomputer CD_Player  Central_Lock
##  No : 396   No :705   No :1346        No :1010      No :1118   No :602     
##  Yes:1028   Yes:719   Yes:  78        Yes: 414      Yes: 306   Yes:822     
##                                                                            
##                                                                            
##                                                                            
##                                                                            
##  Powered_Windows Power_Steering Radio      Mistlamps  Sport_Model
##  No :628         No :  32       No :1215   No :1057   No :998    
##  Yes:796         Yes:1392       Yes: 209   Yes: 367   Yes:426    
##                                                                  
##                                                                  
##                                                                  
##                                                                  
##  Backseat_Divider Metallic_Rim Radio_cassette Tow_Bar   
##  No : 323         No :1132     No :1216       No :1026  
##  Yes:1101         Yes: 292     Yes: 208       Yes: 398  
##                                                         
##                                                         
##                                                         
## 
# Load library
library(ggplot2)


# 1. Side-by-side Boxplot

# Standardise the numeric variables with scale() so they can share one axis.
# The columns are chosen by name rather than by position, so the plot does not
# silently change if the column order of the workbook changes.
boxplot_vars <- c(
  "Price", "Age_08_04", "Mfg_Month", "Mfg_Year", "KM", "HP",
  "cc", "Doors", "Quarterly_Tax", "Weight"
)
boxplot(scale(ToyotaPrices[, boxplot_vars]))

## Price, KM, Mfg_Month, Mfg_Year, HP, CC, Tax, and Weight are skewed to varying degrees.  KM has the most outliers, while Weight has the largest scaled outliers.

# 2. Scatterplot Price vs. KM

library(ggplot2)
# Semi-transparent points (to reveal overplotting) with a blue loess trend
# line drawn without its confidence band.
PriceKM <- ggplot(ToyotaPrices, aes(x = KM, y = Price)) +
  geom_point(alpha = 0.25) +
  geom_smooth(method = "loess", se = FALSE, color = "blue")
PriceKM
## `geom_smooth()` using formula 'y ~ x'

## The relationship looks curved: the impact of mileage on price diminishes as the mileage increases.



# 3. Box-Whisker plot of Price versus ABS

# ABS is a factor, so the formula plot draws one box per level.
plot(Price ~ ABS, data = ToyotaPrices)

# 4. Correlation between Price and KM

# Correlation between price and kilometres driven (cor() default method).
cor(ToyotaPrices$Price, ToyotaPrices$KM)
## [1] -0.5639865
# It is a moderately strong negative correlation.  Generally, the higher the kilometers, the lower the price.



# Working subsets: price, mileage and weight, with and without the tow-bar flag.
# NOTE(review): neither subset is used in the visible remainder of the script.
myData_PKWT <- subset(ToyotaPrices, select = c(Price, KM, Weight, Tow_Bar))
myData2 <- subset(myData_PKWT, select = -Tow_Bar)

#5. Price vs. Tow Bar
# Price goes on the y-axis here; KM against Tow_Bar is the separate plot in
# section 6.
plot(Price ~ Tow_Bar, data = ToyotaPrices)

# Generally, Toyotas without a tow bar cost slightly less than Toyotas with a tow bar.  The two box plots have similar median prices, and both skew right with numerous outliers at the high end of the data.  The presence of a tow bar does not appear to be a strong driver of price.

#6. KM vs. Tow Bar
# Tow_Bar is a factor, so this draws one box of KM per tow-bar level.
plot(
  KM ~ Tow_Bar,
  data = ToyotaPrices
)

#  The two boxplots are very similar.  Vehicles with a tow bar seem to have higher mileage, though the outliers with the highest mileage are in vehicles without tow bars.  I don't believe the presence of a tow bar is a strong predictor for high mileage.

# 7. The deepened mystery

# Price against KM, coloured by tow-bar status, with one smoother per group
# (geom_smooth() picks its own method and formula here).
PriceKMTowBar <- ggplot(
  ToyotaPrices,
  aes(x = KM, y = Price, color = Tow_Bar)
) +
  geom_point(alpha = 1) +
  geom_smooth()
PriceKMTowBar
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# The plot shows that vehicles with the Tow Bar are consistently cheaper than vehicles without the tow bar. The impact of a tow bar on price is most significant at under 50,000 km.