## Data Analysis Week 3 Homework
## 1
# Sample of 10 observations analysed in part 1 (presumably the per-sample
# change in sales discussed in question 1.9).
y <- c(5, 5, 4, 4, 5, 5, 4, 5, 4, 4)

## 1.1 Sample mean
mu <- mean(y)
mu
## [1] 4.5

## 1.2 Sample standard deviation
s <- sd(y)
s
## [1] 0.5270463

## 1.3 0.975 quantile of the standard normal distribution N(0, 1)
qnorm(p = 0.975, mean = 0, sd = 1)
## [1] 1.959964

## 1.4 What is the 0.975 quantile of the N(0, s) distribution?
qnorm(p = 0.975, mean = 0, sd = s)
## [1] 1.032992

## 1.5 What is the standard error of the mean for a sample of size n = 10?
## (computed here with a standard deviation of 1, i.e. 1 / sqrt(n))
mnstderror <- 1 / sqrt(10)
mnstderror
## [1] 0.3162278

## 1.6 What is the 0.975 quantile of N(0, standard error from 1.5)?
qnorm(p = 0.975, mean = 0, sd = mnstderror)
## [1] 0.619795

## 1.7 Substitute s for sigma in the standard error; what value is found?
smnstderror <- s / sqrt(10)
smnstderror
## [1] 0.1666667

## 1.8 What is the 0.975 quantile of N(0, standard error from 1.7)?
qnorm(p = 0.975, mean = 0, sd = smnstderror)
## [1] 0.3266607
## 1.9
## Is the change in sales due to the new strategy or not?
## B. I believe the strategy increases sales. If the true mean change were 0, a sample mean of 4.5 would be
## extremely improbable: it lies far beyond the 0.975 quantile (about 0.33) of the sampling distribution from 1.8.
library(readxl)
# Read the Toyota price workbook into ToyotaPrices.
# NOTE(review): this is an absolute, machine-specific Windows path; confirm it
# exists before running the script on another machine.
ToyotaPrices <- read_excel(
  path = "C:/RBS/Data Analysis/Homework/Week 3/ToyotaPrices.xlsx"
)

# 2.1 Summary statistics for every column of the raw data
summary(ToyotaPrices)
## Id Price Age_08_04 Mfg_Month
## Min. : 1.0 Min. : 4350 Min. : 1.00 Min. : 1.000
## 1st Qu.: 361.8 1st Qu.: 8450 1st Qu.:44.00 1st Qu.: 3.000
## Median : 721.5 Median : 9900 Median :61.00 Median : 5.000
## Mean : 721.6 Mean :10731 Mean :55.95 Mean : 5.549
## 3rd Qu.:1081.2 3rd Qu.:11950 3rd Qu.:70.00 3rd Qu.: 8.000
## Max. :1442.0 Max. :32500 Max. :80.00 Max. :12.000
## Mfg_Year KM HP Automatic
## Min. :1998 Min. : 1 Min. : 69.0 Min. :0.00000
## 1st Qu.:1998 1st Qu.: 43000 1st Qu.: 90.0 1st Qu.:0.00000
## Median :1999 Median : 63390 Median :110.0 Median :0.00000
## Mean :2000 Mean : 68533 Mean :101.5 Mean :0.05571
## 3rd Qu.:2001 3rd Qu.: 87021 3rd Qu.:110.0 3rd Qu.:0.00000
## Max. :2004 Max. :243000 Max. :192.0 Max. :1.00000
## cc Doors Cylinders Gears Quarterly_Tax
## Min. : 1300 Min. :2.000 Min. :4 Min. :3.000 Min. : 19.00
## 1st Qu.: 1400 1st Qu.:3.000 1st Qu.:4 1st Qu.:5.000 1st Qu.: 69.00
## Median : 1600 Median :4.000 Median :4 Median :5.000 Median : 85.00
## Mean : 1577 Mean :4.033 Mean :4 Mean :5.026 Mean : 87.12
## 3rd Qu.: 1600 3rd Qu.:5.000 3rd Qu.:4 3rd Qu.:5.000 3rd Qu.: 85.00
## Max. :16000 Max. :5.000 Max. :4 Max. :6.000 Max. :283.00
## Weight Mfr_Guarantee BOVAG_Guarantee Guarantee_Period
## Min. :1000 Min. :0.0000 Min. :0.0000 Min. : 3.000
## 1st Qu.:1040 1st Qu.:0.0000 1st Qu.:1.0000 1st Qu.: 3.000
## Median :1070 Median :0.0000 Median :1.0000 Median : 3.000
## Mean :1072 Mean :0.4095 Mean :0.8955 Mean : 3.815
## 3rd Qu.:1085 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.: 3.000
## Max. :1615 Max. :1.0000 Max. :1.0000 Max. :36.000
## ABS Airbag_1 Airbag_2 Airco
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:1.0000 1st Qu.:1.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.0000 Median :1.0000 Median :1.0000 Median :1.0000
## Mean :0.8134 Mean :0.9708 Mean :0.7228 Mean :0.5084
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## Automatic_airco Boardcomputer CD_Player Central_Lock
## Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :0.0000 Median :0.0000 Median :1.0000
## Mean :0.05641 Mean :0.2946 Mean :0.2187 Mean :0.5801
## 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:1.0000
## Max. :1.00000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## Powered_Windows Power_Steering Radio Mistlamps
## Min. :0.000 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:1.0000 1st Qu.:0.0000 1st Qu.:0.000
## Median :1.000 Median :1.0000 Median :0.0000 Median :0.000
## Mean :0.562 Mean :0.9777 Mean :0.1462 Mean :0.257
## 3rd Qu.:1.000 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:1.000
## Max. :1.000 Max. :1.0000 Max. :1.0000 Max. :1.000
## Sport_Model Backseat_Divider Metallic_Rim Radio_cassette
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:1.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :1.0000 Median :0.0000 Median :0.0000
## Mean :0.3001 Mean :0.7702 Mean :0.2047 Mean :0.1455
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## Tow_Bar
## Min. :0.0000
## 1st Qu.:0.0000
## Median :0.0000
## Mean :0.2779
## 3rd Qu.:1.0000
## Max. :1.0000
## Remove errors
# The first summary reports a cc maximum of 16000 against a 3rd quartile of
# 1600; that exact value is treated as an entry error and set to missing.
ToyotaPrices$cc <- replace(ToyotaPrices$cc, ToyotaPrices$cc == 16000, NA)

# Re-summarise to confirm the outlier is gone (cc now reports one NA).
summary(ToyotaPrices)
## Id Price Age_08_04 Mfg_Month
## Min. : 1.0 Min. : 4350 Min. : 1.00 Min. : 1.000
## 1st Qu.: 361.8 1st Qu.: 8450 1st Qu.:44.00 1st Qu.: 3.000
## Median : 721.5 Median : 9900 Median :61.00 Median : 5.000
## Mean : 721.6 Mean :10731 Mean :55.95 Mean : 5.549
## 3rd Qu.:1081.2 3rd Qu.:11950 3rd Qu.:70.00 3rd Qu.: 8.000
## Max. :1442.0 Max. :32500 Max. :80.00 Max. :12.000
##
## Mfg_Year KM HP Automatic
## Min. :1998 Min. : 1 Min. : 69.0 Min. :0.00000
## 1st Qu.:1998 1st Qu.: 43000 1st Qu.: 90.0 1st Qu.:0.00000
## Median :1999 Median : 63390 Median :110.0 Median :0.00000
## Mean :2000 Mean : 68533 Mean :101.5 Mean :0.05571
## 3rd Qu.:2001 3rd Qu.: 87021 3rd Qu.:110.0 3rd Qu.:0.00000
## Max. :2004 Max. :243000 Max. :192.0 Max. :1.00000
##
## cc Doors Cylinders Gears Quarterly_Tax
## Min. :1300 Min. :2.000 Min. :4 Min. :3.000 Min. : 19.00
## 1st Qu.:1400 1st Qu.:3.000 1st Qu.:4 1st Qu.:5.000 1st Qu.: 69.00
## Median :1600 Median :4.000 Median :4 Median :5.000 Median : 85.00
## Mean :1567 Mean :4.033 Mean :4 Mean :5.026 Mean : 87.12
## 3rd Qu.:1600 3rd Qu.:5.000 3rd Qu.:4 3rd Qu.:5.000 3rd Qu.: 85.00
## Max. :2000 Max. :5.000 Max. :4 Max. :6.000 Max. :283.00
## NA's :1
## Weight Mfr_Guarantee BOVAG_Guarantee Guarantee_Period
## Min. :1000 Min. :0.0000 Min. :0.0000 Min. : 3.000
## 1st Qu.:1040 1st Qu.:0.0000 1st Qu.:1.0000 1st Qu.: 3.000
## Median :1070 Median :0.0000 Median :1.0000 Median : 3.000
## Mean :1072 Mean :0.4095 Mean :0.8955 Mean : 3.815
## 3rd Qu.:1085 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.: 3.000
## Max. :1615 Max. :1.0000 Max. :1.0000 Max. :36.000
##
## ABS Airbag_1 Airbag_2 Airco
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:1.0000 1st Qu.:1.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.0000 Median :1.0000 Median :1.0000 Median :1.0000
## Mean :0.8134 Mean :0.9708 Mean :0.7228 Mean :0.5084
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
##
## Automatic_airco Boardcomputer CD_Player Central_Lock
## Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :0.0000 Median :0.0000 Median :1.0000
## Mean :0.05641 Mean :0.2946 Mean :0.2187 Mean :0.5801
## 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:1.0000
## Max. :1.00000 Max. :1.0000 Max. :1.0000 Max. :1.0000
##
## Powered_Windows Power_Steering Radio Mistlamps
## Min. :0.000 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:1.0000 1st Qu.:0.0000 1st Qu.:0.000
## Median :1.000 Median :1.0000 Median :0.0000 Median :0.000
## Mean :0.562 Mean :0.9777 Mean :0.1462 Mean :0.257
## 3rd Qu.:1.000 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:1.000
## Max. :1.000 Max. :1.0000 Max. :1.0000 Max. :1.000
##
## Sport_Model Backseat_Divider Metallic_Rim Radio_cassette
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:1.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :1.0000 Median :0.0000 Median :0.0000
## Mean :0.3001 Mean :0.7702 Mean :0.2047 Mean :0.1455
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
##
## Tow_Bar
## Min. :0.0000
## 1st Qu.:0.0000
## Median :0.0000
## Mean :0.2779
## 3rd Qu.:1.0000
## Max. :1.0000
##
# 2.2 Convert the 0/1 indicator columns to Yes/No factors
#
# Each listed column is coded 0 = feature absent, 1 = feature present.
# factor() on such a column produces the levels "0", "1" in that order, so the
# previous `levels(x) <- c("Yes", "No")` labelled 0 as "Yes" and 1 as "No",
# i.e. the labels were inverted (e.g. Airbag_1 has mean 0.97 yet was reported
# as mostly "No"). Mapping levels to labels explicitly removes the dependence
# on level order and also works when a column happens to contain only one of
# the two values.
#
# NOTE: the summary output recorded below this block was captured with the
# old, inverted labels; re-run the script to refresh it.
# NOTE(review): `Automatic` also looks binary (min 0, max 1 in the summary)
# but was left numeric in the original script; confirm whether it should be
# converted as well.
binary_cols <- c(
  "Mfr_Guarantee", "BOVAG_Guarantee", "ABS", "Airbag_1", "Airbag_2",
  "Airco", "Automatic_airco", "Boardcomputer", "CD_Player", "Central_Lock",
  "Powered_Windows", "Power_Steering", "Radio", "Mistlamps", "Tow_Bar",
  "Sport_Model", "Backseat_Divider", "Metallic_Rim", "Radio_cassette"
)

ToyotaPrices[binary_cols] <- lapply(
  ToyotaPrices[binary_cols],
  function(indicator) {
    factor(indicator, levels = c(0, 1), labels = c("No", "Yes"))
  }
)

summary(ToyotaPrices)
## Id Price Age_08_04 Mfg_Month
## Min. : 1.0 Min. : 4350 Min. : 1.00 Min. : 1.000
## 1st Qu.: 361.8 1st Qu.: 8450 1st Qu.:44.00 1st Qu.: 3.000
## Median : 721.5 Median : 9900 Median :61.00 Median : 5.000
## Mean : 721.6 Mean :10731 Mean :55.95 Mean : 5.549
## 3rd Qu.:1081.2 3rd Qu.:11950 3rd Qu.:70.00 3rd Qu.: 8.000
## Max. :1442.0 Max. :32500 Max. :80.00 Max. :12.000
##
## Mfg_Year KM HP Automatic
## Min. :1998 Min. : 1 Min. : 69.0 Min. :0.00000
## 1st Qu.:1998 1st Qu.: 43000 1st Qu.: 90.0 1st Qu.:0.00000
## Median :1999 Median : 63390 Median :110.0 Median :0.00000
## Mean :2000 Mean : 68533 Mean :101.5 Mean :0.05571
## 3rd Qu.:2001 3rd Qu.: 87021 3rd Qu.:110.0 3rd Qu.:0.00000
## Max. :2004 Max. :243000 Max. :192.0 Max. :1.00000
##
## cc Doors Cylinders Gears Quarterly_Tax
## Min. :1300 Min. :2.000 Min. :4 Min. :3.000 Min. : 19.00
## 1st Qu.:1400 1st Qu.:3.000 1st Qu.:4 1st Qu.:5.000 1st Qu.: 69.00
## Median :1600 Median :4.000 Median :4 Median :5.000 Median : 85.00
## Mean :1567 Mean :4.033 Mean :4 Mean :5.026 Mean : 87.12
## 3rd Qu.:1600 3rd Qu.:5.000 3rd Qu.:4 3rd Qu.:5.000 3rd Qu.: 85.00
## Max. :2000 Max. :5.000 Max. :4 Max. :6.000 Max. :283.00
## NA's :1
## Weight Mfr_Guarantee BOVAG_Guarantee Guarantee_Period ABS
## Min. :1000 Yes:848 Yes: 150 Min. : 3.000 Yes: 268
## 1st Qu.:1040 No :588 No :1286 1st Qu.: 3.000 No :1168
## Median :1070 Median : 3.000
## Mean :1072 Mean : 3.815
## 3rd Qu.:1085 3rd Qu.: 3.000
## Max. :1615 Max. :36.000
##
## Airbag_1 Airbag_2 Airco Automatic_airco Boardcomputer CD_Player
## Yes: 42 Yes: 398 Yes:706 Yes:1355 Yes:1013 Yes:1122
## No :1394 No :1038 No :730 No : 81 No : 423 No : 314
##
##
##
##
##
## Central_Lock Powered_Windows Power_Steering Radio Mistlamps Sport_Model
## Yes:603 Yes:629 Yes: 32 Yes:1226 Yes:1067 Yes:1005
## No :833 No :807 No :1404 No : 210 No : 369 No : 431
##
##
##
##
##
## Backseat_Divider Metallic_Rim Radio_cassette Tow_Bar
## Yes: 330 Yes:1142 Yes:1227 Yes:1037
## No :1106 No : 294 No : 209 No : 399
##
##
##
##
##
# Load library
library(ggplot2)
## 2.3 Explore the distribution of price
# Histogram of Price. geom_histogram() is left at its default bin count, which
# is why ggplot2 prints the `stat_bin()` message when the plot is drawn.
Price_Histogram <- ggplot(ToyotaPrices, aes(x = Price)) +
  geom_histogram() +
  ggtitle("Histogram")
Price_Histogram
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Density
# Kernel density estimate of Price. The title used to read "Histogram" (copied
# from the histogram plot), which mislabelled this panel, most visibly when
# the plots are shown side by side; it now names the plot correctly.
Price_Density <- ggplot(ToyotaPrices) +
  aes(x = Price) +
  geom_density() +
  labs(title = "Density")
Price_Density

# Normal QQ-plot of Price: sample quantiles against theoretical normal
# quantiles, with the reference line drawn in red.
Price_QQ <- ggplot(ToyotaPrices, aes(sample = Price)) +
  geom_qq() +
  geom_qq_line(colour = "red") +
  labs(title = "Normal QQ-Plot", y = "Price")
Price_QQ

## Price is not normally distributed: it is right-skewed. The majority of observations fall between roughly $7k and $13k.
## There are additional, smaller groupings near ~$16k, ~$18k, and ~$22k.
## 2.4 Put all plots in one screen
library(gridExtra)
# Lay the three Price plots out side by side in a single row of three columns.
grid.arrange(
  grobs = list(Price_Histogram, Price_Density, Price_QQ),
  ncol = 3
)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
