# Attach dplyr for the pipe and the data-manipulation verbs used below.
# library() stops with an error when the package is missing, whereas
# require() only returns FALSE and lets the script fail later.
library(dplyr, quietly = TRUE, warn.conflicts = FALSE)

# Read the car price data straight from GitHub. stringsAsFactors = TRUE keeps
# Type as a factor on R >= 4.0 as well, so summary() reports per-level counts
# exactly as in the recorded output that follows.
carprice_df <- read.csv(
  "https://raw.githubusercontent.com/mehtablocker/CUNY_bridge/master/carprice.csv",
  stringsAsFactors = TRUE
)

# Per-column summary of the full data set.
summary(carprice_df)
## X Type Min.Price Price
## Min. : 6.00 Compact: 7 Min. : 6.90 Min. : 7.40
## 1st Qu.:17.75 Large :11 1st Qu.:11.40 1st Qu.:13.47
## Median :29.50 Midsize:10 Median :14.50 Median :16.30
## Mean :36.54 Small : 7 Mean :16.54 Mean :18.57
## 3rd Qu.:60.25 Sporty : 8 3rd Qu.:19.43 3rd Qu.:20.73
## Max. :79.00 Van : 5 Max. :37.50 Max. :40.10
## Max.Price Range.Price RoughRange gpm100
## Min. : 7.90 Min. : 0.000 Min. :-0.020 Min. :2.800
## 1st Qu.:14.97 1st Qu.: 1.700 1st Qu.: 1.705 1st Qu.:3.800
## Median :18.40 Median : 3.300 Median : 3.305 Median :4.200
## Mean :20.63 Mean : 4.092 Mean : 4.089 Mean :4.167
## 3rd Qu.:24.50 3rd Qu.: 5.850 3rd Qu.: 5.853 3rd Qu.:4.550
## Max. :42.70 Max. :14.600 Max. :14.600 Max. :5.700
## MPG.city MPG.highway
## Min. :15.00 Min. :20.00
## 1st Qu.:18.00 1st Qu.:26.00
## Median :20.00 Median :28.00
## Mean :20.96 Mean :28.15
## 3rd Qu.:23.00 3rd Qu.:30.00
## Max. :31.00 Max. :41.00
# Mean, then median, of Price over the full data set.
mean(carprice_df[["Price"]])
median(carprice_df[["Price"]])
## [1] 18.57292
## [1] 16.3
# Mean, then median, of MPG.highway over the full data set.
mean(carprice_df[["MPG.highway"]])
median(carprice_df[["MPG.highway"]])
## [1] 28.14583
## [1] 28
# Draw a random 50% subset of the rows. Fixing the RNG seed first makes the
# subset, and every statistic derived from it, identical on each run.
# NOTE(review): the seed value is arbitrary. The outputs recorded further down
# came from an unseeded run, so they will not match this draw exactly.
set.seed(1234)
carprice_filt_df <- carprice_df %>% sample_frac(0.5)
# Give every column of the subset a snake_case name. The first column is
# picked by position (it shows up as `X` in the summary of the raw data);
# all other columns are picked by their original names.
carprice_filt_df <- rename(
  carprice_filt_df,
  id          = 1,
  type        = Type,
  min_price   = Min.Price,
  price       = Price,
  max_price   = Max.Price,
  range_price = Range.Price,
  rough_range = RoughRange,
  gpm_100     = gpm100,
  mpg_city    = MPG.city,
  mpg_highway = MPG.highway
)
# Per-column summary of the renamed subset.
summary(carprice_filt_df)
## id type min_price price
## Min. : 6.00 Compact:3 Min. : 8.40 Min. :11.10
## 1st Qu.:17.75 Large :7 1st Qu.:13.03 1st Qu.:15.05
## Median :33.50 Midsize:6 Median :14.85 Median :18.25
## Mean :37.71 Small :2 Mean :17.27 Mean :19.45
## 3rd Qu.:54.25 Sporty :3 3rd Qu.:19.50 3rd Qu.:20.73
## Max. :79.00 Van :3 Max. :37.50 Max. :40.10
## max_price range_price rough_range gpm_100
## Min. :12.20 Min. : 0.000 Min. :-0.020 Min. :3.000
## 1st Qu.:16.70 1st Qu.: 1.800 1st Qu.: 1.812 1st Qu.:4.050
## Median :20.30 Median : 3.400 Median : 3.400 Median :4.200
## Mean :21.65 Mean : 4.383 Mean : 4.383 Mean :4.304
## 3rd Qu.:22.62 3rd Qu.: 6.200 3rd Qu.: 6.200 3rd Qu.:4.550
## Max. :42.70 Max. :10.800 Max. :10.820 Max. :5.700
## mpg_city mpg_highway
## Min. :15.00 Min. :20.00
## 1st Qu.:18.00 1st Qu.:26.00
## Median :19.00 Median :27.50
## Mean :19.75 Mean :27.50
## 3rd Qu.:22.00 3rd Qu.:28.25
## Max. :28.00 Max. :38.00
# Mean, then median, of price within the subset.
mean(carprice_filt_df[["price"]])
median(carprice_filt_df[["price"]])
## [1] 19.45417
## [1] 18.25
# Mean, then median, of mpg_highway within the subset.
mean(carprice_filt_df[["mpg_highway"]])
median(carprice_filt_df[["mpg_highway"]])
## [1] 27.5
## [1] 27.5
# Convert type to character, then lower-case three of its labels
# ("Sporty", "Small", "Van"). Every other value, including NA, passes
# through unchanged via the catch-all branch.
carprice_filt_df <- carprice_filt_df %>%
  mutate(
    type = as.character(type),
    type = case_when(
      type == "Sporty" ~ "sporty",
      type == "Small"  ~ "small",
      type == "Van"    ~ "van",
      TRUE             ~ type
    )
  )
# Render the recoded subset as a table.
carprice_filt_df %>% knitr::kable()
| id | type | min_price | price | max_price | range_price | rough_range | gpm_100 | mpg_city | mpg_highway |
|---|---|---|---|---|---|---|---|---|---|
| 38 | Large | 20.1 | 20.9 | 21.7 | 1.6 | 1.59 | 4.5 | 18 | 26 |
| 34 | sporty | 10.8 | 15.9 | 21.0 | 10.2 | 10.21 | 3.9 | 22 | 29 |
| 11 | Midsize | 37.5 | 40.1 | 42.7 | 5.2 | 5.18 | 4.9 | 16 | 25 |
| 17 | van | 14.7 | 16.6 | 18.6 | 3.9 | 3.90 | 5.7 | 15 | 20 |
| 25 | Compact | 11.9 | 13.3 | 14.7 | 2.8 | 2.81 | 4.1 | 22 | 27 |
| 37 | Midsize | 15.6 | 20.2 | 24.8 | 9.2 | 9.21 | 3.9 | 21 | 30 |
| 75 | sporty | 14.0 | 17.7 | 21.4 | 7.4 | 7.40 | 4.2 | 19 | 28 |
| 7 | Large | 19.9 | 20.8 | 21.7 | 1.8 | 1.79 | 4.2 | 19 | 28 |
| 27 | Midsize | 14.8 | 15.6 | 16.4 | 1.6 | 1.60 | 4.2 | 21 | 27 |
| 18 | Large | 18.0 | 18.8 | 19.6 | 1.6 | 1.60 | 4.7 | 17 | 26 |
| 70 | van | 19.5 | 19.5 | 19.5 | 0.0 | 0.00 | 4.9 | 18 | 23 |
| 24 | small | 8.4 | 11.3 | 14.2 | 5.8 | 5.80 | 3.8 | 23 | 29 |
| 33 | Compact | 10.4 | 11.3 | 12.2 | 1.8 | 1.82 | 4.1 | 22 | 27 |
| 79 | small | 9.2 | 11.1 | 12.9 | 3.7 | 3.70 | 3.0 | 28 | 38 |
| 30 | Large | 17.5 | 19.3 | 21.2 | 3.7 | 3.69 | 4.2 | 20 | 28 |
| 61 | Midsize | 14.9 | 14.9 | 14.9 | 0.0 | -0.02 | 4.4 | 19 | 26 |
| 52 | Large | 34.4 | 36.1 | 37.8 | 3.4 | 3.42 | 4.5 | 18 | 26 |
| 71 | Large | 19.5 | 20.7 | 21.9 | 2.4 | 2.41 | 4.2 | 19 | 28 |
| 12 | Compact | 8.5 | 13.4 | 18.3 | 9.8 | 9.80 | 3.3 | 25 | 36 |
| 36 | van | 14.5 | 19.9 | 25.3 | 10.8 | 10.82 | 5.7 | 15 | 20 |
| 14 | sporty | 13.4 | 15.1 | 16.8 | 3.4 | 3.38 | 4.2 | 19 | 28 |
| 77 | Large | 19.4 | 24.4 | 29.4 | 10.0 | 10.00 | 4.2 | 19 | 28 |
| 51 | Midsize | 33.3 | 34.3 | 35.3 | 2.0 | 1.99 | 4.7 | 17 | 26 |
| 6 | Midsize | 14.2 | 15.7 | 17.3 | 3.1 | 3.09 | 3.8 | 22 | 31 |
This was done in Step One!