One of the challenges in working with data is wrangling. In this assignment we will use R to perform this task.
Here is a list of data sets: http://vincentarelbundock.github.io/Rdatasets/ (click on the csv index for a list)
Please select one, download it and perform the following tasks:
# Load the DAAG "carprice" data set from the Rdatasets CSV index.
theURL <- "http://vincentarelbundock.github.io/Rdatasets/csv/DAAG/carprice.csv"
# stringsAsFactors = TRUE is set explicitly: the summaries printed in this
# report show per-level counts for Type, which requires Type to be a factor.
# Since R 4.0.0 read.table() defaults to stringsAsFactors = FALSE, so without
# this argument Type would be read as plain character.
carprice <- read.table(
  file = theURL,
  header = TRUE,
  sep = ",",
  stringsAsFactors = TRUE
)
# Per-column summary statistics for the full data set.
summary(carprice)
## X Type Min.Price Price
## Min. : 6.00 Compact: 7 Min. : 6.90 Min. : 7.40
## 1st Qu.:17.75 Large :11 1st Qu.:11.40 1st Qu.:13.47
## Median :29.50 Midsize:10 Median :14.50 Median :16.30
## Mean :36.54 Small : 7 Mean :16.54 Mean :18.57
## 3rd Qu.:60.25 Sporty : 8 3rd Qu.:19.43 3rd Qu.:20.73
## Max. :79.00 Van : 5 Max. :37.50 Max. :40.10
## Max.Price Range.Price RoughRange gpm100
## Min. : 7.90 Min. : 0.000 Min. :-0.020 Min. :2.800
## 1st Qu.:14.97 1st Qu.: 1.700 1st Qu.: 1.705 1st Qu.:3.800
## Median :18.40 Median : 3.300 Median : 3.305 Median :4.200
## Mean :20.63 Mean : 4.092 Mean : 4.089 Mean :4.167
## 3rd Qu.:24.50 3rd Qu.: 5.850 3rd Qu.: 5.853 3rd Qu.:4.550
## Max. :42.70 Max. :14.600 Max. :14.600 Max. :5.700
## MPG.city MPG.highway
## Min. :15.00 Min. :20.00
## 1st Qu.:18.00 1st Qu.:26.00
## Median :20.00 Median :28.00
## Mean :20.96 Mean :28.15
## 3rd Qu.:23.00 3rd Qu.:30.00
## Max. :31.00 Max. :41.00
# Mean and median of the two fuel-economy columns of the full data set.
# vapply() (instead of sapply()) pins every result to a single numeric value,
# so an unexpected column type raises an error rather than silently changing
# the shape of the result.
mpg_columns <- c("MPG.city", "MPG.highway")
means <- vapply(carprice[, mpg_columns], mean, numeric(1))
medians <- vapply(carprice[, mpg_columns], median, numeric(1))
# rbind() labels the rows after the variable names ("means", "medians"),
# so those names must stay as they are.
means_mediansDF <- data.frame(rbind(means, medians))
print(means_mediansDF)
## MPG.city MPG.highway
## means 20.95833 28.14583
## medians 20.00000 28.00000
# Preview the first rows of the full data set (head() shows six by default).
head(carprice)
## X Type Min.Price Price Max.Price Range.Price RoughRange gpm100
## 1 6 Midsize 14.2 15.7 17.3 3.1 3.09 3.8
## 2 7 Large 19.9 20.8 21.7 1.8 1.79 4.2
## 3 8 Large 22.6 23.7 24.9 2.3 2.31 4.9
## 4 9 Midsize 26.3 26.3 26.3 0.0 -0.01 4.3
## 5 10 Large 33.0 34.7 36.3 3.3 3.30 4.9
## 6 11 Midsize 37.5 40.1 42.7 5.2 5.18 4.9
## MPG.city MPG.highway
## 1 22 31
## 2 19 28
## 3 16 25
## 4 19 27
## 5 16 25
## 6 16 25
# Build a smaller data frame: the first 12 rows, restricted to the row
# identifier, the car type and the two mileage columns.
subset_rows <- seq_len(12)
subset_columns <- c("X", "Type", "MPG.city", "MPG.highway")
carprice_subset <- carprice[subset_rows, subset_columns]
# Preview the first rows of the subset.
head(carprice_subset)
## X Type MPG.city MPG.highway
## 1 6 Midsize 22 31
## 2 7 Large 19 28
## 3 8 Large 16 25
## 4 9 Midsize 19 27
## 5 10 Large 16 25
## 6 11 Midsize 16 25
# Column names of the subset before renaming.
names(carprice_subset)
## [1] "X" "Type" "MPG.city" "MPG.highway"
# Append "_Subset" to every column name so the subset's columns can be told
# apart from those of the full data set. paste0() is the direct equivalent of
# paste(..., sep = "").
names(carprice_subset) <- paste0(names(carprice_subset), "_Subset")
# Column names after renaming.
names(carprice_subset)
## [1] "X_Subset" "Type_Subset" "MPG.city_Subset"
## [4] "MPG.highway_Subset"
# Per-column summary statistics for the renamed 12-row subset.
summary(carprice_subset)
## X_Subset Type_Subset MPG.city_Subset MPG.highway_Subset
## Min. : 6.00 Compact:2 Min. :15.00 Min. :20.00
## 1st Qu.: 8.75 Large :3 1st Qu.:16.00 1st Qu.:25.00
## Median :11.50 Midsize:4 Median :19.00 Median :27.50
## Mean :11.50 Small :0 Mean :19.25 Mean :27.58
## 3rd Qu.:14.25 Sporty :1 3rd Qu.:21.25 3rd Qu.:29.50
## Max. :17.00 Van :2 Max. :25.00 Max. :36.00
# Mean and median of the two fuel-economy columns of the subset.
# vapply() (instead of sapply()) guarantees one numeric value per column and
# fails loudly if a column is not numeric.
subset_mpg_columns <- c("MPG.city_Subset", "MPG.highway_Subset")
means <- vapply(carprice_subset[, subset_mpg_columns], mean, numeric(1))
medians <- vapply(carprice_subset[, subset_mpg_columns], median, numeric(1))
# rbind() labels the rows after the variable names ("means", "medians"),
# so those names must stay as they are.
means_medians_SubsetDF <- data.frame(rbind(means, medians))
print(means_medians_SubsetDF)
## MPG.city_Subset MPG.highway_Subset
## means 19.25 27.58333
## medians 19.00 27.50000
# Difference between the full-data and subset statistics (full minus subset).
# The two data frames are subtracted cell by cell; the result carries the
# column names of the left-hand operand, which are then tagged with "_Diff".
means_medians_compare <- means_mediansDF - means_medians_SubsetDF
# paste0() is the direct equivalent of paste(..., sep = "").
names(means_medians_compare) <- paste0(names(means_medians_compare), "_Diff")
print(means_medians_compare)
## MPG.city_Diff MPG.highway_Diff
## means 1.708333 0.5625
## medians 1.000000 0.5000
library("stringr")
# Recode three of the car types in the subset with stringr::str_replace().
# as.character() converts the whole column in one vectorised call; wrapping it
# in sapply() is unnecessary and would attach element names when the column is
# already character.
vectorx <- as.character(carprice_subset[, "Type_Subset"])
unique(vectorx)
## [1] "Midsize" "Large" "Compact" "Sporty" "Van"
vectorx <- str_replace(
  string = vectorx, pattern = "Large", replacement = "Grande"
)
vectorx <- str_replace(
  string = vectorx, pattern = "Midsize", replacement = "Mid"
)
vectorx <- str_replace(
  string = vectorx, pattern = "Compact", replacement = "Tiny"
)
unique(vectorx)
## [1] "Mid" "Grande" "Tiny" "Sporty" "Van"
# Write the recoded labels back; the column now holds character values.
carprice_subset[, "Type_Subset"] <- vectorx
head(carprice_subset)
## X_Subset Type_Subset MPG.city_Subset MPG.highway_Subset
## 1 6 Mid 22 31
## 2 7 Grande 19 28
## 3 8 Grande 16 25
## 4 9 Mid 19 27
## 5 10 Grande 16 25
## 6 11 Mid 16 25
See the calls to head() above, which display the first rows of the data frames.
# Bonus: read the same CSV from a copy hosted in a GitHub repository.
theURL <- "https://raw.githubusercontent.com/ChadRyanBailey/BridgeRHomeworkData/master/carprice.csv"
carpriceBonus <- read.table(
  file = theURL,
  header = TRUE,
  sep = ","
)
# Preview the first rows of the GitHub-hosted copy.
head(carpriceBonus)
## X Type Min.Price Price Max.Price Range.Price RoughRange gpm100
## 1 6 Midsize 14.2 15.7 17.3 3.1 3.09 3.8
## 2 7 Large 19.9 20.8 21.7 1.8 1.79 4.2
## 3 8 Large 22.6 23.7 24.9 2.3 2.31 4.9
## 4 9 Midsize 26.3 26.3 26.3 0.0 -0.01 4.3
## 5 10 Large 33.0 34.7 36.3 3.3 3.30 4.9
## 6 11 Midsize 37.5 40.1 42.7 5.2 5.18 4.9
## MPG.city MPG.highway
## 1 22 31
## 2 19 28
## 3 16 25
## 4 19 27
## 5 16 25
## 6 16 25
Please submit your .rmd file and the .csv file as well as a link to your RPubs.