- Use the summary function to gain an overview of the data set. Then display the mean and median for at least two attributes.
# require(RCurl)
#fuel_economy<-read.csv(text=getURL("https://raw.githubusercontent.com/Jagdish16/jagdish_r_repo/master/FuelEconomy.csv"), header=T)
# Read the fuel-economy CSV directly from its raw GitHub URL and print a
# per-column overview of the resulting data frame.
theURL <- "https://raw.githubusercontent.com/Jagdish16/jagdish_r_repo/master/FuelEconomy.csv"
# stringsAsFactors is spelled out because R >= 4.0 reads text columns as
# character by default; summary() only reports per-level counts for factor
# columns, and code that edits levels() needs factors as well.
fuel_economy <- read.table(
  file = theURL,
  header = TRUE,
  sep = ",",
  quote = "\"",
  stringsAsFactors = TRUE
)
summary(fuel_economy)
## X manufacturer model
## Min. : 1.00 dodge :37 caravan 2wd : 11
## 1st Qu.: 59.25 toyota :34 ram 1500 pickup 4wd: 10
## Median :117.50 volkswagen:27 civic : 9
## Mean :117.50 ford :25 dakota pickup 4wd : 9
## 3rd Qu.:175.75 chevrolet :19 jetta : 9
## Max. :234.00 audi :18 mustang : 9
## (Other) :74 (Other) :177
## displ year cyl trans drv
## Min. :1.600 Min. :1999 Min. :4.000 auto(l4) :83 4:103
## 1st Qu.:2.400 1st Qu.:1999 1st Qu.:4.000 manual(m5):58 f:106
## Median :3.300 Median :2004 Median :6.000 auto(l5) :39 r: 25
## Mean :3.472 Mean :2004 Mean :5.889 manual(m6):19
## 3rd Qu.:4.600 3rd Qu.:2008 3rd Qu.:8.000 auto(s6) :16
## Max. :7.000 Max. :2008 Max. :8.000 auto(l6) : 6
## (Other) :13
## cty hwy fl class
## Min. : 9.00 Min. :12.00 c: 1 2seater : 5
## 1st Qu.:14.00 1st Qu.:18.00 d: 5 compact :47
## Median :17.00 Median :24.00 e: 8 midsize :41
## Mean :16.86 Mean :23.44 p: 52 minivan :11
## 3rd Qu.:19.00 3rd Qu.:27.00 r:168 pickup :33
## Max. :35.00 Max. :44.00 subcompact:35
## suv :62
# Report the mean and median city mileage over the full data set.
cty_mean <- mean(fuel_economy$cty)
cty_median <- median(fuel_economy$cty)
cat("The mean of cty is ", cty_mean, "and its median is ", cty_median, "\n")
## The mean of cty is 16.85897 and its median is 17
#mean(fuel_economy$cty)
# Report the mean and median highway mileage over the full data set.
hwy_mean <- mean(fuel_economy$hwy)
hwy_median <- median(fuel_economy$hwy)
cat("The mean of hwy is ", hwy_mean, "and its median is ", hwy_median, "\n")
## The mean of hwy is 23.44017 and its median is 24
- Create a new data frame with a subset of the columns and rows. Make sure to rename it.
# Keep four columns of the full data set (model, city mileage, highway
# mileage, class) in an intermediate frame.
fe <- data.frame(
  fuel_economy$model,
  fuel_economy$cty,
  fuel_economy$hwy,
  fuel_economy$class
)
# Keep only the rows whose class is "suv", then display the result.
is_suv <- fe$fuel_economy.class == "suv"
fuel.economy <- fe[is_suv, ]
print(fuel.economy)
## fuel_economy.model fuel_economy.cty fuel_economy.hwy
## 19 c1500 suburban 2wd 14 20
## 20 c1500 suburban 2wd 11 15
## 21 c1500 suburban 2wd 14 20
## 22 c1500 suburban 2wd 13 17
## 23 c1500 suburban 2wd 12 17
## 29 k1500 tahoe 4wd 14 19
## 30 k1500 tahoe 4wd 11 14
## 31 k1500 tahoe 4wd 11 15
## 32 k1500 tahoe 4wd 14 17
## 58 durango 4wd 13 17
## 59 durango 4wd 13 17
## 60 durango 4wd 9 12
## 61 durango 4wd 13 17
## 62 durango 4wd 11 16
## 63 durango 4wd 13 18
## 64 durango 4wd 11 15
## 75 expedition 2wd 11 17
## 76 expedition 2wd 11 17
## 77 expedition 2wd 12 18
## 78 explorer 4wd 14 17
## 79 explorer 4wd 15 19
## 80 explorer 4wd 14 17
## 81 explorer 4wd 13 19
## 82 explorer 4wd 13 19
## 83 explorer 4wd 13 17
## 123 grand cherokee 4wd 17 22
## 124 grand cherokee 4wd 15 19
## 125 grand cherokee 4wd 15 20
## 126 grand cherokee 4wd 14 17
## 127 grand cherokee 4wd 9 12
## 128 grand cherokee 4wd 14 19
## 129 grand cherokee 4wd 13 18
## 130 grand cherokee 4wd 11 14
## 131 range rover 11 15
## 132 range rover 12 18
## 133 range rover 12 18
## 134 range rover 11 15
## 135 navigator 2wd 11 17
## 136 navigator 2wd 11 16
## 137 navigator 2wd 12 18
## 138 mountaineer 4wd 14 17
## 139 mountaineer 4wd 13 19
## 140 mountaineer 4wd 13 19
## 141 mountaineer 4wd 13 17
## 151 pathfinder 4wd 14 17
## 152 pathfinder 4wd 15 17
## 153 pathfinder 4wd 14 20
## 154 pathfinder 4wd 12 18
## 160 forester awd 18 25
## 161 forester awd 18 24
## 162 forester awd 20 27
## 163 forester awd 19 25
## 164 forester awd 20 26
## 165 forester awd 18 23
## 174 4runner 4wd 15 20
## 175 4runner 4wd 16 20
## 176 4runner 4wd 15 19
## 177 4runner 4wd 15 17
## 178 4runner 4wd 16 20
## 179 4runner 4wd 14 17
## 199 land cruiser wagon 4wd 11 15
## 200 land cruiser wagon 4wd 13 18
## fuel_economy.class
## 19 suv
## 20 suv
## 21 suv
## 22 suv
## 23 suv
## 29 suv
## 30 suv
## 31 suv
## 32 suv
## 58 suv
## 59 suv
## 60 suv
## 61 suv
## 62 suv
## 63 suv
## 64 suv
## 75 suv
## 76 suv
## 77 suv
## 78 suv
## 79 suv
## 80 suv
## 81 suv
## 82 suv
## 83 suv
## 123 suv
## 124 suv
## 125 suv
## 126 suv
## 127 suv
## 128 suv
## 129 suv
## 130 suv
## 131 suv
## 132 suv
## 133 suv
## 134 suv
## 135 suv
## 136 suv
## 137 suv
## 138 suv
## 139 suv
## 140 suv
## 141 suv
## 151 suv
## 152 suv
## 153 suv
## 154 suv
## 160 suv
## 161 suv
## 162 suv
## 163 suv
## 164 suv
## 165 suv
## 174 suv
## 175 suv
## 176 suv
## 177 suv
## 178 suv
## 179 suv
## 199 suv
## 200 suv
- Create new column names for the new data frame.
# Replace the auto-generated column headers with readable ones, then display
# the renamed data frame.
names(fuel.economy) <- c("CarModel", "CityMileage", "HighwayMileage", "Type")
print(fuel.economy)
## CarModel CityMileage HighwayMileage Type
## 19 c1500 suburban 2wd 14 20 suv
## 20 c1500 suburban 2wd 11 15 suv
## 21 c1500 suburban 2wd 14 20 suv
## 22 c1500 suburban 2wd 13 17 suv
## 23 c1500 suburban 2wd 12 17 suv
## 29 k1500 tahoe 4wd 14 19 suv
## 30 k1500 tahoe 4wd 11 14 suv
## 31 k1500 tahoe 4wd 11 15 suv
## 32 k1500 tahoe 4wd 14 17 suv
## 58 durango 4wd 13 17 suv
## 59 durango 4wd 13 17 suv
## 60 durango 4wd 9 12 suv
## 61 durango 4wd 13 17 suv
## 62 durango 4wd 11 16 suv
## 63 durango 4wd 13 18 suv
## 64 durango 4wd 11 15 suv
## 75 expedition 2wd 11 17 suv
## 76 expedition 2wd 11 17 suv
## 77 expedition 2wd 12 18 suv
## 78 explorer 4wd 14 17 suv
## 79 explorer 4wd 15 19 suv
## 80 explorer 4wd 14 17 suv
## 81 explorer 4wd 13 19 suv
## 82 explorer 4wd 13 19 suv
## 83 explorer 4wd 13 17 suv
## 123 grand cherokee 4wd 17 22 suv
## 124 grand cherokee 4wd 15 19 suv
## 125 grand cherokee 4wd 15 20 suv
## 126 grand cherokee 4wd 14 17 suv
## 127 grand cherokee 4wd 9 12 suv
## 128 grand cherokee 4wd 14 19 suv
## 129 grand cherokee 4wd 13 18 suv
## 130 grand cherokee 4wd 11 14 suv
## 131 range rover 11 15 suv
## 132 range rover 12 18 suv
## 133 range rover 12 18 suv
## 134 range rover 11 15 suv
## 135 navigator 2wd 11 17 suv
## 136 navigator 2wd 11 16 suv
## 137 navigator 2wd 12 18 suv
## 138 mountaineer 4wd 14 17 suv
## 139 mountaineer 4wd 13 19 suv
## 140 mountaineer 4wd 13 19 suv
## 141 mountaineer 4wd 13 17 suv
## 151 pathfinder 4wd 14 17 suv
## 152 pathfinder 4wd 15 17 suv
## 153 pathfinder 4wd 14 20 suv
## 154 pathfinder 4wd 12 18 suv
## 160 forester awd 18 25 suv
## 161 forester awd 18 24 suv
## 162 forester awd 20 27 suv
## 163 forester awd 19 25 suv
## 164 forester awd 20 26 suv
## 165 forester awd 18 23 suv
## 174 4runner 4wd 15 20 suv
## 175 4runner 4wd 16 20 suv
## 176 4runner 4wd 15 19 suv
## 177 4runner 4wd 15 17 suv
## 178 4runner 4wd 16 20 suv
## 179 4runner 4wd 14 17 suv
## 199 land cruiser wagon 4wd 11 15 suv
## 200 land cruiser wagon 4wd 13 18 suv
- Use the summary function to create an overview of your new data frame. Then print the mean and median for the same two attributes. Please compare.
# Per-column overview of the SUV-only data frame.
suv_overview <- summary(fuel.economy)
print(suv_overview)
## CarModel CityMileage HighwayMileage Type
## grand cherokee 4wd: 8 Min. : 9.00 Min. :12.00 2seater : 0
## durango 4wd : 7 1st Qu.:12.00 1st Qu.:17.00 compact : 0
## 4runner 4wd : 6 Median :13.00 Median :17.50 midsize : 0
## explorer 4wd : 6 Mean :13.50 Mean :18.13 minivan : 0
## forester awd : 6 3rd Qu.:14.75 3rd Qu.:19.00 pickup : 0
## c1500 suburban 2wd: 5 Max. :20.00 Max. :27.00 subcompact: 0
## (Other) :24 suv :62
# Compare SUV city mileage with the full data set: each percentage is the
# relative difference of the SUV statistic from the overall statistic.
suv_cty_mean <- mean(fuel.economy$CityMileage)
suv_cty_median <- median(fuel.economy$CityMileage)
cty_mean_pct <- ((suv_cty_mean / mean(fuel_economy$cty)) - 1) * 100
cty_median_pct <- ((suv_cty_median / median(fuel_economy$cty)) - 1) * 100
cat(
  "The mean of SUV City Mileage is ", suv_cty_mean,
  "and its median is ", suv_cty_median,
  ". Compared to the overall population, the mean varies by ", cty_mean_pct,
  "% and the median varies by ", cty_median_pct, "%\n"
)
## The mean of SUV City Mileage is 13.5 and its median is 13 . Compared to the overall population, the mean varies by -19.92395 % and the median varies by -23.52941 %
#mean(fuel_economy$cty)
# Compare SUV highway mileage with the full data set: each percentage is the
# relative difference of the SUV statistic from the overall statistic.
suv_hwy_mean <- mean(fuel.economy$HighwayMileage)
suv_hwy_median <- median(fuel.economy$HighwayMileage)
hwy_mean_pct <- ((suv_hwy_mean / mean(fuel_economy$hwy)) - 1) * 100
hwy_median_pct <- ((suv_hwy_median / median(fuel_economy$hwy)) - 1) * 100
cat(
  "The mean of SUV Highway Mileage is ", suv_hwy_mean,
  "and its median is ", suv_hwy_median,
  ". Compared to the overall population, the mean varies by ", hwy_mean_pct,
  "% and the median varies by ", hwy_median_pct, "%\n"
)
## The mean of SUV Highway Mileage is 18.12903 and its median is 17.5 . Compared to the overall population, the mean varies by -22.65828 % and the median varies by -27.08333 %
- For at least 3 values in a column please rename so that every value in that column is renamed. For example, suppose I have 20 values of the letter “e” in one column. Rename those values so that all 20 would show as “excellent”.
#grep("grand cherokee 4wd",as.character(fuel.economy$CarModel))
#fuel.economy<-gsub("pathfinder 4wd", "Pathfinder", as.character(fuel.economy$CarModel))
#fuel.economy$CarModel[fuel.economy$CarModel == "grand cherokee 4wd"] <- "Cherokee"
# Rename three car models everywhere they occur in the CarModel column.
# The lookup maps each old value (name) to its replacement (value).
model_renames <- c(
  "grand cherokee 4wd" = "Cherokee",
  "range rover" = "Range Rover",
  "pathfinder 4wd" = "Pathfinder"
)
if (is.factor(fuel.economy$CarModel)) {
  # Factor column: rewrite the matching levels in place so level order and
  # any unused levels are kept exactly as they were.
  model_levels <- levels(fuel.economy$CarModel)
  to_rename <- model_levels %in% names(model_renames)
  model_levels[to_rename] <- unname(model_renames[model_levels[to_rename]])
  levels(fuel.economy$CarModel) <- model_levels
} else {
  # Character column (the default when reading text on R >= 4.0): levels()
  # is NULL here, so a levels()-based rename would leave the values as they
  # are; replace the matching values directly instead.
  model_values <- as.character(fuel.economy$CarModel)
  to_rename <- model_values %in% names(model_renames)
  model_values[to_rename] <- unname(model_renames[model_values[to_rename]])
  fuel.economy$CarModel <- model_values
}
- Display enough rows to see examples of all of steps 1-5 above.
# Show every row of the SUV data frame so the renamed models are visible.
print(fuel.economy)
## CarModel CityMileage HighwayMileage Type
## 19 c1500 suburban 2wd 14 20 suv
## 20 c1500 suburban 2wd 11 15 suv
## 21 c1500 suburban 2wd 14 20 suv
## 22 c1500 suburban 2wd 13 17 suv
## 23 c1500 suburban 2wd 12 17 suv
## 29 k1500 tahoe 4wd 14 19 suv
## 30 k1500 tahoe 4wd 11 14 suv
## 31 k1500 tahoe 4wd 11 15 suv
## 32 k1500 tahoe 4wd 14 17 suv
## 58 durango 4wd 13 17 suv
## 59 durango 4wd 13 17 suv
## 60 durango 4wd 9 12 suv
## 61 durango 4wd 13 17 suv
## 62 durango 4wd 11 16 suv
## 63 durango 4wd 13 18 suv
## 64 durango 4wd 11 15 suv
## 75 expedition 2wd 11 17 suv
## 76 expedition 2wd 11 17 suv
## 77 expedition 2wd 12 18 suv
## 78 explorer 4wd 14 17 suv
## 79 explorer 4wd 15 19 suv
## 80 explorer 4wd 14 17 suv
## 81 explorer 4wd 13 19 suv
## 82 explorer 4wd 13 19 suv
## 83 explorer 4wd 13 17 suv
## 123 Cherokee 17 22 suv
## 124 Cherokee 15 19 suv
## 125 Cherokee 15 20 suv
## 126 Cherokee 14 17 suv
## 127 Cherokee 9 12 suv
## 128 Cherokee 14 19 suv
## 129 Cherokee 13 18 suv
## 130 Cherokee 11 14 suv
## 131 Range Rover 11 15 suv
## 132 Range Rover 12 18 suv
## 133 Range Rover 12 18 suv
## 134 Range Rover 11 15 suv
## 135 navigator 2wd 11 17 suv
## 136 navigator 2wd 11 16 suv
## 137 navigator 2wd 12 18 suv
## 138 mountaineer 4wd 14 17 suv
## 139 mountaineer 4wd 13 19 suv
## 140 mountaineer 4wd 13 19 suv
## 141 mountaineer 4wd 13 17 suv
## 151 Pathfinder 14 17 suv
## 152 Pathfinder 15 17 suv
## 153 Pathfinder 14 20 suv
## 154 Pathfinder 12 18 suv
## 160 forester awd 18 25 suv
## 161 forester awd 18 24 suv
## 162 forester awd 20 27 suv
## 163 forester awd 19 25 suv
## 164 forester awd 20 26 suv
## 165 forester awd 18 23 suv
## 174 4runner 4wd 15 20 suv
## 175 4runner 4wd 16 20 suv
## 176 4runner 4wd 15 19 suv
## 177 4runner 4wd 15 17 suv
## 178 4runner 4wd 16 20 suv
## 179 4runner 4wd 14 17 suv
## 199 land cruiser wagon 4wd 11 15 suv
## 200 land cruiser wagon 4wd 13 18 suv
- BONUS - place the original .csv in a github file and have R read from the link. This will be a very useful skill as you progress in your data science education and career.
See answer to question #1