# Load the carData "States" table straight from its raw GitHub URL.
# read.csv() already defaults to header = TRUE and sep = ",".
States <- read.csv(
  "https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/csv/carData/States.csv"
)
# Display every row of the freshly loaded data frame.
print(States)
## X region pop SATV SATM percent dollars pay
## 1 AL ESC 4041 470 514 8 3.648 27
## 2 AK PAC 550 438 476 42 7.887 43
## 3 AZ MTN 3665 445 497 25 4.231 30
## 4 AR WSC 2351 470 511 6 3.334 23
## 5 CA PAC 29760 419 484 45 4.826 39
## 6 CO MTN 3294 456 513 28 4.809 31
## 7 CN NE 3287 430 471 74 7.914 43
## 8 DE SA 666 433 470 58 6.016 35
## 9 DC SA 607 409 441 68 8.210 39
## 10 FL SA 12938 418 466 44 5.154 30
## 11 GA SA 6478 401 443 57 4.860 29
## 12 HI PAC 1108 404 481 52 5.008 32
## 13 ID MTN 1007 466 502 17 3.200 25
## 14 IL ENC 11431 466 528 16 5.062 34
## 15 IN ENC 5544 408 459 54 5.051 32
## 16 IA WNC 2777 511 577 5 4.839 28
## 17 KS WNC 2478 492 548 10 5.009 29
## 18 KY ESC 3685 473 521 10 4.390 29
## 19 LA WSC 4220 476 517 9 4.012 26
## 20 ME NE 1228 423 463 60 5.894 28
## 21 MD SA 4781 430 478 59 6.184 38
## 22 MA NE 6016 427 473 72 6.351 36
## 23 MI ENC 9295 454 514 12 5.257 38
## 24 MN WNC 4375 477 542 14 5.260 33
## 25 MS ESC 2573 477 519 4 3.322 24
## 26 MO WNC 5117 473 522 12 4.415 28
## 27 MT MTN 799 464 523 20 5.184 26
## 28 NE WNC 1578 484 546 10 4.381 26
## 29 NV MTN 1202 434 487 24 4.564 32
## 30 NH NE 1109 442 486 67 5.504 31
## 31 NJ MA 7730 418 473 69 9.159 38
## 32 NM MTN 1515 480 527 12 4.446 26
## 33 NY MA 17990 412 470 70 8.500 42
## 34 NC SA 6629 401 440 55 4.802 29
## 35 ND WNC 639 505 564 6 3.685 23
## 36 OH ENC 10847 450 499 22 5.639 32
## 37 OK WSC 3146 478 523 9 3.742 24
## 38 OR PAC 2842 439 484 49 5.291 32
## 39 PA MA 11882 420 463 64 6.534 36
## 40 RI NE 1003 422 461 62 6.989 37
## 41 SC SA 3487 397 437 54 4.327 28
## 42 SD WNC 696 506 555 5 3.730 22
## 43 TN ESC 4877 483 525 12 3.707 28
## 44 TX WSC 16987 413 461 42 4.238 28
## 45 UT MTN 1723 492 539 5 2.993 25
## 46 VT NE 563 431 466 62 5.740 31
## 47 VA SA 6187 425 470 58 5.360 32
## 48 WA PAC 4867 437 486 44 5.045 33
## 49 WV SA 1793 443 490 15 5.046 26
## 50 WI ENC 4892 476 543 11 5.946 33
## 51 WY MTN 454 458 519 13 5.255 29
# Per-column overview (counts / quantiles) of the full data set.
print(summary(States))
## X region pop SATV SATM
## AK : 1 SA : 9 Min. : 454 Min. :397.0 Min. :437.0
## AL : 1 MTN : 8 1st Qu.: 1215 1st Qu.:422.5 1st Qu.:470.0
## AR : 1 WNC : 7 Median : 3294 Median :443.0 Median :490.0
## AZ : 1 NE : 6 Mean : 4877 Mean :448.2 Mean :497.4
## CA : 1 ENC : 5 3rd Qu.: 5780 3rd Qu.:474.5 3rd Qu.:522.5
## CN : 1 PAC : 5 Max. :29760 Max. :511.0 Max. :577.0
## (Other):45 (Other):11
## percent dollars pay
## Min. : 4.00 Min. :2.993 Min. :22.00
## 1st Qu.:11.50 1st Qu.:4.354 1st Qu.:27.50
## Median :25.00 Median :5.045 Median :30.00
## Mean :33.75 Mean :5.175 Mean :30.94
## 3rd Qu.:57.50 3rd Qu.:5.689 3rd Qu.:33.50
## Max. :74.00 Max. :9.159 Max. :43.00
##
# Mean and median of the pay and pop columns across all rows of States.
mean(States[["pay"]])
## [1] 30.94118
median(States[["pay"]])
## [1] 30
mean(States[["pop"]])
## [1] 4876.647
median(States[["pop"]])
## [1] 3294
# Keep only rows with pop below 1000 and pay below 30, and only the four
# columns of interest. which() discards NA comparisons, matching subset().
States2 <- States[
  which(States$pop < 1000 & States$pay < 30),
  c("X", "region", "pop", "pay")
]
print(States2)
## X region pop pay
## 27 MT MTN 799 26
## 35 ND WNC 639 23
## 42 SD WNC 696 22
## 51 WY MTN 454 29
# Requires the plyr package, which supplies rename() (and revalue(), used
# further down in this script).
library(plyr)
# Show States2 under more descriptive column names. The renamed copy is only
# printed, not assigned, so States2 itself keeps its original column names.
rename(
  States2,
  c("X" = "State", "region" = "Reg", "pop" = "Population", "pay" = "Amt")
)
## State Reg Population Amt
## 27 MT MTN 799 26
## 35 ND WNC 639 23
## 42 SD WNC 696 22
## 51 WY MTN 454 29
# Per-column overview of the filtered subset.
print(summary(States2))
## X region pop pay
## MT :1 MTN :2 Min. :454.0 Min. :22.00
## ND :1 WNC :2 1st Qu.:592.8 1st Qu.:22.75
## SD :1 ENC :0 Median :667.5 Median :24.50
## WY :1 ESC :0 Mean :647.0 Mean :25.00
## AK :0 MA :0 3rd Qu.:721.8 3rd Qu.:26.75
## AL :0 NE :0 Max. :799.0 Max. :29.00
## (Other):0 (Other):0
# Mean and median of pop and pay for the filtered subset. Both medians are
# taken from States2 (not the full States table) so that each mean/median
# pair describes the same rows.
mean(States2$pop)
## [1] 647
median(States2$pop)
## [1] 667.5
mean(States2$pay)
## [1] 25
median(States2$pay)
## [1] 24.5
# Recode all four remaining state codes to the single label "AAA" with one
# revalue() call instead of one call per code.
States2$X <- revalue(
  States2$X,
  c("MT" = "AAA", "ND" = "AAA", "SD" = "AAA", "WY" = "AAA")
)
print(States2)
## X region pop pay
## 27 AAA MTN 799 26
## 35 AAA WNC 639 23
## 42 AAA WNC 696 22
## 51 AAA MTN 454 29
# First rows (up to six) of the original data, for comparison.
head(States, n = 6L)
## X region pop SATV SATM percent dollars pay
## 1 AL ESC 4041 470 514 8 3.648 27
## 2 AK PAC 550 438 476 42 7.887 43
## 3 AZ MTN 3665 445 497 25 4.231 30
## 4 AR WSC 2351 470 511 6 3.334 23
## 5 CA PAC 29760 419 484 45 4.826 39
## 6 CO MTN 3294 456 513 28 4.809 31
# First rows (up to six) of the recoded subset; it has only four rows.
head(States2, n = 6L)
## X region pop pay
## 27 AAA MTN 799 26
## 35 AAA WNC 639 23
## 42 AAA WNC 696 22
## 51 AAA MTN 454 29
# BONUS – place the original .csv in a GitHub file and have R read it from the link. This will be a very useful skill as you progress in your data science education and career.
# Please see Step 1 above: the data set is read directly from its raw GitHub URL.