# Load the carData "Anscombe" table from the Rdatasets CSV mirror.
# The raw.githubusercontent.com host is used here so that both downloads in
# this script hit the same host directly; raw.github.com is the older host
# name for the same content and is presumably only reachable via a redirect.
urlfile <- "https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/csv/carData/Anscombe.csv"
dataset <- read.csv(urlfile)
# Asking for 52 rows prints the whole table (the printed output has 51 rows).
head(dataset, 52)
## X education income young urban
## 1 ME 189 2824 350.7 508
## 2 NH 169 3259 345.9 564
## 3 VT 230 3072 348.5 322
## 4 MA 168 3835 335.3 846
## 5 RI 180 3549 327.1 871
## 6 CT 193 4256 341.0 774
## 7 NY 261 4151 326.2 856
## 8 NJ 214 3954 333.5 889
## 9 PA 201 3419 326.2 715
## 10 OH 172 3509 354.5 753
## 11 IN 194 3412 359.3 649
## 12 IL 189 3981 348.9 830
## 13 MI 233 3675 369.2 738
## 14 WI 209 3363 360.7 659
## 15 MN 262 3341 365.4 664
## 16 IO 234 3265 343.8 572
## 17 MO 177 3257 336.1 701
## 18 ND 177 2730 369.1 443
## 19 SD 187 2876 368.7 446
## 20 NE 148 3239 349.9 615
## 21 KA 196 3303 339.9 661
## 22 DE 248 3795 375.9 722
## 23 MD 247 3742 364.1 766
## 24 DC 246 4425 352.1 1000
## 25 VA 180 3068 353.0 631
## 26 WV 149 2470 328.8 390
## 27 NC 155 2664 354.1 450
## 28 SC 149 2380 376.7 476
## 29 GA 156 2781 370.6 603
## 30 FL 191 3191 336.0 805
## 31 KY 140 2645 349.3 523
## 32 TN 137 2579 342.8 588
## 33 AL 112 2337 362.2 584
## 34 MS 130 2081 385.2 445
## 35 AR 134 2322 351.9 500
## 36 LA 162 2634 389.6 661
## 37 OK 135 2880 329.8 680
## 38 TX 155 3029 369.4 797
## 39 MT 238 2942 368.9 534
## 40 ID 170 2668 367.7 541
## 41 WY 238 3190 365.6 605
## 42 CO 192 3340 358.1 785
## 43 NM 227 2651 421.5 698
## 44 AZ 207 3027 387.5 796
## 45 UT 201 2790 412.4 804
## 46 NV 225 3957 385.1 809
## 47 WA 215 3688 341.3 726
## 48 OR 233 3317 332.7 671
## 49 CA 273 3968 348.4 909
## 50 AK 372 4146 439.7 484
## 51 HI 212 3513 382.9 831
# Per-column summary of the full Anscombe table.
summary(object = dataset)
## X education income young
## AK : 1 Min. :112.0 Min. :2081 Min. :326.2
## AL : 1 1st Qu.:165.0 1st Qu.:2786 1st Qu.:342.1
## AR : 1 Median :192.0 Median :3257 Median :354.1
## AZ : 1 Mean :196.3 Mean :3225 Mean :358.9
## CA : 1 3rd Qu.:228.5 3rd Qu.:3612 3rd Qu.:369.1
## CO : 1 Max. :372.0 Max. :4425 Max. :439.7
## (Other):45
## urban
## Min. : 322.0
## 1st Qu.: 552.5
## Median : 664.0
## Mean : 664.5
## 3rd Qu.: 790.5
## Max. :1000.0
##
# Mean and median of the education and income columns over all rows.
mean(dataset[["education"]])
## [1] 196.3137
median(dataset[["education"]])
## [1] 192
mean(dataset[["income"]])
## [1] 3225.294
median(dataset[["income"]])
## [1] 3257
# New subset: first 20 rows of columns 1, 2 and 3.
# The data frame is subset directly instead of being flattened with c() and
# rebuilt with data.frame(). head() also caps the row count at nrow(dataset),
# so a shorter table would not produce all-NA padding rows the way 1:20 does.
newdataset <- head(dataset[1:3], n = 20)
# `newframe` used to hold the same columns as a plain list; it is still
# defined in case code outside this section reads it.
newframe <- as.list(newdataset)
head(newdataset, 20)
## X education income
## 1 ME 189 2824
## 2 NH 169 3259
## 3 VT 230 3072
## 4 MA 168 3835
## 5 RI 180 3549
## 6 CT 193 4256
## 7 NY 261 4151
## 8 NJ 214 3954
## 9 PA 201 3419
## 10 OH 172 3509
## 11 IN 194 3412
## 12 IL 189 3981
## 13 MI 233 3675
## 14 WI 209 3363
## 15 MN 262 3341
## 16 IO 234 3265
## 17 MO 177 3257
## 18 ND 177 2730
## 19 SD 187 2876
## 20 NE 148 3239
# Replace the three column names of the subset with descriptive labels.
# NOTE(review): "Expences" looks like a misspelling of "Expenses"; it is left
# as-is because later statements refer to this column by its exact name.
colnames(newdataset) <- c(
  "States",
  "Education Expences($)",
  "Per-Capita Income($)"
)
head(newdataset, n = 20)
## States Education Expences($) Per-Capita Income($)
## 1 ME 189 2824
## 2 NH 169 3259
## 3 VT 230 3072
## 4 MA 168 3835
## 5 RI 180 3549
## 6 CT 193 4256
## 7 NY 261 4151
## 8 NJ 214 3954
## 9 PA 201 3419
## 10 OH 172 3509
## 11 IN 194 3412
## 12 IL 189 3981
## 13 MI 233 3675
## 14 WI 209 3363
## 15 MN 262 3341
## 16 IO 234 3265
## 17 MO 177 3257
## 18 ND 177 2730
## 19 SD 187 2876
## 20 NE 148 3239
# Per-column summary of the 20-row subset.
summary(object = newdataset)
## States Education Expences($) Per-Capita Income($)
## CT : 1 Min. :148.0 Min. :2730
## IL : 1 1st Qu.:177.0 1st Qu.:3252
## IN : 1 Median :191.0 Median :3388
## IO : 1 Mean :199.3 Mean :3448
## MA : 1 3rd Qu.:218.0 3rd Qu.:3715
## ME : 1 Max. :262.0 Max. :4256
## (Other):14
# Mean and median of the two numeric columns of the 20-row subset,
# looked up by their renamed labels.
mean(newdataset[["Education Expences($)"]])
## [1] 199.35
median(newdataset[["Education Expences($)"]])
## [1] 191
mean(newdataset[["Per-Capita Income($)"]])
## [1] 3448.35
median(newdataset[["Per-Capita Income($)"]])
## [1] 3387.5
# Summary of the full table again, printed next to the subset summary below.
summary(object = dataset)
## X education income young
## AK : 1 Min. :112.0 Min. :2081 Min. :326.2
## AL : 1 1st Qu.:165.0 1st Qu.:2786 1st Qu.:342.1
## AR : 1 Median :192.0 Median :3257 Median :354.1
## AZ : 1 Mean :196.3 Mean :3225 Mean :358.9
## CA : 1 3rd Qu.:228.5 3rd Qu.:3612 3rd Qu.:369.1
## CO : 1 Max. :372.0 Max. :4425 Max. :439.7
## (Other):45
## urban
## Min. : 322.0
## 1st Qu.: 552.5
## Median : 664.0
## Mean : 664.5
## 3rd Qu.: 790.5
## Max. :1000.0
##
# Summary of the 20-row subset again, for comparison with the full table.
summary(object = newdataset)
## States Education Expences($) Per-Capita Income($)
## CT : 1 Min. :148.0 Min. :2730
## IL : 1 1st Qu.:177.0 1st Qu.:3252
## IN : 1 Median :191.0 Median :3388
## IO : 1 Mean :199.3 Mean :3448
## MA : 1 3rd Qu.:218.0 3rd Qu.:3715
## ME : 1 Max. :262.0 Max. :4256
## (Other):14
# Second data set: the carData "States" table from the Rdatasets CSV mirror.
# `urlfile` is reused for the new address.
urlfile <- "https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/csv/carData/States.csv"
dataset2 <- read.csv(file = urlfile)
# Print the first 51 rows (the printed output shows 51 rows).
head(dataset2, n = 51)
## X region pop SATV SATM percent dollars pay
## 1 AL ESC 4041 470 514 8 3.648 27
## 2 AK PAC 550 438 476 42 7.887 43
## 3 AZ MTN 3665 445 497 25 4.231 30
## 4 AR WSC 2351 470 511 6 3.334 23
## 5 CA PAC 29760 419 484 45 4.826 39
## 6 CO MTN 3294 456 513 28 4.809 31
## 7 CN NE 3287 430 471 74 7.914 43
## 8 DE SA 666 433 470 58 6.016 35
## 9 DC SA 607 409 441 68 8.210 39
## 10 FL SA 12938 418 466 44 5.154 30
## 11 GA SA 6478 401 443 57 4.860 29
## 12 HI PAC 1108 404 481 52 5.008 32
## 13 ID MTN 1007 466 502 17 3.200 25
## 14 IL ENC 11431 466 528 16 5.062 34
## 15 IN ENC 5544 408 459 54 5.051 32
## 16 IA WNC 2777 511 577 5 4.839 28
## 17 KS WNC 2478 492 548 10 5.009 29
## 18 KY ESC 3685 473 521 10 4.390 29
## 19 LA WSC 4220 476 517 9 4.012 26
## 20 ME NE 1228 423 463 60 5.894 28
## 21 MD SA 4781 430 478 59 6.184 38
## 22 MA NE 6016 427 473 72 6.351 36
## 23 MI ENC 9295 454 514 12 5.257 38
## 24 MN WNC 4375 477 542 14 5.260 33
## 25 MS ESC 2573 477 519 4 3.322 24
## 26 MO WNC 5117 473 522 12 4.415 28
## 27 MT MTN 799 464 523 20 5.184 26
## 28 NE WNC 1578 484 546 10 4.381 26
## 29 NV MTN 1202 434 487 24 4.564 32
## 30 NH NE 1109 442 486 67 5.504 31
## 31 NJ MA 7730 418 473 69 9.159 38
## 32 NM MTN 1515 480 527 12 4.446 26
## 33 NY MA 17990 412 470 70 8.500 42
## 34 NC SA 6629 401 440 55 4.802 29
## 35 ND WNC 639 505 564 6 3.685 23
## 36 OH ENC 10847 450 499 22 5.639 32
## 37 OK WSC 3146 478 523 9 3.742 24
## 38 OR PAC 2842 439 484 49 5.291 32
## 39 PA MA 11882 420 463 64 6.534 36
## 40 RI NE 1003 422 461 62 6.989 37
## 41 SC SA 3487 397 437 54 4.327 28
## 42 SD WNC 696 506 555 5 3.730 22
## 43 TN ESC 4877 483 525 12 3.707 28
## 44 TX WSC 16987 413 461 42 4.238 28
## 45 UT MTN 1723 492 539 5 2.993 25
## 46 VT NE 563 431 466 62 5.740 31
## 47 VA SA 6187 425 470 58 5.360 32
## 48 WA PAC 4867 437 486 44 5.045 33
## 49 WV SA 1793 443 490 15 5.046 26
## 50 WI ENC 4892 476 543 11 5.946 33
## 51 WY MTN 454 458 519 13 5.255 29
# Create a subset of the original table: first 20 rows of columns 1, 2, 3
# and 7.
# The data frame is subset directly instead of being flattened with c() and
# rebuilt with data.frame(). head() also caps the row count at nrow(dataset2),
# so a shorter table would not produce all-NA padding rows the way 1:20 does.
newdataset2 <- head(dataset2[c(1, 2, 3, 7)], n = 20)
# `newframe2` used to hold the same columns as a plain list; it is still
# defined in case code outside this section reads it.
newframe2 <- as.list(newdataset2)
head(newdataset2, 20)
## X region pop dollars
## 1 AL ESC 4041 3.648
## 2 AK PAC 550 7.887
## 3 AZ MTN 3665 4.231
## 4 AR WSC 2351 3.334
## 5 CA PAC 29760 4.826
## 6 CO MTN 3294 4.809
## 7 CN NE 3287 7.914
## 8 DE SA 666 6.016
## 9 DC SA 607 8.210
## 10 FL SA 12938 5.154
## 11 GA SA 6478 4.860
## 12 HI PAC 1108 5.008
## 13 ID MTN 1007 3.200
## 14 IL ENC 11431 5.062
## 15 IN ENC 5544 5.051
## 16 IA WNC 2777 4.839
## 17 KS WNC 2478 5.009
## 18 KY ESC 3685 4.390
## 19 LA WSC 4220 4.012
## 20 ME NE 1228 5.894
# Give the four columns of the States subset descriptive labels.
colnames(newdataset2) <- c(
  "States",
  "Census Regions",
  "Population",
  "State Spending on Education"
)
head(newdataset2, n = 20)
## States Census Regions Population State Spending on Education
## 1 AL ESC 4041 3.648
## 2 AK PAC 550 7.887
## 3 AZ MTN 3665 4.231
## 4 AR WSC 2351 3.334
## 5 CA PAC 29760 4.826
## 6 CO MTN 3294 4.809
## 7 CN NE 3287 7.914
## 8 DE SA 666 6.016
## 9 DC SA 607 8.210
## 10 FL SA 12938 5.154
## 11 GA SA 6478 4.860
## 12 HI PAC 1108 5.008
## 13 ID MTN 1007 3.200
## 14 IL ENC 11431 5.062
## 15 IN ENC 5544 5.051
## 16 IA WNC 2777 4.839
## 17 KS WNC 2478 5.009
## 18 KY ESC 3685 4.390
## 19 LA WSC 4220 4.012
## 20 ME NE 1228 5.894
# plyr package required: it supplies mapvalues(), used below to expand the
# census-region codes.
# library() stops with an error when the package is not installed, whereas
# require() only returns FALSE and lets the script fail later at mapvalues().
library(plyr)
## Warning: package 'plyr' was built under R version 3.6.1
# Rename the values in the census-region column (column 2) from their codes
# to full region names.
# The lookup covers all nine codes, including "MA" (Middle Atlantic), which
# occurs in the full States table but not in these first 20 rows.
# warn_missing = FALSE keeps mapvalues() quiet about codes absent from the data.
region_names <- c(
  ESC = "East South Central",
  PAC = "Pacific",
  MTN = "Mountain",
  WSC = "West South Central",
  NE = "New England",
  SA = "South Atlantic",
  ENC = "East North Central",
  WNC = "West North Central",
  MA = "Middle Atlantic"
)
x <- mapvalues(
  newdataset2[, 2],
  from = names(region_names),
  to = unname(region_names),
  warn_missing = FALSE
)
newdataset2[, 2] <- x
head(newdataset2, 20)
## States Census Regions Population State Spending on Education
## 1 AL East South Central 4041 3.648
## 2 AK Pacific 550 7.887
## 3 AZ Mountain 3665 4.231
## 4 AR West South Central 2351 3.334
## 5 CA Pacific 29760 4.826
## 6 CO Mountain 3294 4.809
## 7 CN New England 3287 7.914
## 8 DE South Atlantic 666 6.016
## 9 DC South Atlantic 607 8.210
## 10 FL South Atlantic 12938 5.154
## 11 GA South Atlantic 6478 4.860
## 12 HI Pacific 1108 5.008
## 13 ID Mountain 1007 3.200
## 14 IL East North Central 11431 5.062
## 15 IN East North Central 5544 5.051
## 16 IA West North Central 2777 4.839
## 17 KS West North Central 2478 5.009
## 18 KY East South Central 3685 4.390
## 19 LA West South Central 4220 4.012
## 20 ME New England 1228 5.894