# Load the States data set twice: once from a local CSV (`stdt`, used by the
# rest of the script) and once from the copy published on GitHub (`stdtgh`).
#
# The working directory is deliberately NOT changed with setwd(): the local
# file is addressed through an explicit path instead, so running the script
# has no side effect on the session and still works on machines where the
# directory does not exist.
data_dir <- "C:/users/apagan/Documents/MSDSBridge/R/datasets"
theurl <- "https://raw.githubusercontent.com/apag101/MSDSBridge/master/States.csv"

# stringsAsFactors = TRUE is requested explicitly: the summary() output
# recorded further down tabulates State and Region by level, which requires
# factor columns, and since R 4.0.0 strings are no longer converted by default.
stdtgh <- read.table(
  file = theurl,
  header = TRUE,
  sep = ",",
  stringsAsFactors = TRUE
)

# Prefer the local copy when it is present; otherwise reuse the remote copy
# that was just downloaded instead of failing.
local_csv <- file.path(data_dir, "States.csv")
stdt <- if (file.exists(local_csv)) {
  read.csv(local_csv, stringsAsFactors = TRUE)
} else {
  stdtgh
}

head(stdtgh)
## X region pop SATV SATM percent dollars pay
## 1 AL ESC 4041 470 514 8 3.648 27
## 2 AK PAC 550 438 476 42 7.887 43
## 3 AZ MTN 3665 445 497 25 4.231 30
## 4 AR WSC 2351 470 511 6 3.334 23
## 5 CA PAC 29760 419 484 45 4.826 39
## 6 CO MTN 3294 456 513 28 4.809 31
# Column names exactly as they were imported from the CSV.
colnames(stdt)
## [1] "X" "region" "pop" "SATV" "SATM" "percent" "dollars"
## [8] "pay"
# Swap in descriptive column names. The replacement is positional, so the
# order below must follow the imported column order shown above.
colnames(stdt) <- c(
  "State",
  "Region",
  "Population",
  "Verbal",
  "Math",
  "%Grad",
  "EduSpend1000s",
  "AvgPay1000s"
)
colnames(stdt)
## [1] "State" "Region" "Population" "Verbal"
## [5] "Math" "%Grad" "EduSpend1000s" "AvgPay1000s"
# Derived measure: EduSpend1000s divided by AvgPay1000s, one value per row.
# `stdtt` is kept as a one-column matrix named "StdInvPerTeachPay", the same
# object the script has always produced.
stdtt <- cbind(StdInvPerTeachPay = stdt$EduSpend1000s / stdt$AvgPay1000s)

# Attach the ratio by column name rather than cbind()-ing it onto the data
# frame: the first run appends the column at the end exactly as before, and
# re-running these lines overwrites it instead of adding a duplicate column.
stdt$StdInvPerTeachPay <- stdtt[, "StdInvPerTeachPay"]

head(stdt)
## State Region Population Verbal Math %Grad EduSpend1000s AvgPay1000s
## 1 AL ESC 4041 470 514 8 3.648 27
## 2 AK PAC 550 438 476 42 7.887 43
## 3 AZ MTN 3665 445 497 25 4.231 30
## 4 AR WSC 2351 470 511 6 3.334 23
## 5 CA PAC 29760 419 484 45 4.826 39
## 6 CO MTN 3294 456 513 28 4.809 31
## StdInvPerTeachPay
## 1 0.1351111
## 2 0.1834186
## 3 0.1410333
## 4 0.1449565
## 5 0.1237436
## 6 0.1551290
# Per-column overview of the data frame, including the derived ratio column.
summary(object = stdt)
## State Region Population Verbal Math
## AK : 1 SA : 9 Min. : 454 Min. :397.0 Min. :437.0
## AL : 1 MTN : 8 1st Qu.: 1215 1st Qu.:422.5 1st Qu.:470.0
## AR : 1 WNC : 7 Median : 3294 Median :443.0 Median :490.0
## AZ : 1 NE : 6 Mean : 4877 Mean :448.2 Mean :497.4
## CA : 1 ENC : 5 3rd Qu.: 5780 3rd Qu.:474.5 3rd Qu.:522.5
## CN : 1 PAC : 5 Max. :29760 Max. :511.0 Max. :577.0
## (Other):45 (Other):11
## %Grad EduSpend1000s AvgPay1000s StdInvPerTeachPay
## Min. : 4.00 Min. :2.993 Min. :22.00 Min. :0.1197
## 1st Qu.:11.50 1st Qu.:4.354 1st Qu.:27.50 1st Qu.:0.1521
## Median :25.00 Median :5.045 Median :30.00 Median :0.1656
## Mean :33.75 Mean :5.175 Mean :30.94 Mean :0.1659
## 3rd Qu.:57.50 3rd Qu.:5.689 3rd Qu.:33.50 3rd Qu.:0.1789
## Max. :74.00 Max. :9.159 Max. :43.00 Max. :0.2410
##
# Mean teacher pay and mean education spending for each distinct %Grad value.
# The formulas use bare column names so that `data = stdt` is what supplies
# the variables; this also yields axis labels such as "%Grad" instead of the
# raw expression "stdt$`%Grad`".
plot(aggregate(AvgPay1000s ~ `%Grad`, data = stdt, FUN = mean))
plot(aggregate(EduSpend1000s ~ `%Grad`, data = stdt, FUN = mean))

# Distribution of each variable, titled with the column name rather than the
# R expression used to extract it.
for (hist_column in c("EduSpend1000s", "AvgPay1000s", "%Grad")) {
  hist(
    stdt[[hist_column]],
    main = paste("Histogram of", hist_column),
    xlab = hist_column
  )
}
# Rows that are above the overall mean on all three measures at once:
# %Grad, education spending and average pay.
above_avg <- with(
  stdt,
  `%Grad` > mean(`%Grad`) &
    EduSpend1000s > mean(EduSpend1000s) &
    AvgPay1000s > mean(AvgPay1000s)
)
# As with subset(), rows whose condition evaluates to NA are left out, and the
# result stays a data frame.
stdt[
  above_avg & !is.na(above_avg),
  c("State", "Region", "%Grad", "EduSpend1000s", "AvgPay1000s", "Math", "Verbal"),
  drop = FALSE
]
## State Region %Grad EduSpend1000s AvgPay1000s Math Verbal
## 2 AK PAC 42 7.887 43 476 438
## 7 CN NE 74 7.914 43 471 430
## 8 DE SA 58 6.016 35 470 433
## 9 DC SA 68 8.210 39 441 409
## 21 MD SA 59 6.184 38 478 430
## 22 MA NE 72 6.351 36 473 427
## 30 NH NE 67 5.504 31 486 442
## 31 NJ MA 69 9.159 38 473 418
## 33 NY MA 70 8.500 42 470 412
## 38 OR PAC 49 5.291 32 484 439
## 39 PA MA 64 6.534 36 463 420
## 40 RI NE 62 6.989 37 461 422
## 46 VT NE 62 5.740 31 466 431
## 47 VA SA 58 5.360 32 470 425
# Minimum, mean and maximum of five columns, stacked into a 15 x 1 matrix:
# all minima first, then all means, then all maxima. Row names are the
# statistic followed by a short label for the column.
stat_labels <- c("EduSpend", "AvgPay", "Math", "Verbal", "Grads")
stat_source <- stdt[c("EduSpend1000s", "AvgPay1000s", "Math", "Verbal", "%Grad")]

# One numeric value per column for each statistic, concatenated in the order
# min, mean, max.
stat_values <- unlist(
  lapply(
    list(min, mean, max),
    function(stat) vapply(stat_source, stat, numeric(1))
  ),
  use.names = FALSE
)

stdt.stats <- matrix(
  stat_values,
  ncol = 1,
  dimnames = list(
    paste0(rep(c("Min", "Mean", "Max"), each = length(stat_labels)), stat_labels),
    NULL
  )
)
stdt.stats
## [,1]
## MinEduSpend 2.99300
## MinAvgPay 22.00000
## MinMath 437.00000
## MinVerbal 397.00000
## MinGrads 4.00000
## MeanEduSpend 5.17549
## MeanAvgPay 30.94118
## MeanMath 497.39216
## MeanVerbal 448.15686
## MeanGrads 33.74510
## MaxEduSpend 9.15900
## MaxAvgPay 43.00000
## MaxMath 577.00000
## MaxVerbal 511.00000
## MaxGrads 74.00000
# Documentation: http://vincentarelbundock.github.io/Rdatasets/doc/carData/States.html
# Data set: http://vincentarelbundock.github.io/Rdatasets/csv/carData/States.csv