## Source: local data frame [50 x 7]
##
## stateNames Population Income Illiteracy LifeExp Murder HSGrad
## (fctr) (int) (int) (dbl) (dbl) (dbl) (dbl)
## 1 Alabama 3615 3624 2.1 69.05 15.1 41.3
## 2 Alaska 365 6315 1.5 69.31 11.3 66.7
## 3 Arizona 2212 4530 1.8 70.55 7.8 58.1
## 4 Arkansas 2110 3378 1.9 70.66 10.1 39.9
## 5 California 21198 5114 1.1 71.71 10.3 62.6
## 6 Colorado 2541 4884 0.7 72.06 6.8 63.9
## 7 Connecticut 3100 5348 1.1 72.48 3.1 56.0
## 8 Delaware 579 4809 0.9 70.06 6.2 54.6
## 9 Florida 8277 4815 1.3 70.66 10.7 52.6
## 10 Georgia 4931 4091 2.0 68.54 13.9 40.6
## .. ... ... ... ... ... ... ...
## stateNames Population Income Illiteracy
## Alabama : 1 Min. : 365 Min. :3098 Min. :0.500
## Alaska : 1 1st Qu.: 1080 1st Qu.:3993 1st Qu.:0.625
## Arizona : 1 Median : 2838 Median :4519 Median :0.950
## Arkansas : 1 Mean : 4246 Mean :4436 Mean :1.170
## California: 1 3rd Qu.: 4968 3rd Qu.:4814 3rd Qu.:1.575
## Colorado : 1 Max. :21198 Max. :6315 Max. :2.800
## (Other) :44
## LifeExp Murder HSGrad
## Min. :67.96 Min. : 1.400 Min. :37.80
## 1st Qu.:70.12 1st Qu.: 4.350 1st Qu.:48.05
## Median :70.67 Median : 6.850 Median :53.25
## Mean :70.88 Mean : 7.378 Mean :53.11
## 3rd Qu.:71.89 3rd Qu.:10.675 3rd Qu.:59.15
## Max. :73.60 Max. :15.100 Max. :67.30
##
## Classes 'tbl_df', 'tbl' and 'data.frame': 50 obs. of 7 variables:
## $ stateNames: Factor w/ 50 levels "Alabama","Alaska",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Population: int 3615 365 2212 2110 21198 2541 3100 579 8277 4931 ...
## $ Income : int 3624 6315 4530 3378 5114 4884 5348 4809 4815 4091 ...
## $ Illiteracy: num 2.1 1.5 1.8 1.9 1.1 0.7 1.1 0.9 1.3 2 ...
## $ LifeExp : num 69 69.3 70.5 70.7 71.7 ...
## $ Murder : num 15.1 11.3 7.8 10.1 10.3 6.8 3.1 6.2 10.7 13.9 ...
## $ HSGrad : num 41.3 66.7 58.1 39.9 62.6 63.9 56 54.6 52.6 40.6 ...
## Source: local data frame [6 x 7]
##
## stateNames Population Income Illiteracy LifeExp Murder HSGrad
## (fctr) (int) (int) (dbl) (dbl) (dbl) (dbl)
## 1 Alabama 3615 3624 2.1 69.05 15.1 41.3
## 2 Alaska 365 6315 1.5 69.31 11.3 66.7
## 3 Arizona 2212 4530 1.8 70.55 7.8 58.1
## 4 Arkansas 2110 3378 1.9 70.66 10.1 39.9
## 5 California 21198 5114 1.1 71.71 10.3 62.6
## 6 Colorado 2541 4884 0.7 72.06 6.8 63.9
# Build the numeric-only table used for the correlation matrix and the plots:
# the six indicator columns of Stateindic, without the stateNames factor.
States <- select(
  Stateindic,
  Population, Income, Illiteracy, LifeExp, Murder, HSGrad
)
print(States)
## Source: local data frame [50 x 6]
##
## Population Income Illiteracy LifeExp Murder HSGrad
## (int) (int) (dbl) (dbl) (dbl) (dbl)
## 1 3615 3624 2.1 69.05 15.1 41.3
## 2 365 6315 1.5 69.31 11.3 66.7
## 3 2212 4530 1.8 70.55 7.8 58.1
## 4 2110 3378 1.9 70.66 10.1 39.9
## 5 21198 5114 1.1 71.71 10.3 62.6
## 6 2541 4884 0.7 72.06 6.8 63.9
## 7 3100 5348 1.1 72.48 3.1 56.0
## 8 579 4809 0.9 70.06 6.2 54.6
## 9 8277 4815 1.3 70.66 10.7 52.6
## 10 4931 4091 2.0 68.54 13.9 40.6
## .. ... ... ... ... ... ...
# Evaluate the STHDA helper script; presumably this is what defines
# rquery.cormat(), which is not defined anywhere else in this script.
# NOTE(review): this downloads and runs remote code over plain HTTP on every
# execution -- consider keeping a reviewed local copy instead.
source(file = "http://www.sthda.com/upload/rquery_cormat.r")

# Correlation summary of the six indicators. The printed result holds the
# coefficients ($r), their p-values ($p) and a symbolic rendering ($sym).
rquery.cormat(States)
## $r
## LifeExp Income HSGrad Population Illiteracy Murder
## LifeExp 1
## Income 0.34 1
## HSGrad 0.58 0.62 1
## Population -0.068 0.21 -0.098 1
## Illiteracy -0.59 -0.44 -0.66 0.11 1
## Murder -0.78 -0.23 -0.49 0.34 0.7 1
##
## $p
## LifeExp Income HSGrad Population Illiteracy Murder
## LifeExp 0
## Income 0.016 0
## HSGrad 9.2e-06 1.6e-06 0
## Population 0.64 0.15 0.5 0
## Illiteracy 7e-06 0.0015 2.2e-07 0.46 0
## Murder 2.3e-11 0.11 0.00032 0.015 1.3e-08 0
##
## $sym
## LifeExp Income HSGrad Population Illiteracy Murder
## LifeExp 1
## Income . 1
## HSGrad . , 1
## Population 1
## Illiteracy . . , 1
## Murder , . . , 1
## attr(,"legend")
## [1] 0 ' ' 0.3 '.' 0.6 ',' 0.8 '+' 0.9 '*' 0.95 'B' 1
#### a. Plots that demonstrate the relationship between
#### i. HSGrad and Income
# Scatterplot with a smoothed trend line on top; the first two unnamed props
# of ggvis() are taken as x and y.
States %>%
  ggvis(~HSGrad, ~Income) %>%
  layer_points() %>%
  layer_smooths()

#### ii. Illiteracy and Income
States %>%
  ggvis(~Illiteracy, ~Income) %>%
  layer_points() %>%
  layer_smooths()
#### b. A scatterplot of Murder by Illiteracy grouped by HSGrad
# group_by() only changes the data seen by layers added after it, so it has to
# come before layer_points(); placed at the end of the pipeline it was a no-op
# for the points layer. Each point is filled by its HSGrad value, treated as a
# discrete level via factor().
States %>%
  ggvis(x = ~Illiteracy, y = ~Murder) %>%
  group_by(HSGrad) %>%
  layer_points(fill = ~factor(HSGrad))
# Median high-school graduation rate across all states, printed for reference.
median(States$HSGrad)
## [1] 53.25
# States strictly above the median HSGrad. The cutoff is computed inside
# filter() from the HSGrad column itself rather than re-typed as a literal, so
# the split stays correct if the underlying data changes.
aboveHSGrad <- filter(Stateindic, HSGrad > median(HSGrad))
aboveHSGrad
## Source: local data frame [25 x 7]
##
## stateNames Population Income Illiteracy LifeExp Murder HSGrad
## (fctr) (int) (int) (dbl) (dbl) (dbl) (dbl)
## 1 Alaska 365 6315 1.5 69.31 11.3 66.7
## 2 Arizona 2212 4530 1.8 70.55 7.8 58.1
## 3 California 21198 5114 1.1 71.71 10.3 62.6
## 4 Colorado 2541 4884 0.7 72.06 6.8 63.9
## 5 Connecticut 3100 5348 1.1 72.48 3.1 56.0
## 6 Delaware 579 4809 0.9 70.06 6.2 54.6
## 7 Hawaii 868 4963 1.9 73.60 6.2 61.9
## 8 Idaho 813 4119 0.6 71.87 5.3 59.5
## 9 Iowa 2861 4628 0.5 72.56 2.3 59.0
## 10 Kansas 2280 4669 0.6 72.58 4.5 59.9
## .. ... ... ... ... ... ... ...
# Income of the above-median states; first sample of the income t-test.
I.aboveHSGrad <- aboveHSGrad$Income

# States at or below the median HSGrad (the complement of aboveHSGrad). The
# cutoff is computed from the HSGrad column instead of the hard-coded 53.25.
lessHSGrad <- filter(Stateindic, HSGrad <= median(HSGrad))
lessHSGrad
## Source: local data frame [25 x 7]
##
## stateNames Population Income Illiteracy LifeExp Murder HSGrad
## (fctr) (int) (int) (dbl) (dbl) (dbl) (dbl)
## 1 Alabama 3615 3624 2.1 69.05 15.1 41.3
## 2 Arkansas 2110 3378 1.9 70.66 10.1 39.9
## 3 Florida 8277 4815 1.3 70.66 10.7 52.6
## 4 Georgia 4931 4091 2.0 68.54 13.9 40.6
## 5 Illinois 11197 5107 0.9 70.14 10.3 52.6
## 6 Indiana 5313 4458 0.7 70.88 7.1 52.9
## 7 Kentucky 3387 3712 1.6 70.10 10.6 38.5
## 8 Louisiana 3806 3545 2.8 68.76 13.2 42.2
## 9 Maryland 4122 5299 0.9 70.22 8.5 52.3
## 10 Michigan 9111 4751 0.9 70.63 11.1 52.8
## .. ... ... ... ... ... ... ...
# Income of the states at or below the median HSGrad; second sample.
I.lesseHSGrad <- lessHSGrad$Income

# Two-sample t-test of income between the two HSGrad halves, assuming equal
# variances (pooled). The argument is spelled out in full as var.equal instead
# of relying on partial matching of `var.eq`.
t.test(I.aboveHSGrad, I.lesseHSGrad, var.equal = TRUE)
##
## Two Sample t-test
##
## data: I.aboveHSGrad and I.lesseHSGrad
## t = 1.9642, df = 48, p-value = 0.05531
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -7.838476 671.518476
## sample estimates:
## mean of x mean of y
## 4601.72 4269.88
# First hand-picked set of states for the murder-rate comparison.
Group1 <- c(
  "Alabama", "Alaska", "Arkansas", "Georgia", "Illinois",
  "Kentucky", "Louisiana", "Mississippi", "Michigan"
)

# Rows of Stateindic whose stateNames value is one of the Group1 states.
Group1f <- Stateindic %>%
  filter(stateNames %in% Group1)
print(Group1f)
## Source: local data frame [9 x 7]
##
## stateNames Population Income Illiteracy LifeExp Murder HSGrad
## (fctr) (int) (int) (dbl) (dbl) (dbl) (dbl)
## 1 Alabama 3615 3624 2.1 69.05 15.1 41.3
## 2 Alaska 365 6315 1.5 69.31 11.3 66.7
## 3 Arkansas 2110 3378 1.9 70.66 10.1 39.9
## 4 Georgia 4931 4091 2.0 68.54 13.9 40.6
## 5 Illinois 11197 5107 0.9 70.14 10.3 52.6
## 6 Kentucky 3387 3712 1.6 70.10 10.6 38.5
## 7 Louisiana 3806 3545 2.8 68.76 13.2 42.2
## 8 Michigan 9111 4751 0.9 70.63 11.1 52.8
## 9 Mississippi 2341 3098 2.4 68.09 12.5 41.0
# Second hand-picked set of states for the murder-rate comparison.
Group2 <- c(
  "Arizona", "Connecticut", "Iowa", "Kansas", "Maine",
  "Minnesota", "Nebraska", "New Hampshire", "North Dakota"
)

# Rows of Stateindic whose stateNames value is one of the Group2 states.
Group2f <- Stateindic %>%
  filter(stateNames %in% Group2)
print(Group2f)
## Source: local data frame [9 x 7]
##
## stateNames Population Income Illiteracy LifeExp Murder HSGrad
## (fctr) (int) (int) (dbl) (dbl) (dbl) (dbl)
## 1 Arizona 2212 4530 1.8 70.55 7.8 58.1
## 2 Connecticut 3100 5348 1.1 72.48 3.1 56.0
## 3 Iowa 2861 4628 0.5 72.56 2.3 59.0
## 4 Kansas 2280 4669 0.6 72.58 4.5 59.9
## 5 Maine 1058 3694 0.7 70.39 2.7 54.7
## 6 Minnesota 3921 4675 0.6 72.96 2.3 57.6
## 7 Nebraska 1544 4508 0.6 72.60 2.9 59.3
## 8 New Hampshire 812 4281 0.7 71.23 3.3 57.6
## 9 North Dakota 637 5087 0.8 72.78 1.4 50.3
# Murder rates of the Group1 states; first sample.
m.Group1 <- Group1f$Murder
m.Group1
## [1] 15.1 11.3 10.1 13.9 10.3 10.6 13.2 11.1 12.5
# Murder rates of the Group2 states; second sample.
m.Group2 <- Group2f$Murder
m.Group2
## [1] 7.8 3.1 2.3 4.5 2.7 2.3 2.9 3.3 1.4
# Two-sample t-test of murder rate between the two groups, assuming equal
# variances (pooled). The argument is spelled out in full as var.equal instead
# of relying on partial matching of `var.eq`.
t.test(m.Group1, m.Group2, var.equal = TRUE)
##
## Two Sample t-test
##
## data: m.Group1 and m.Group2
## t = 10.124, df = 16, p-value = 2.312e-08
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 6.834422 10.454467
## sample estimates:
## mean of x mean of y
## 12.011111 3.366667