require(dplyr)
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
require(ggvis)
## Loading required package: ggvis
require(magrittr)
## Loading required package: magrittr
require(Ecdat)
## Loading required package: Ecdat
## Loading required package: Ecfun
##
## Attaching package: 'Ecdat'
##
## The following object is masked from 'package:datasets':
##
## Orange
require(corrplot)
## Loading required package: corrplot
# Download the state-indicator CSV and wrap it as a dplyr table so that
# printing shows only the first rows instead of the whole data frame.
data2 <- url("http://www.personal.psu.edu/dlp/w540/StateIndicator.csv") %>%
  read.csv() %>%
  tbl_df()
data2
## Source: local data frame [50 x 7]
##
## stateNames Population Income Illiteracy LifeExp Murder HSGrad
## 1 Alabama 3615 3624 2.1 69.05 15.1 41.3
## 2 Alaska 365 6315 1.5 69.31 11.3 66.7
## 3 Arizona 2212 4530 1.8 70.55 7.8 58.1
## 4 Arkansas 2110 3378 1.9 70.66 10.1 39.9
## 5 California 21198 5114 1.1 71.71 10.3 62.6
## 6 Colorado 2541 4884 0.7 72.06 6.8 63.9
## 7 Connecticut 3100 5348 1.1 72.48 3.1 56.0
## 8 Delaware 579 4809 0.9 70.06 6.2 54.6
## 9 Florida 8277 4815 1.3 70.66 10.7 52.6
## 10 Georgia 4931 4091 2.0 68.54 13.9 40.6
## .. ... ... ... ... ... ... ...
# Load the rquery.cormat() helper from the STHDA site.
# NOTE(review): this runs remote code fetched over plain HTTP at every
# execution; consider vendoring a reviewed copy of the script instead.
source("http://www.sthda.com/upload/rquery_cormat.r")

# Keep only the six numeric indicators (stateNames is left out) so the
# correlation helper receives numeric columns only.
state <- select(
  data2,
  Population, Income, Illiteracy, LifeExp, Murder, HSGrad
)

# Report pairwise correlations, their p-values and a symbolic summary.
rquery.cormat(state)
## $r
## LifeExp Income HSGrad Population Illiteracy Murder
## LifeExp 1
## Income 0.34 1
## HSGrad 0.58 0.62 1
## Population -0.068 0.21 -0.098 1
## Illiteracy -0.59 -0.44 -0.66 0.11 1
## Murder -0.78 -0.23 -0.49 0.34 0.7 1
##
## $p
## LifeExp Income HSGrad Population Illiteracy Murder
## LifeExp 0
## Income 0.016 0
## HSGrad 9.2e-06 1.6e-06 0
## Population 0.64 0.15 0.5 0
## Illiteracy 7e-06 0.0015 2.2e-07 0.46 0
## Murder 2.3e-11 0.11 0.00032 0.015 1.3e-08 0
##
## $sym
## LifeExp Income HSGrad Population Illiteracy Murder
## LifeExp 1
## Income . 1
## HSGrad . , 1
## Population 1
## Illiteracy . . , 1
## Murder , . . , 1
## attr(,"legend")
## [1] 0 ' ' 0.3 '.' 0.6 ',' 0.8 '+' 0.9 '*' 0.95 'B' 1
Plots that demonstrate the relationships between
HSGrad and Income, Illiteracy and Income, and Illiteracy and Murder:
# Income against high-school graduation rate, with a smoothed trend line.
state %>%
  ggvis(~HSGrad, ~Income) %>%
  layer_points() %>%
  layer_smooths()

# Income against illiteracy, with a smoothed trend line.
state %>%
  ggvis(~Illiteracy, ~Income) %>%
  layer_points() %>%
  layer_smooths()

# Murder against illiteracy; each point is filled according to its HSGrad
# value treated as a discrete factor.
state %>%
  ggvis(~Illiteracy, ~Murder) %>%
  layer_points(fill = ~factor(HSGrad))
# Flag each state by high-school graduation rate:
#   1 = HSGrad strictly above the median, 0 = at or below the median.
# The flag must be built from HSGrad, not Income: splitting on Income and
# then testing Income compares a variable against its own median split,
# which is circular and always "significant".
state$median <- ifelse(state$HSGrad > median(state$HSGrad), 1, 0)

# Welch two-sample t-test of mean Income between the two HSGrad groups.
# NOTE: the echoed output that follows was produced with the earlier
# Income-based split and has to be regenerated after this fix.
t.test(state$Income ~ state$median)
##
## Welch Two Sample t-test
##
## data: state$Income by state$median
## t = -9.0052, df = 47.995, p-value = 6.973e-12
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1179.5304 -748.9496
## sample estimates:
## mean in group 0 mean in group 1
## 3953.68 4917.92
According to the result of the t-test, the null hypothesis of equal means is rejected. In other words, there is a statistically significant difference in mean income between states above the median HSGrad and states at or below the median HSGrad.
require(car)
## Loading required package: car
##
## Attaching package: 'car'
##
## The following object is masked from 'package:Ecdat':
##
## Mroz
# Build a two-level grouping flag from the state name: the nine states listed
# in the recode spec become 1, every other state becomes 0.
# recode() is qualified with car:: because dplyr also exports a recode() with
# an incompatible argument syntax; the bare name would silently depend on
# which package was attached last.
state$state <- car::recode(
  data2$stateNames,
  '"Alabama" = 1; "Alaska" =1; "Arkansas"=1; "Georgia"=1; "Illinois"=1; "Kentucky"=1; "Louisiana"=1; "Mississippi"=1; "Michigan"=1; else=0'
)

# Welch two-sample t-test of mean Murder between the flagged states and the
# rest.
t.test(state$Murder ~ state$state)
##
## Welch Two Sample t-test
##
## data: state$Murder by state$state
## t = -7.3491, df = 21.544, p-value = 2.667e-07
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -7.246531 -4.053740
## sample estimates:
## mean in group 0 mean in group 1
## 6.360976 12.011111
According to the result of the t-test, the null hypothesis is rejected. That is, there is a statistically significant difference in the mean murder rate between the two groups of states.