## Source: local data frame [50 x 7]
## 
##     stateNames Population Income Illiteracy LifeExp Murder HSGrad
##         (fctr)      (int)  (int)      (dbl)   (dbl)  (dbl)  (dbl)
## 1      Alabama       3615   3624        2.1   69.05   15.1   41.3
## 2       Alaska        365   6315        1.5   69.31   11.3   66.7
## 3      Arizona       2212   4530        1.8   70.55    7.8   58.1
## 4     Arkansas       2110   3378        1.9   70.66   10.1   39.9
## 5   California      21198   5114        1.1   71.71   10.3   62.6
## 6     Colorado       2541   4884        0.7   72.06    6.8   63.9
## 7  Connecticut       3100   5348        1.1   72.48    3.1   56.0
## 8     Delaware        579   4809        0.9   70.06    6.2   54.6
## 9      Florida       8277   4815        1.3   70.66   10.7   52.6
## 10     Georgia       4931   4091        2.0   68.54   13.9   40.6
## ..         ...        ...    ...        ...     ...    ...    ...
##       stateNames   Population        Income       Illiteracy   
##  Alabama   : 1   Min.   :  365   Min.   :3098   Min.   :0.500  
##  Alaska    : 1   1st Qu.: 1080   1st Qu.:3993   1st Qu.:0.625  
##  Arizona   : 1   Median : 2838   Median :4519   Median :0.950  
##  Arkansas  : 1   Mean   : 4246   Mean   :4436   Mean   :1.170  
##  California: 1   3rd Qu.: 4968   3rd Qu.:4814   3rd Qu.:1.575  
##  Colorado  : 1   Max.   :21198   Max.   :6315   Max.   :2.800  
##  (Other)   :44                                                 
##     LifeExp          Murder           HSGrad     
##  Min.   :67.96   Min.   : 1.400   Min.   :37.80  
##  1st Qu.:70.12   1st Qu.: 4.350   1st Qu.:48.05  
##  Median :70.67   Median : 6.850   Median :53.25  
##  Mean   :70.88   Mean   : 7.378   Mean   :53.11  
##  3rd Qu.:71.89   3rd Qu.:10.675   3rd Qu.:59.15  
##  Max.   :73.60   Max.   :15.100   Max.   :67.30  
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    50 obs. of  7 variables:
##  $ stateNames: Factor w/ 50 levels "Alabama","Alaska",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ Population: int  3615 365 2212 2110 21198 2541 3100 579 8277 4931 ...
##  $ Income    : int  3624 6315 4530 3378 5114 4884 5348 4809 4815 4091 ...
##  $ Illiteracy: num  2.1 1.5 1.8 1.9 1.1 0.7 1.1 0.9 1.3 2 ...
##  $ LifeExp   : num  69 69.3 70.5 70.7 71.7 ...
##  $ Murder    : num  15.1 11.3 7.8 10.1 10.3 6.8 3.1 6.2 10.7 13.9 ...
##  $ HSGrad    : num  41.3 66.7 58.1 39.9 62.6 63.9 56 54.6 52.6 40.6 ...
## Source: local data frame [6 x 7]
## 
##   stateNames Population Income Illiteracy LifeExp Murder HSGrad
##       (fctr)      (int)  (int)      (dbl)   (dbl)  (dbl)  (dbl)
## 1    Alabama       3615   3624        2.1   69.05   15.1   41.3
## 2     Alaska        365   6315        1.5   69.31   11.3   66.7
## 3    Arizona       2212   4530        1.8   70.55    7.8   58.1
## 4   Arkansas       2110   3378        1.9   70.66   10.1   39.9
## 5 California      21198   5114        1.1   71.71   10.3   62.6
## 6   Colorado       2541   4884        0.7   72.06    6.8   63.9

Use Stateindic built-in dataset and retain in States only the continuous variables from Stateindic.

# Build States from Stateindic, keeping just the six numeric indicator columns
# (the stateNames factor is left behind), then print it.
States <- select(
  Stateindic,
  Population, Income, Illiteracy, LifeExp, Murder, HSGrad
)
States
## Source: local data frame [50 x 6]
## 
##    Population Income Illiteracy LifeExp Murder HSGrad
##         (int)  (int)      (dbl)   (dbl)  (dbl)  (dbl)
## 1        3615   3624        2.1   69.05   15.1   41.3
## 2         365   6315        1.5   69.31   11.3   66.7
## 3        2212   4530        1.8   70.55    7.8   58.1
## 4        2110   3378        1.9   70.66   10.1   39.9
## 5       21198   5114        1.1   71.71   10.3   62.6
## 6        2541   4884        0.7   72.06    6.8   63.9
## 7        3100   5348        1.1   72.48    3.1   56.0
## 8         579   4809        0.9   70.06    6.2   54.6
## 9        8277   4815        1.3   70.66   10.7   52.6
## 10       4931   4091        2.0   68.54   13.9   40.6
## ..        ...    ...        ...     ...    ...    ...

Load the cormat functions.

# Executes an R script fetched from a remote URL over plain HTTP each time this
# line runs. Presumably this is what defines rquery.cormat() -- TODO confirm.
# NOTE(review): the remote content can change or be tampered with in transit;
# consider sourcing a reviewed local copy instead.
source("http://www.sthda.com/upload/rquery_cormat.r")

1. Compute and report correlations among these six variables and plot a correlogram representing these correlations.

# Run the correlation helper on the six States columns. The printed result has
# three components: $r, $p and $sym (the latter carries a "legend" attribute).
rquery.cormat(States)

## $r
##            LifeExp Income HSGrad Population Illiteracy Murder
## LifeExp          1                                           
## Income        0.34      1                                    
## HSGrad        0.58   0.62      1                             
## Population  -0.068   0.21 -0.098          1                  
## Illiteracy   -0.59  -0.44  -0.66       0.11          1       
## Murder       -0.78  -0.23  -0.49       0.34        0.7      1
## 
## $p
##            LifeExp  Income  HSGrad Population Illiteracy Murder
## LifeExp          0                                             
## Income       0.016       0                                     
## HSGrad     9.2e-06 1.6e-06       0                             
## Population    0.64    0.15     0.5          0                  
## Illiteracy   7e-06  0.0015 2.2e-07       0.46          0       
## Murder     2.3e-11    0.11 0.00032      0.015    1.3e-08      0
## 
## $sym
##            LifeExp Income HSGrad Population Illiteracy Murder
## LifeExp    1                                                 
## Income     .       1                                         
## HSGrad     .       ,      1                                  
## Population                       1                           
## Illiteracy .       .      ,                 1                
## Murder     ,              .      .          ,          1     
## attr(,"legend")
## [1] 0 ' ' 0.3 '.' 0.6 ',' 0.8 '+' 0.9 '*' 0.95 'B' 1

2. Using ggvis, construct

#### a. Plots that demonstrate the relationship between

#### i. HSGrad and Income

# Income against HSGrad: raw points with a smoothed trend line on top.
States %>%
  ggvis(~HSGrad, ~Income) %>%
  layer_points() %>%
  layer_smooths()

#### ii. Illiteracy and Income

# Income against Illiteracy: raw points with a smoothed trend line on top.
States %>%
  ggvis(~Illiteracy, ~Income) %>%
  layer_points() %>%
  layer_smooths()

#### b. A scatterplot of Murder by Illiteracy grouped by HSGrad

# Scatterplot of Murder against Illiteracy, grouped and filled by HSGrad.
# group_by() must come before layer_points(): a ggvis layer uses the data that
# is current when the layer is added, so grouping after the layer (as the
# previous version did) never reaches the points.
States %>%
  ggvis(x = ~Illiteracy, y = ~Murder) %>%
  group_by(HSGrad) %>%
  layer_points(fill = ~factor(HSGrad))

3. Report tests of null hypotheses that

a. there is no difference in Income between states above median HSGrad and states less than or equal to median HSGrad, and

median(States$HSGrad)
## [1] 53.25
# States strictly above the median HSGrad. The cut-off is computed from the
# data inside filter() rather than copied by hand from the printed (rounded)
# median, so the split stays correct if the data changes.
aboveHSGrad <- filter(Stateindic, HSGrad > median(HSGrad))
aboveHSGrad
## Source: local data frame [25 x 7]
## 
##     stateNames Population Income Illiteracy LifeExp Murder HSGrad
##         (fctr)      (int)  (int)      (dbl)   (dbl)  (dbl)  (dbl)
## 1       Alaska        365   6315        1.5   69.31   11.3   66.7
## 2      Arizona       2212   4530        1.8   70.55    7.8   58.1
## 3   California      21198   5114        1.1   71.71   10.3   62.6
## 4     Colorado       2541   4884        0.7   72.06    6.8   63.9
## 5  Connecticut       3100   5348        1.1   72.48    3.1   56.0
## 6     Delaware        579   4809        0.9   70.06    6.2   54.6
## 7       Hawaii        868   4963        1.9   73.60    6.2   61.9
## 8        Idaho        813   4119        0.6   71.87    5.3   59.5
## 9         Iowa       2861   4628        0.5   72.56    2.3   59.0
## 10      Kansas       2280   4669        0.6   72.58    4.5   59.9
## ..         ...        ...    ...        ...     ...    ...    ...
# Income values for the above-median group.
I.aboveHSGrad <- aboveHSGrad$Income
# States at or below the median HSGrad. The cut-off is computed from the data
# inside filter() instead of hard-coding 53.25.
lessHSGrad <- filter(Stateindic, HSGrad <= median(HSGrad))
lessHSGrad
## Source: local data frame [25 x 7]
## 
##    stateNames Population Income Illiteracy LifeExp Murder HSGrad
##        (fctr)      (int)  (int)      (dbl)   (dbl)  (dbl)  (dbl)
## 1     Alabama       3615   3624        2.1   69.05   15.1   41.3
## 2    Arkansas       2110   3378        1.9   70.66   10.1   39.9
## 3     Florida       8277   4815        1.3   70.66   10.7   52.6
## 4     Georgia       4931   4091        2.0   68.54   13.9   40.6
## 5    Illinois      11197   5107        0.9   70.14   10.3   52.6
## 6     Indiana       5313   4458        0.7   70.88    7.1   52.9
## 7    Kentucky       3387   3712        1.6   70.10   10.6   38.5
## 8   Louisiana       3806   3545        2.8   68.76   13.2   42.2
## 9    Maryland       4122   5299        0.9   70.22    8.5   52.3
## 10   Michigan       9111   4751        0.9   70.63   11.1   52.8
## ..        ...        ...    ...        ...     ...    ...    ...
# Income values for the at-or-below-median group.
I.lesseHSGrad <- lessHSGrad$Income

# Two-sample t-test of Income between the two HSGrad groups, assuming equal
# variances. The argument is spelled out in full (var.equal) instead of
# relying on partial matching of "var.eq".
t.test(I.aboveHSGrad, I.lesseHSGrad, var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  I.aboveHSGrad and I.lesseHSGrad
## t = 1.9642, df = 48, p-value = 0.05531
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   -7.838476 671.518476
## sample estimates:
## mean of x mean of y 
##   4601.72   4269.88
The p-value associated with a t-value of 1.9642 (df = 48) is 0.05531, which is greater than α = .05. Therefore, the null hypothesis cannot be rejected at α = .05: we fail to reject the null hypothesis that there is no difference in Income between states above median HSGrad and states less than or equal to median HSGrad.
b. there is no difference in Murder between one group of states (to include Alabama, Alaska, Arkansas, Georgia, Illinois, Kentucky, Louisiana, Mississippi, and Michigan) and another group of states (Arizona, Connecticut, Iowa, Kansas, Maine, Minnesota, Nebraska, New Hampshire, North Dakota).
# Names of the states in the first comparison group, and the matching rows of
# Stateindic (printed for inspection).
Group1 <- c(
  "Alabama", "Alaska", "Arkansas", "Georgia", "Illinois",
  "Kentucky", "Louisiana", "Mississippi", "Michigan"
)
Group1f <- Stateindic %>% filter(stateNames %in% Group1)
Group1f
## Source: local data frame [9 x 7]
## 
##    stateNames Population Income Illiteracy LifeExp Murder HSGrad
##        (fctr)      (int)  (int)      (dbl)   (dbl)  (dbl)  (dbl)
## 1     Alabama       3615   3624        2.1   69.05   15.1   41.3
## 2      Alaska        365   6315        1.5   69.31   11.3   66.7
## 3    Arkansas       2110   3378        1.9   70.66   10.1   39.9
## 4     Georgia       4931   4091        2.0   68.54   13.9   40.6
## 5    Illinois      11197   5107        0.9   70.14   10.3   52.6
## 6    Kentucky       3387   3712        1.6   70.10   10.6   38.5
## 7   Louisiana       3806   3545        2.8   68.76   13.2   42.2
## 8    Michigan       9111   4751        0.9   70.63   11.1   52.8
## 9 Mississippi       2341   3098        2.4   68.09   12.5   41.0
# Names of the states in the second comparison group, and the matching rows of
# Stateindic (printed for inspection).
Group2 <- c(
  "Arizona", "Connecticut", "Iowa", "Kansas", "Maine",
  "Minnesota", "Nebraska", "New Hampshire", "North Dakota"
)
Group2f <- Stateindic %>% filter(stateNames %in% Group2)
Group2f
## Source: local data frame [9 x 7]
## 
##      stateNames Population Income Illiteracy LifeExp Murder HSGrad
##          (fctr)      (int)  (int)      (dbl)   (dbl)  (dbl)  (dbl)
## 1       Arizona       2212   4530        1.8   70.55    7.8   58.1
## 2   Connecticut       3100   5348        1.1   72.48    3.1   56.0
## 3          Iowa       2861   4628        0.5   72.56    2.3   59.0
## 4        Kansas       2280   4669        0.6   72.58    4.5   59.9
## 5         Maine       1058   3694        0.7   70.39    2.7   54.7
## 6     Minnesota       3921   4675        0.6   72.96    2.3   57.6
## 7      Nebraska       1544   4508        0.6   72.60    2.9   59.3
## 8 New Hampshire        812   4281        0.7   71.23    3.3   57.6
## 9  North Dakota        637   5087        0.8   72.78    1.4   50.3
# Murder values for the first group of states, echoed for inspection.
m.Group1 <- Group1f[["Murder"]]
m.Group1
## [1] 15.1 11.3 10.1 13.9 10.3 10.6 13.2 11.1 12.5
# Murder values for the second group of states, echoed for inspection.
m.Group2 <- Group2f[["Murder"]]
m.Group2
## [1] 7.8 3.1 2.3 4.5 2.7 2.3 2.9 3.3 1.4
# Two-sample t-test of Murder between the two groups of states, assuming equal
# variances. The argument is spelled out in full (var.equal) instead of
# relying on partial matching of "var.eq".
t.test(m.Group1, m.Group2, var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  m.Group1 and m.Group2
## t = 10.124, df = 16, p-value = 2.312e-08
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   6.834422 10.454467
## sample estimates:
## mean of x mean of y 
## 12.011111  3.366667
The p-value associated with a t-value of 10.124 (df = 16) is 2.312e-08, which is less than α = .05. Therefore, the null hypothesis is rejected at α = .05: we reject the null hypothesis that there is no difference in Murder between one group of states (to include Alabama, Alaska, Arkansas, Georgia, Illinois, Kentucky, Louisiana, Mississippi, and Michigan) and another group of states (Arizona, Connecticut, Iowa, Kansas, Maine, Minnesota, Nebraska, New Hampshire, North Dakota).