# finaltask2.R

##Title: WFED 540 Final Task 2
##Author: Andrew Leigey
##Date: December 2, 2015
##Output: html_document

# Attach the packages this analysis depends on. library() stops with an error
# when a package is missing, whereas require() only returns FALSE and lets the
# script fail later with a less obvious "could not find function" message.
library(dplyr)

## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggvis)

library(magrittr)

library(knitr)

library(rmarkdown)

library(corrplot)

## Loading required package: corrplot

# Read the state indicator table; read.csv() already defaults to a header row
# and comma separators, so only the file name is needed.
stateindc <- read.csv("Stateindicator.csv")

print(stateindc)

##        stateNames Population Income Illiteracy LifeExp Murder HSGrad
## 1         Alabama       3615   3624        2.1   69.05   15.1   41.3
## 2          Alaska        365   6315        1.5   69.31   11.3   66.7
## 3         Arizona       2212   4530        1.8   70.55    7.8   58.1
## 4        Arkansas       2110   3378        1.9   70.66   10.1   39.9
## 5      California      21198   5114        1.1   71.71   10.3   62.6
## 6        Colorado       2541   4884        0.7   72.06    6.8   63.9
## 7     Connecticut       3100   5348        1.1   72.48    3.1   56.0
## 8        Delaware        579   4809        0.9   70.06    6.2   54.6
## 9         Florida       8277   4815        1.3   70.66   10.7   52.6
## 10        Georgia       4931   4091        2.0   68.54   13.9   40.6
## 11         Hawaii        868   4963        1.9   73.60    6.2   61.9
## 12          Idaho        813   4119        0.6   71.87    5.3   59.5
## 13       Illinois      11197   5107        0.9   70.14   10.3   52.6
## 14        Indiana       5313   4458        0.7   70.88    7.1   52.9
## 15           Iowa       2861   4628        0.5   72.56    2.3   59.0
## 16         Kansas       2280   4669        0.6   72.58    4.5   59.9
## 17       Kentucky       3387   3712        1.6   70.10   10.6   38.5
## 18      Louisiana       3806   3545        2.8   68.76   13.2   42.2
## 19          Maine       1058   3694        0.7   70.39    2.7   54.7
## 20       Maryland       4122   5299        0.9   70.22    8.5   52.3
## 21  Massachusetts       5814   4755        1.1   71.83    3.3   58.5
## 22       Michigan       9111   4751        0.9   70.63   11.1   52.8
## 23      Minnesota       3921   4675        0.6   72.96    2.3   57.6
## 24    Mississippi       2341   3098        2.4   68.09   12.5   41.0
## 25       Missouri       4767   4254        0.8   70.69    9.3   48.8
## 26        Montana        746   4347        0.6   70.56    5.0   59.2
## 27       Nebraska       1544   4508        0.6   72.60    2.9   59.3
## 28         Nevada        590   5149        0.5   69.03   11.5   65.2
## 29  New Hampshire        812   4281        0.7   71.23    3.3   57.6
## 30     New Jersey       7333   5237        1.1   70.93    5.2   52.5
## 31     New Mexico       1144   3601        2.2   70.32    9.7   55.2
## 32       New York      18076   4903        1.4   70.55   10.9   52.7
## 33 North Carolina       5441   3875        1.8   69.21   11.1   38.5
## 34   North Dakota        637   5087        0.8   72.78    1.4   50.3
## 35           Ohio      10735   4561        0.8   70.82    7.4   53.2
## 36       Oklahoma       2715   3983        1.1   71.42    6.4   51.6
## 37         Oregon       2284   4660        0.6   72.13    4.2   60.0
## 38   Pennsylvania      11860   4449        1.0   70.43    6.1   50.2
## 39   Rhode Island        931   4558        1.3   71.90    2.4   46.4
## 40 South Carolina       2816   3635        2.3   67.96   11.6   37.8
## 41   South Dakota        681   4167        0.5   72.08    1.7   53.3
## 42      Tennessee       4173   3821        1.7   70.11   11.0   41.8
## 43          Texas      12237   4188        2.2   70.90   12.2   47.4
## 44           Utah       1203   4022        0.6   72.90    4.5   67.3
## 45        Vermont        472   3907        0.6   71.64    5.5   57.1
## 46       Virginia       4981   4701        1.4   70.08    9.5   47.8
## 47     Washington       3559   4864        0.6   71.72    4.3   63.5
## 48  West Virginia       1799   3617        1.4   69.48    6.7   41.6
## 49      Wisconsin       4589   4468        0.7   72.48    3.0   54.5
## 50        Wyoming        376   4566        0.6   70.29    6.9   62.9

# Descriptive statistics for every column. dplyr::summarise() called without
# any summary expressions returns a data frame with 0 columns and 0 rows, so
# base summary() is used to get the per-column overview instead.
summary(stateindc)

#1. Correlogram

#Correlations: When I computed the correlations among the six variables from the State Indicators file, I found 
#that there was a medium positive correlation between Income and Life Expectancy 
#of 0.34. A strong positive correlation between High School Grad and Life Expectancy
#of 0.58. A strong positive correlation between High School Grad and Income of 0.62.
#A small positive correlation between Population and Income of 0.21. A strong negative
#correlation between Illiteracy and Life Expectancy of -0.59. A medium negative 
#correlation between Illiteracy and Income of -0.44. A strong negative correlation 
#between Illiteracy and High School Grad of -0.66. A strong negative correlation
#between Murder and Life Expectancy of -0.78. A small negative correlation between
#Murder and Income of -0.23. A medium negative correlation between Murder and High 
#School Grad of -0.49. A medium positive correlation between Murder and Population
#of 0.34, and a strong positive correlation between Murder and Illiteracy of 0.7. I
#found the last one interesting: the higher a state's Illiteracy rate, the higher its
#Murder and non-negligent manslaughter rate tended to be.


# Keep the six numeric indicators (dropping stateNames) for the correlogram.
stateindc6var <- select(
  stateindc,
  Population, Income, Illiteracy, LifeExp, Murder, HSGrad
)

# Print the subset to confirm the selection worked.
print(stateindc6var)

##    Population Income Illiteracy LifeExp Murder HSGrad
## 1        3615   3624        2.1   69.05   15.1   41.3
## 2         365   6315        1.5   69.31   11.3   66.7
## 3        2212   4530        1.8   70.55    7.8   58.1
## 4        2110   3378        1.9   70.66   10.1   39.9
## 5       21198   5114        1.1   71.71   10.3   62.6
## 6        2541   4884        0.7   72.06    6.8   63.9
## 7        3100   5348        1.1   72.48    3.1   56.0
## 8         579   4809        0.9   70.06    6.2   54.6
## 9        8277   4815        1.3   70.66   10.7   52.6
## 10       4931   4091        2.0   68.54   13.9   40.6
## 11        868   4963        1.9   73.60    6.2   61.9
## 12        813   4119        0.6   71.87    5.3   59.5
## 13      11197   5107        0.9   70.14   10.3   52.6
## 14       5313   4458        0.7   70.88    7.1   52.9
## 15       2861   4628        0.5   72.56    2.3   59.0
## 16       2280   4669        0.6   72.58    4.5   59.9
## 17       3387   3712        1.6   70.10   10.6   38.5
## 18       3806   3545        2.8   68.76   13.2   42.2
## 19       1058   3694        0.7   70.39    2.7   54.7
## 20       4122   5299        0.9   70.22    8.5   52.3
## 21       5814   4755        1.1   71.83    3.3   58.5
## 22       9111   4751        0.9   70.63   11.1   52.8
## 23       3921   4675        0.6   72.96    2.3   57.6
## 24       2341   3098        2.4   68.09   12.5   41.0
## 25       4767   4254        0.8   70.69    9.3   48.8
## 26        746   4347        0.6   70.56    5.0   59.2
## 27       1544   4508        0.6   72.60    2.9   59.3
## 28        590   5149        0.5   69.03   11.5   65.2
## 29        812   4281        0.7   71.23    3.3   57.6
## 30       7333   5237        1.1   70.93    5.2   52.5
## 31       1144   3601        2.2   70.32    9.7   55.2
## 32      18076   4903        1.4   70.55   10.9   52.7
## 33       5441   3875        1.8   69.21   11.1   38.5
## 34        637   5087        0.8   72.78    1.4   50.3
## 35      10735   4561        0.8   70.82    7.4   53.2
## 36       2715   3983        1.1   71.42    6.4   51.6
## 37       2284   4660        0.6   72.13    4.2   60.0
## 38      11860   4449        1.0   70.43    6.1   50.2
## 39        931   4558        1.3   71.90    2.4   46.4
## 40       2816   3635        2.3   67.96   11.6   37.8
## 41        681   4167        0.5   72.08    1.7   53.3
## 42       4173   3821        1.7   70.11   11.0   41.8
## 43      12237   4188        2.2   70.90   12.2   47.4
## 44       1203   4022        0.6   72.90    4.5   67.3
## 45        472   3907        0.6   71.64    5.5   57.1
## 46       4981   4701        1.4   70.08    9.5   47.8
## 47       3559   4864        0.6   71.72    4.3   63.5
## 48       1799   3617        1.4   69.48    6.7   41.6
## 49       4589   4468        0.7   72.48    3.0   54.5
## 50        376   4566        0.6   70.29    6.9   62.9

# Load the rquery.cormat() helper by sourcing its script from the STHDA site.
# NOTE(review): this runs remote code fetched over plain HTTP on every
# execution; consider keeping a reviewed local copy of the script instead.
source("http://www.sthda.com/upload/rquery_cormat.r")

# Run the helper on the six variables to produce the correlogram; the printed
# result below has $r, $p, and $sym components.
rquery.cormat(stateindc6var)

## $r
##            LifeExp Income HSGrad Population Illiteracy Murder
## LifeExp          1                                           
## Income        0.34      1                                    
## HSGrad        0.58   0.62      1                             
## Population  -0.068   0.21 -0.098          1                  
## Illiteracy   -0.59  -0.44  -0.66       0.11          1       
## Murder       -0.78  -0.23  -0.49       0.34        0.7      1
## 
## $p
##            LifeExp  Income  HSGrad Population Illiteracy Murder
## LifeExp          0                                             
## Income       0.016       0                                     
## HSGrad     9.2e-06 1.6e-06       0                             
## Population    0.64    0.15     0.5          0                  
## Illiteracy   7e-06  0.0015 2.2e-07       0.46          0       
## Murder     2.3e-11    0.11 0.00032      0.015    1.3e-08      0
## 
## $sym
##            LifeExp Income HSGrad Population Illiteracy Murder
## LifeExp    1                                                 
## Income     .       1                                         
## HSGrad     .       ,      1                                  
## Population                       1                           
## Illiteracy .       .      ,                 1                
## Murder     ,              .      .          ,          1     
## attr(,"legend")
## [1] 0 ' ' 0.3 '.' 0.6 ',' 0.8 '+' 0.9 '*' 0.95 'B' 1

# Same helper with graphType = "heatmap"; the returned result is kept in cormat.
cormat <- rquery.cormat(stateindc6var, graphType = "heatmap")

# Task 2. Using ggvis, construct a.) plots that demonstrate the relationship between
# i.) HSGrad and Income: points with a smoothed trend line layered on top.
stateindc6var %>%
  ggvis(~HSGrad, ~Income) %>%
  layer_points() %>%
  layer_smooths()

# ii.) Illiteracy and Income: same two layers, with Illiteracy on the x axis.
stateindc6var %>%
  ggvis(~Illiteracy, ~Income) %>%
  layer_points() %>%
  layer_smooths()

# b.) A scatterplot of Murder by Illiteracy grouped by HSGrad

# Split the states into two HSGrad groups (above vs. at or below the median)
# and fit one linear model per group. Grouping on the raw HSGrad values put
# nearly every state in its own group, so each lm() was fit to only one or two
# points and predict() warned about a rank-deficient fit for every group.
# The grouping column is added inside the pipeline only, so stateindc6var
# itself is left unchanged for the later steps.
stateindc6var %>%
  mutate(
    HSGradGroup = ifelse(
      HSGrad > median(HSGrad),
      "Above median",
      "At or below median"
    )
  ) %>%
  ggvis(~Murder, ~Illiteracy, fill = ~HSGradGroup) %>%
  layer_points() %>%
  group_by(HSGradGroup) %>%
  layer_model_predictions(model = "lm")

## Guessing formula = Illiteracy ~ Murder

# 3. Report tests of null hypotheses that: a. there is no difference in 
# Income between states above median HSGrad and states less than or equal 
# to median HSGrad

# Compute the median HSGrad once and keep it, so the recode below uses the
# actual median rather than a hand-copied threshold.
hsgrad_median <- median(stateindc6var$HSGrad)
hsgrad_median

## [1] 53.25

# Recode HSGrad as 1 when it is strictly above the median and 0 when it is
# equal to or below the median. Comparing against the computed median keeps
# the split correct if the data change or carry more decimal places, which a
# hard-coded ">= 53.26" would not.
stateindc6var$HSGrad <- ifelse(stateindc6var$HSGrad > hsgrad_median, 1, 0)

stateindc6var

##    Population Income Illiteracy LifeExp Murder HSGrad
## 1        3615   3624        2.1   69.05   15.1      0
## 2         365   6315        1.5   69.31   11.3      1
## 3        2212   4530        1.8   70.55    7.8      1
## 4        2110   3378        1.9   70.66   10.1      0
## 5       21198   5114        1.1   71.71   10.3      1
## 6        2541   4884        0.7   72.06    6.8      1
## 7        3100   5348        1.1   72.48    3.1      1
## 8         579   4809        0.9   70.06    6.2      1
## 9        8277   4815        1.3   70.66   10.7      0
## 10       4931   4091        2.0   68.54   13.9      0
## 11        868   4963        1.9   73.60    6.2      1
## 12        813   4119        0.6   71.87    5.3      1
## 13      11197   5107        0.9   70.14   10.3      0
## 14       5313   4458        0.7   70.88    7.1      0
## 15       2861   4628        0.5   72.56    2.3      1
## 16       2280   4669        0.6   72.58    4.5      1
## 17       3387   3712        1.6   70.10   10.6      0
## 18       3806   3545        2.8   68.76   13.2      0
## 19       1058   3694        0.7   70.39    2.7      1
## 20       4122   5299        0.9   70.22    8.5      0
## 21       5814   4755        1.1   71.83    3.3      1
## 22       9111   4751        0.9   70.63   11.1      0
## 23       3921   4675        0.6   72.96    2.3      1
## 24       2341   3098        2.4   68.09   12.5      0
## 25       4767   4254        0.8   70.69    9.3      0
## 26        746   4347        0.6   70.56    5.0      1
## 27       1544   4508        0.6   72.60    2.9      1
## 28        590   5149        0.5   69.03   11.5      1
## 29        812   4281        0.7   71.23    3.3      1
## 30       7333   5237        1.1   70.93    5.2      0
## 31       1144   3601        2.2   70.32    9.7      1
## 32      18076   4903        1.4   70.55   10.9      0
## 33       5441   3875        1.8   69.21   11.1      0
## 34        637   5087        0.8   72.78    1.4      0
## 35      10735   4561        0.8   70.82    7.4      0
## 36       2715   3983        1.1   71.42    6.4      0
## 37       2284   4660        0.6   72.13    4.2      1
## 38      11860   4449        1.0   70.43    6.1      0
## 39        931   4558        1.3   71.90    2.4      0
## 40       2816   3635        2.3   67.96   11.6      0
## 41        681   4167        0.5   72.08    1.7      1
## 42       4173   3821        1.7   70.11   11.0      0
## 43      12237   4188        2.2   70.90   12.2      0
## 44       1203   4022        0.6   72.90    4.5      1
## 45        472   3907        0.6   71.64    5.5      1
## 46       4981   4701        1.4   70.08    9.5      0
## 47       3559   4864        0.6   71.72    4.3      1
## 48       1799   3617        1.4   69.48    6.7      0
## 49       4589   4468        0.7   72.48    3.0      1
## 50        376   4566        0.6   70.29    6.9      1

# Equal-variance two-sample t-test of Income across the two HSGrad groups (0/1).
t.test(Income ~ HSGrad, data = stateindc6var, var.equal = TRUE)

## 
##  Two Sample t-test
## 
## data:  Income by HSGrad
## t = -1.9642, df = 48, p-value = 0.05531
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -671.518476    7.838476
## sample estimates:
## mean in group 0 mean in group 1 
##         4269.88         4601.72

# Here I fail to reject the null hypothesis that there is no difference in 
# Income between states above the median HSGrad and states at or below the 
# median HSGrad, because the p-value was greater than the .05 level:
# t = -1.9642, df = 48, p = 0.05531, 95% CI [-671.518476, 7.838476].




# b. there is no difference in Murder between one group of states (to include Alabama, 
# Alaska, Arkansas, Georgia, Illinois, Kentucky, Louisiana, Mississippi, and Michigan) 
# and another group of states (Arizona, Connecticut, Iowa, Kansas, Maine, Minnesota, 
# Nebraska, New Hampshire, North Dakota).


# Keep only the state name and murder rate for the two-group comparison.
stateindc2var <- select(stateindc, stateNames, Murder)

print(stateindc2var)

##        stateNames Murder
## 1         Alabama   15.1
## 2          Alaska   11.3
## 3         Arizona    7.8
## 4        Arkansas   10.1
## 5      California   10.3
## 6        Colorado    6.8
## 7     Connecticut    3.1
## 8        Delaware    6.2
## 9         Florida   10.7
## 10        Georgia   13.9
## 11         Hawaii    6.2
## 12          Idaho    5.3
## 13       Illinois   10.3
## 14        Indiana    7.1
## 15           Iowa    2.3
## 16         Kansas    4.5
## 17       Kentucky   10.6
## 18      Louisiana   13.2
## 19          Maine    2.7
## 20       Maryland    8.5
## 21  Massachusetts    3.3
## 22       Michigan   11.1
## 23      Minnesota    2.3
## 24    Mississippi   12.5
## 25       Missouri    9.3
## 26        Montana    5.0
## 27       Nebraska    2.9
## 28         Nevada   11.5
## 29  New Hampshire    3.3
## 30     New Jersey    5.2
## 31     New Mexico    9.7
## 32       New York   10.9
## 33 North Carolina   11.1
## 34   North Dakota    1.4
## 35           Ohio    7.4
## 36       Oklahoma    6.4
## 37         Oregon    4.2
## 38   Pennsylvania    6.1
## 39   Rhode Island    2.4
## 40 South Carolina   11.6
## 41   South Dakota    1.7
## 42      Tennessee   11.0
## 43          Texas   12.2
## 44           Utah    4.5
## 45        Vermont    5.5
## 46       Virginia    9.5
## 47     Washington    4.3
## 48  West Virginia    6.7
## 49      Wisconsin    3.0
## 50        Wyoming    6.9

# First comparison group. %in% tests membership against the whole set of names
# at once, replacing nine chained `==` / `|` comparisons on one long line.
group1_states <- c(
  "Alabama", "Alaska", "Arkansas", "Georgia", "Illinois",
  "Kentucky", "Louisiana", "Michigan", "Mississippi"
)
stategrp1 <- filter(stateindc2var, stateNames %in% group1_states)

stategrp1

##    stateNames Murder
## 1     Alabama   15.1
## 2      Alaska   11.3
## 3    Arkansas   10.1
## 4     Georgia   13.9
## 5    Illinois   10.3
## 6    Kentucky   10.6
## 7   Louisiana   13.2
## 8    Michigan   11.1
## 9 Mississippi   12.5

# Label every row of the first group with group number 0.
stategrp1$groupnum <- 0

print(stategrp1)

##    stateNames Murder groupnum
## 1     Alabama   15.1        0
## 2      Alaska   11.3        0
## 3    Arkansas   10.1        0
## 4     Georgia   13.9        0
## 5    Illinois   10.3        0
## 6    Kentucky   10.6        0
## 7   Louisiana   13.2        0
## 8    Michigan   11.1        0
## 9 Mississippi   12.5        0

# Second comparison group. %in% tests membership against the whole set of
# names at once, replacing nine chained `==` / `|` comparisons on one long line.
group2_states <- c(
  "Arizona", "Connecticut", "Iowa", "Kansas", "Maine",
  "Minnesota", "Nebraska", "New Hampshire", "North Dakota"
)
stategrp2 <- filter(stateindc2var, stateNames %in% group2_states)

stategrp2

##      stateNames Murder
## 1       Arizona    7.8
## 2   Connecticut    3.1
## 3          Iowa    2.3
## 4        Kansas    4.5
## 5         Maine    2.7
## 6     Minnesota    2.3
## 7      Nebraska    2.9
## 8 New Hampshire    3.3
## 9  North Dakota    1.4

# Label every row of the second group with group number 1.
stategrp2$groupnum <- 1

print(stategrp2)

##      stateNames Murder groupnum
## 1       Arizona    7.8        1
## 2   Connecticut    3.1        1
## 3          Iowa    2.3        1
## 4        Kansas    4.5        1
## 5         Maine    2.7        1
## 6     Minnesota    2.3        1
## 7      Nebraska    2.9        1
## 8 New Hampshire    3.3        1
## 9  North Dakota    1.4        1

# Stack the two labelled groups (group 0 rows first, then group 1) into one
# table for the t-test.
stategrp3 <- rbind(stategrp1, stategrp2)

print(stategrp3)

##       stateNames Murder groupnum
## 1        Alabama   15.1        0
## 2         Alaska   11.3        0
## 3       Arkansas   10.1        0
## 4        Georgia   13.9        0
## 5       Illinois   10.3        0
## 6       Kentucky   10.6        0
## 7      Louisiana   13.2        0
## 8       Michigan   11.1        0
## 9    Mississippi   12.5        0
## 10       Arizona    7.8        1
## 11   Connecticut    3.1        1
## 12          Iowa    2.3        1
## 13        Kansas    4.5        1
## 14         Maine    2.7        1
## 15     Minnesota    2.3        1
## 16      Nebraska    2.9        1
## 17 New Hampshire    3.3        1
## 18  North Dakota    1.4        1

# Equal-variance two-sample t-test of Murder between group 0 and group 1.
t.test(Murder ~ groupnum, data = stategrp3, var.equal = TRUE)

## 
##  Two Sample t-test
## 
## data:  Murder by groupnum
## t = 10.124, df = 16, p-value = 2.312e-08
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   6.834422 10.454467
## sample estimates:
## mean in group 0 mean in group 1 
##       12.011111        3.366667

# Here I reject the null hypothesis that there is no difference in Murder between 
# one group of states (Alabama, Alaska, Arkansas, Georgia, Illinois, Kentucky, 
# Louisiana, Mississippi, and Michigan) and another group of states (Arizona, 
# Connecticut, Iowa, Kansas, Maine, Minnesota, Nebraska, New Hampshire, and 
# North Dakota), because the p-value was less than the .05 level:
# t = 10.124, df = 16, p = 2.312e-08, 95% CI [6.834422, 10.454467].

# finaltask2.R
#
# aleigey
#
# Wed Dec 9 17:03:35 2015