##Title: WFED 540 Final Task 2
##Author: Andrew Leigey
##Date: December 2, 2015
##Output: html_document
require(dplyr)
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
require(ggvis)
## Loading required package: ggvis
require(magrittr)
## Loading required package: magrittr
require(knitr)
## Loading required package: knitr
require(rmarkdown)
## Loading required package: rmarkdown
require(corrplot)
## Loading required package: corrplot
# Import the state indicator table from the working directory. read.csv()
# already assumes a header row and comma-separated fields.
stateindc <- read.csv("Stateindicator.csv")
# Echo the full table to confirm the import worked.
stateindc
## stateNames Population Income Illiteracy LifeExp Murder HSGrad
## 1 Alabama 3615 3624 2.1 69.05 15.1 41.3
## 2 Alaska 365 6315 1.5 69.31 11.3 66.7
## 3 Arizona 2212 4530 1.8 70.55 7.8 58.1
## 4 Arkansas 2110 3378 1.9 70.66 10.1 39.9
## 5 California 21198 5114 1.1 71.71 10.3 62.6
## 6 Colorado 2541 4884 0.7 72.06 6.8 63.9
## 7 Connecticut 3100 5348 1.1 72.48 3.1 56.0
## 8 Delaware 579 4809 0.9 70.06 6.2 54.6
## 9 Florida 8277 4815 1.3 70.66 10.7 52.6
## 10 Georgia 4931 4091 2.0 68.54 13.9 40.6
## 11 Hawaii 868 4963 1.9 73.60 6.2 61.9
## 12 Idaho 813 4119 0.6 71.87 5.3 59.5
## 13 Illinois 11197 5107 0.9 70.14 10.3 52.6
## 14 Indiana 5313 4458 0.7 70.88 7.1 52.9
## 15 Iowa 2861 4628 0.5 72.56 2.3 59.0
## 16 Kansas 2280 4669 0.6 72.58 4.5 59.9
## 17 Kentucky 3387 3712 1.6 70.10 10.6 38.5
## 18 Louisiana 3806 3545 2.8 68.76 13.2 42.2
## 19 Maine 1058 3694 0.7 70.39 2.7 54.7
## 20 Maryland 4122 5299 0.9 70.22 8.5 52.3
## 21 Massachusetts 5814 4755 1.1 71.83 3.3 58.5
## 22 Michigan 9111 4751 0.9 70.63 11.1 52.8
## 23 Minnesota 3921 4675 0.6 72.96 2.3 57.6
## 24 Mississippi 2341 3098 2.4 68.09 12.5 41.0
## 25 Missouri 4767 4254 0.8 70.69 9.3 48.8
## 26 Montana 746 4347 0.6 70.56 5.0 59.2
## 27 Nebraska 1544 4508 0.6 72.60 2.9 59.3
## 28 Nevada 590 5149 0.5 69.03 11.5 65.2
## 29 New Hampshire 812 4281 0.7 71.23 3.3 57.6
## 30 New Jersey 7333 5237 1.1 70.93 5.2 52.5
## 31 New Mexico 1144 3601 2.2 70.32 9.7 55.2
## 32 New York 18076 4903 1.4 70.55 10.9 52.7
## 33 North Carolina 5441 3875 1.8 69.21 11.1 38.5
## 34 North Dakota 637 5087 0.8 72.78 1.4 50.3
## 35 Ohio 10735 4561 0.8 70.82 7.4 53.2
## 36 Oklahoma 2715 3983 1.1 71.42 6.4 51.6
## 37 Oregon 2284 4660 0.6 72.13 4.2 60.0
## 38 Pennsylvania 11860 4449 1.0 70.43 6.1 50.2
## 39 Rhode Island 931 4558 1.3 71.90 2.4 46.4
## 40 South Carolina 2816 3635 2.3 67.96 11.6 37.8
## 41 South Dakota 681 4167 0.5 72.08 1.7 53.3
## 42 Tennessee 4173 3821 1.7 70.11 11.0 41.8
## 43 Texas 12237 4188 2.2 70.90 12.2 47.4
## 44 Utah 1203 4022 0.6 72.90 4.5 67.3
## 45 Vermont 472 3907 0.6 71.64 5.5 57.1
## 46 Virginia 4981 4701 1.4 70.08 9.5 47.8
## 47 Washington 3559 4864 0.6 71.72 4.3 63.5
## 48 West Virginia 1799 3617 1.4 69.48 6.7 41.6
## 49 Wisconsin 4589 4468 0.7 72.48 3.0 54.5
## 50 Wyoming 376 4566 0.6 70.29 6.9 62.9
# Descriptive statistics for every column of the imported table.
# dplyr::summarise() called without any summary expressions returns an empty
# result ("data frame with 0 columns and 0 rows"), so base summary() is used
# to obtain the per-variable overview instead.
summary(stateindc)
#1. Correlogram
#Correlations: When I computed the correlations among the six variables from the State Indicators file I found
#that there was a medium positive correlation between Income and Life Expectancy
#of 0.34. A strong positive correlation between High School Grad and Life Expectancy
#of 0.58. A strong positive correlation between High School Grad and Income of 0.62.
#A small positive correlation between Population and Income of 0.21. A strong negative
#correlation between Illiteracy and Life Expectancy of -0.59. A medium negative
#correlation between Illiteracy and Income of -0.44. A strong negative correlation
#between Illiteracy and High School Grad of -0.66. A strong negative correlation
#between Murder and Life Expectancy of -0.78. A small negative correlation between
#Murder and Income of -0.23. A medium negative correlation between Murder and High
#School Grad of -0.49. A medium positive correlation between Murder and Population
#of 0.34, and a strong positive correlation between Murder and Illiteracy of 0.7,
#which I found interesting: states with a higher Illiteracy rate tended to have a
#higher Murder and non-negligent manslaughter rate.
#Here I am selecting the six variables for the Correlogram.
# Keep only the six numeric indicators (the state name column is dropped).
stateindc6var <- select(
  stateindc,
  Population, Income, Illiteracy, LifeExp, Murder, HSGrad
)
# Print the subset to verify that the selection was successful.
stateindc6var
## Population Income Illiteracy LifeExp Murder HSGrad
## 1 3615 3624 2.1 69.05 15.1 41.3
## 2 365 6315 1.5 69.31 11.3 66.7
## 3 2212 4530 1.8 70.55 7.8 58.1
## 4 2110 3378 1.9 70.66 10.1 39.9
## 5 21198 5114 1.1 71.71 10.3 62.6
## 6 2541 4884 0.7 72.06 6.8 63.9
## 7 3100 5348 1.1 72.48 3.1 56.0
## 8 579 4809 0.9 70.06 6.2 54.6
## 9 8277 4815 1.3 70.66 10.7 52.6
## 10 4931 4091 2.0 68.54 13.9 40.6
## 11 868 4963 1.9 73.60 6.2 61.9
## 12 813 4119 0.6 71.87 5.3 59.5
## 13 11197 5107 0.9 70.14 10.3 52.6
## 14 5313 4458 0.7 70.88 7.1 52.9
## 15 2861 4628 0.5 72.56 2.3 59.0
## 16 2280 4669 0.6 72.58 4.5 59.9
## 17 3387 3712 1.6 70.10 10.6 38.5
## 18 3806 3545 2.8 68.76 13.2 42.2
## 19 1058 3694 0.7 70.39 2.7 54.7
## 20 4122 5299 0.9 70.22 8.5 52.3
## 21 5814 4755 1.1 71.83 3.3 58.5
## 22 9111 4751 0.9 70.63 11.1 52.8
## 23 3921 4675 0.6 72.96 2.3 57.6
## 24 2341 3098 2.4 68.09 12.5 41.0
## 25 4767 4254 0.8 70.69 9.3 48.8
## 26 746 4347 0.6 70.56 5.0 59.2
## 27 1544 4508 0.6 72.60 2.9 59.3
## 28 590 5149 0.5 69.03 11.5 65.2
## 29 812 4281 0.7 71.23 3.3 57.6
## 30 7333 5237 1.1 70.93 5.2 52.5
## 31 1144 3601 2.2 70.32 9.7 55.2
## 32 18076 4903 1.4 70.55 10.9 52.7
## 33 5441 3875 1.8 69.21 11.1 38.5
## 34 637 5087 0.8 72.78 1.4 50.3
## 35 10735 4561 0.8 70.82 7.4 53.2
## 36 2715 3983 1.1 71.42 6.4 51.6
## 37 2284 4660 0.6 72.13 4.2 60.0
## 38 11860 4449 1.0 70.43 6.1 50.2
## 39 931 4558 1.3 71.90 2.4 46.4
## 40 2816 3635 2.3 67.96 11.6 37.8
## 41 681 4167 0.5 72.08 1.7 53.3
## 42 4173 3821 1.7 70.11 11.0 41.8
## 43 12237 4188 2.2 70.90 12.2 47.4
## 44 1203 4022 0.6 72.90 4.5 67.3
## 45 472 3907 0.6 71.64 5.5 57.1
## 46 4981 4701 1.4 70.08 9.5 47.8
## 47 3559 4864 0.6 71.72 4.3 63.5
## 48 1799 3617 1.4 69.48 6.7 41.6
## 49 4589 4468 0.7 72.48 3.0 54.5
## 50 376 4566 0.6 70.29 6.9 62.9
# Load the rquery.cormat() helper by sourcing the script from the STHDA site.
# NOTE(review): this fetches and executes remote code over plain HTTP on every
# run, so it needs network access; consider vendoring a reviewed copy.
source("http://www.sthda.com/upload/rquery_cormat.r")
# Produce the correlogram for the six variables; the call also prints the
# correlation ($r), p-value ($p) and symbol ($sym) tables shown below.
rquery.cormat(stateindc6var)
## $r
## LifeExp Income HSGrad Population Illiteracy Murder
## LifeExp 1
## Income 0.34 1
## HSGrad 0.58 0.62 1
## Population -0.068 0.21 -0.098 1
## Illiteracy -0.59 -0.44 -0.66 0.11 1
## Murder -0.78 -0.23 -0.49 0.34 0.7 1
##
## $p
## LifeExp Income HSGrad Population Illiteracy Murder
## LifeExp 0
## Income 0.016 0
## HSGrad 9.2e-06 1.6e-06 0
## Population 0.64 0.15 0.5 0
## Illiteracy 7e-06 0.0015 2.2e-07 0.46 0
## Murder 2.3e-11 0.11 0.00032 0.015 1.3e-08 0
##
## $sym
## LifeExp Income HSGrad Population Illiteracy Murder
## LifeExp 1
## Income . 1
## HSGrad . , 1
## Population 1
## Illiteracy . . , 1
## Murder , . . , 1
## attr(,"legend")
## [1] 0 ' ' 0.3 '.' 0.6 ',' 0.8 '+' 0.9 '*' 0.95 'B' 1
# Heat map of the same six variables; the helper's return value is kept in
# `cormat`.
cormat <- rquery.cormat(stateindc6var, graphType = "heatmap")
#Task 2. Using ggvis, construct a.) plots that demonstrate the relationship between
# i.) HSGrad and Income
# Scatterplot of Income against HSGrad with a smoothed trend line on top.
stateindc6var %>%
  ggvis(~HSGrad, ~Income) %>%
  layer_points() %>%
  layer_smooths()
# ii.) Illiteracy and Income
# Scatterplot of Income against Illiteracy with a smoothed trend line on top.
stateindc6var %>%
  ggvis(~Illiteracy, ~Income) %>%
  layer_points() %>%
  layer_smooths()
# b.) A scatterplot of Murder by Illiteracy grouped by HSGrad
# Group on a two-level split of HSGrad (above vs. at or below the median)
# rather than on the raw continuous values. Almost every state has its own
# HSGrad value, so grouping on the raw column fits one regression per value,
# most of them on a single state; that is what produced the repeated
# "prediction from a rank-deficient fit may be misleading" warnings.
stateindc6var %>%
  mutate(
    HSGradGroup = factor(
      HSGrad > median(HSGrad),
      levels = c(FALSE, TRUE),
      labels = c("At or below median", "Above median")
    )
  ) %>%
  ggvis(~Murder, ~Illiteracy, fill = ~HSGradGroup) %>%
  layer_points() %>%
  group_by(HSGradGroup) %>%
  # State the model formula explicitly instead of letting ggvis guess it.
  layer_model_predictions(model = "lm", formula = Illiteracy ~ Murder)
# 3. Report tests of null hypotheses that: a. there is no difference in
# Income between states above median HSGrad and states less than or equal
# to median HSGrad
# Here I am finding what the Median HSGrad is.
median(stateindc6var$HSGrad)
## [1] 53.25
# Recode HSGrad as 1 when a state is strictly above the median and 0 when it
# is equal to or below the median. The threshold is taken from the data
# instead of a hand-typed constant (previously `>= 53.26`), so the split keeps
# matching the stated rule if the input file changes.
stateindc6var$HSGrad <- ifelse(
  stateindc6var$HSGrad > median(stateindc6var$HSGrad),
  1,
  0
)
# Print the recoded table to check the new 0/1 column.
stateindc6var
## Population Income Illiteracy LifeExp Murder HSGrad
## 1 3615 3624 2.1 69.05 15.1 0
## 2 365 6315 1.5 69.31 11.3 1
## 3 2212 4530 1.8 70.55 7.8 1
## 4 2110 3378 1.9 70.66 10.1 0
## 5 21198 5114 1.1 71.71 10.3 1
## 6 2541 4884 0.7 72.06 6.8 1
## 7 3100 5348 1.1 72.48 3.1 1
## 8 579 4809 0.9 70.06 6.2 1
## 9 8277 4815 1.3 70.66 10.7 0
## 10 4931 4091 2.0 68.54 13.9 0
## 11 868 4963 1.9 73.60 6.2 1
## 12 813 4119 0.6 71.87 5.3 1
## 13 11197 5107 0.9 70.14 10.3 0
## 14 5313 4458 0.7 70.88 7.1 0
## 15 2861 4628 0.5 72.56 2.3 1
## 16 2280 4669 0.6 72.58 4.5 1
## 17 3387 3712 1.6 70.10 10.6 0
## 18 3806 3545 2.8 68.76 13.2 0
## 19 1058 3694 0.7 70.39 2.7 1
## 20 4122 5299 0.9 70.22 8.5 0
## 21 5814 4755 1.1 71.83 3.3 1
## 22 9111 4751 0.9 70.63 11.1 0
## 23 3921 4675 0.6 72.96 2.3 1
## 24 2341 3098 2.4 68.09 12.5 0
## 25 4767 4254 0.8 70.69 9.3 0
## 26 746 4347 0.6 70.56 5.0 1
## 27 1544 4508 0.6 72.60 2.9 1
## 28 590 5149 0.5 69.03 11.5 1
## 29 812 4281 0.7 71.23 3.3 1
## 30 7333 5237 1.1 70.93 5.2 0
## 31 1144 3601 2.2 70.32 9.7 1
## 32 18076 4903 1.4 70.55 10.9 0
## 33 5441 3875 1.8 69.21 11.1 0
## 34 637 5087 0.8 72.78 1.4 0
## 35 10735 4561 0.8 70.82 7.4 0
## 36 2715 3983 1.1 71.42 6.4 0
## 37 2284 4660 0.6 72.13 4.2 1
## 38 11860 4449 1.0 70.43 6.1 0
## 39 931 4558 1.3 71.90 2.4 0
## 40 2816 3635 2.3 67.96 11.6 0
## 41 681 4167 0.5 72.08 1.7 1
## 42 4173 3821 1.7 70.11 11.0 0
## 43 12237 4188 2.2 70.90 12.2 0
## 44 1203 4022 0.6 72.90 4.5 1
## 45 472 3907 0.6 71.64 5.5 1
## 46 4981 4701 1.4 70.08 9.5 0
## 47 3559 4864 0.6 71.72 4.3 1
## 48 1799 3617 1.4 69.48 6.7 0
## 49 4589 4468 0.7 72.48 3.0 1
## 50 376 4566 0.6 70.29 6.9 1
# Pooled-variance two-sample t-test of Income between the two HSGrad groups
# (0 = at or below the median, 1 = above the median).
t.test(Income ~ HSGrad, data = stateindc6var, var.equal = TRUE)
##
## Two Sample t-test
##
## data: Income by HSGrad
## t = -1.9642, df = 48, p-value = 0.05531
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -671.518476 7.838476
## sample estimates:
## mean in group 0 mean in group 1
## 4269.88 4601.72
# Here I fail to reject the null hypothesis that there is no difference in
# Income between states above median HSGrad and states less than or equal
# to median HSGrad because the p-value was greater than .05 level.
# t=-1.9642, df=48, p=0.05531, 95% CI [-671.518476, 7.838476]
# b. there is no difference in Murder between one group of states (to include Alabama,
# Alaska, Arkansas, Georgia, Illinois, Kentucky, Louisiana, Mississippi, and Michigan)
# and another group of states (Arizona, Connecticut, Iowa, Kansas, Maine, Minnesota,
# Nebraska, New Hampshire, North Dakota).
# Only the state name and the Murder rate are needed for this comparison.
stateindc2var <- select(stateindc, stateNames, Murder)
# Print the two-column table to verify the selection.
stateindc2var
## stateNames Murder
## 1 Alabama 15.1
## 2 Alaska 11.3
## 3 Arizona 7.8
## 4 Arkansas 10.1
## 5 California 10.3
## 6 Colorado 6.8
## 7 Connecticut 3.1
## 8 Delaware 6.2
## 9 Florida 10.7
## 10 Georgia 13.9
## 11 Hawaii 6.2
## 12 Idaho 5.3
## 13 Illinois 10.3
## 14 Indiana 7.1
## 15 Iowa 2.3
## 16 Kansas 4.5
## 17 Kentucky 10.6
## 18 Louisiana 13.2
## 19 Maine 2.7
## 20 Maryland 8.5
## 21 Massachusetts 3.3
## 22 Michigan 11.1
## 23 Minnesota 2.3
## 24 Mississippi 12.5
## 25 Missouri 9.3
## 26 Montana 5.0
## 27 Nebraska 2.9
## 28 Nevada 11.5
## 29 New Hampshire 3.3
## 30 New Jersey 5.2
## 31 New Mexico 9.7
## 32 New York 10.9
## 33 North Carolina 11.1
## 34 North Dakota 1.4
## 35 Ohio 7.4
## 36 Oklahoma 6.4
## 37 Oregon 4.2
## 38 Pennsylvania 6.1
## 39 Rhode Island 2.4
## 40 South Carolina 11.6
## 41 South Dakota 1.7
## 42 Tennessee 11.0
## 43 Texas 12.2
## 44 Utah 4.5
## 45 Vermont 5.5
## 46 Virginia 9.5
## 47 Washington 4.3
## 48 West Virginia 6.7
## 49 Wisconsin 3.0
## 50 Wyoming 6.9
# First comparison group: keep the rows whose state name is in the listed set.
# Set membership via %in% replaces the long chain of `==` tests joined by `|`.
stategrp1 <- filter(
  stateindc2var,
  stateNames %in% c(
    "Alabama", "Alaska", "Arkansas", "Georgia", "Illinois",
    "Kentucky", "Louisiana", "Michigan", "Mississippi"
  )
)
# Print the group to confirm that all nine states were matched.
stategrp1
## stateNames Murder
## 1 Alabama 15.1
## 2 Alaska 11.3
## 3 Arkansas 10.1
## 4 Georgia 13.9
## 5 Illinois 10.3
## 6 Kentucky 10.6
## 7 Louisiana 13.2
## 8 Michigan 11.1
## 9 Mississippi 12.5
# Tag every state in the first group with group code 0.
stategrp1$groupnum <- 0
stategrp1
## stateNames Murder groupnum
## 1 Alabama 15.1 0
## 2 Alaska 11.3 0
## 3 Arkansas 10.1 0
## 4 Georgia 13.9 0
## 5 Illinois 10.3 0
## 6 Kentucky 10.6 0
## 7 Louisiana 13.2 0
## 8 Michigan 11.1 0
## 9 Mississippi 12.5 0
# Second comparison group: keep the rows whose state name is in the listed
# set. Set membership via %in% replaces the long chain of `==` tests joined
# by `|`.
stategrp2 <- filter(
  stateindc2var,
  stateNames %in% c(
    "Arizona", "Connecticut", "Iowa", "Kansas", "Maine",
    "Minnesota", "Nebraska", "New Hampshire", "North Dakota"
  )
)
# Print the group to confirm that all nine states were matched.
stategrp2
## stateNames Murder
## 1 Arizona 7.8
## 2 Connecticut 3.1
## 3 Iowa 2.3
## 4 Kansas 4.5
## 5 Maine 2.7
## 6 Minnesota 2.3
## 7 Nebraska 2.9
## 8 New Hampshire 3.3
## 9 North Dakota 1.4
# Tag every state in the second group with group code 1.
stategrp2$groupnum <- 1
stategrp2
## stateNames Murder groupnum
## 1 Arizona 7.8 1
## 2 Connecticut 3.1 1
## 3 Iowa 2.3 1
## 4 Kansas 4.5 1
## 5 Maine 2.7 1
## 6 Minnesota 2.3 1
## 7 Nebraska 2.9 1
## 8 New Hampshire 3.3 1
## 9 North Dakota 1.4 1
# Stack the two tagged groups (first group on top) into one table so that
# the group code can serve as the grouping variable of the t-test.
stategrp3 <- rbind(stategrp1, stategrp2)
stategrp3
## stateNames Murder groupnum
## 1 Alabama 15.1 0
## 2 Alaska 11.3 0
## 3 Arkansas 10.1 0
## 4 Georgia 13.9 0
## 5 Illinois 10.3 0
## 6 Kentucky 10.6 0
## 7 Louisiana 13.2 0
## 8 Michigan 11.1 0
## 9 Mississippi 12.5 0
## 10 Arizona 7.8 1
## 11 Connecticut 3.1 1
## 12 Iowa 2.3 1
## 13 Kansas 4.5 1
## 14 Maine 2.7 1
## 15 Minnesota 2.3 1
## 16 Nebraska 2.9 1
## 17 New Hampshire 3.3 1
## 18 North Dakota 1.4 1
# Pooled-variance two-sample t-test of Murder between group 0 and group 1.
t.test(Murder ~ groupnum, data = stategrp3, var.equal = TRUE)
##
## Two Sample t-test
##
## data: Murder by groupnum
## t = 10.124, df = 16, p-value = 2.312e-08
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 6.834422 10.454467
## sample estimates:
## mean in group 0 mean in group 1
## 12.011111 3.366667
# Here I reject the null hypothesis that there is no difference in Murder between
# one group of states (to include Alabama, Alaska, Arkansas, Georgia, Illinois,
# Kentucky, Louisiana, Mississippi, and Michigan) and another group of states
# (Arizona, Connecticut, Iowa, Kansas, Maine, Minnesota, Nebraska, New Hampshire,
# North Dakota) because the p-value was less than .05 level.
# t=10.124, df=16, p=2.312e-08, 95% CI [6.834422, 10.454467]
# End of script.