# Show the working directory: load(), read.csv() and write.csv() below are
# given bare file names, so they resolve relative to this directory.
getwd()
## [1] "C:/Users/Jerome/Documents/0000_Work_Files/0000_Coursera/Statistics_with_R_Specialization/Course_1_Probability_&_Data/Week5_Project"
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(statsr)
## Warning: package 'statsr' was built under R version 4.0.3
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.2
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v tibble 3.0.4 v purrr 0.3.4
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.0
## Warning: package 'tibble' was built under R version 4.0.3
## Warning: package 'tidyr' was built under R version 4.0.2
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Load brfss2013.RData (which provides the `brfss2013` data frame) and keep
# only the rows for the two states being compared.
load("brfss2013.RData")
hims0 <- subset(brfss2013, X_state %in% c("Hawaii", "Mississippi"))

# Round-trip through CSV so the following steps work on the re-read copy.
# NOTE(review): presumably done to turn factor columns into plain character
# columns and drop unused levels -- confirm before removing.
write.csv(hims0, "hims0.csv", row.names = FALSE)
hims0 <- read.csv("hims0.csv", header = TRUE)

# Drop rows with a missing smoke100 value. is.na() states the intent
# directly; comparing against the string "NA" would only appear to work
# because filter() discards rows whose condition evaluates to NA.
hims0 <- filter(hims0, !is.na(smoke100))
write.csv(hims0, "hims0.csv", row.names = FALSE)
hims0 <- read.csv("hims0.csv", header = TRUE)
table(hims0$smoke100)
##
## No Yes
## 8574 6350
# Keep rows that have a usenow3 value other than "Some days", which leaves
# only the "Every day" and "Not at all" groups. Missing values are tested
# with is.na() rather than by comparing against the string "NA".
hims0 <- filter(hims0, !is.na(usenow3), usenow3 != "Some days")
table(hims0$usenow3)
##
## Every day Not at all
## 300 14341
# Persist the filtered data and continue from the re-read copy.
write.csv(hims0, "hims0.csv", row.names = FALSE)
hims0 <- read.csv("hims0.csv", header = TRUE)
# Flag nicotine use: 1 when smoke100 is "Yes" or usenow3 is "Every day",
# otherwise 0.
hims1 <- mutate(
  hims0,
  nicotine = ifelse(smoke100 == "Yes" | usenow3 == "Every day", 1, 0)
)
write.csv(hims1, "hims1.csv", row.names = FALSE)
hims1 <- read.csv("hims1.csv", header = TRUE)
table(hims1$nicotine)
##
## 0 1
## 8311 6330
# Cross-tabulate state against the nicotine flag; kept in a variable so the
# totals and proportions below are computed from it rather than retyped.
state_by_nicotine <- table(hims1$X_state, hims1$nicotine)
state_by_nicotine
##
## 0 1
## Hawaii 4379 3219
## Mississippi 3932 3111
# Number of rows per state.
rowSums(state_by_nicotine)
## Hawaii Mississippi
## 7598 7043
# Share of rows flagged as nicotine users within each state.
prop.table(state_by_nicotine, margin = 1)[, "1"]
## Hawaii Mississippi
## 0.4236641 0.4417152
# Add a "Yes"/"No" text version of the nicotine flag (same condition as the
# 0/1 column) so it can be used as a discrete fill in the plot.
hims2 <- hims1 %>%
  mutate(
    nicotine1 = ifelse(
      smoke100 == "Yes" | usenow3 == "Every day",
      "Yes",
      "No"
    )
  )
write.csv(hims2, file = "hims2.csv", row.names = FALSE)
hims2 <- read.csv(file = "hims2.csv", header = TRUE)
# Stacked bar chart: one bar per state, split by nicotine use.
ggplot(hims2, aes(x = X_state, fill = nicotine1)) +
  geom_bar()
### As the plot shows, the two states have nearly equal proportions of tobacco users, consistent with the percentages calculated above (Hawaii 42.4%, Mississippi 44.2%).
# Consistency check: the "Yes"/"No" label should agree with the 1/0 flag, so
# all counts are expected on the diagonal.
table(hims2[["nicotine1"]], hims2[["nicotine"]])
##
## 0 1
## No 8311 0
## Yes 0 6330
# Build the 2 x 2 count matrix for prop.test() from hims1 itself instead of
# retyping the counts, so it cannot drift from the data. Rows are the states;
# the first column holds the rows flagged nicotine == 1 (treated as the
# "successes" by prop.test), the second column the rows flagged 0.
nicotine_counts <- table(hims1$X_state, hims1$nicotine)
state_order <- c("Hawaii", "Mississippi")
nicotine_use <- cbind(
  "Use" = nicotine_counts[state_order, "1"],
  "Don't Use" = nicotine_counts[state_order, "0"]
)
nicotine_use
## Use Don't Use
## Hawaii 3219 4379
## Mississippi 3111 3932
# Two-sample test for equality of the nicotine-use proportions in the two
# states (prop.test applies the continuity correction by default).
result.prop <- prop.test(nicotine_use)
result.prop
##
## 2-sample test for equality of proportions with continuity correction
##
## data: nicotine_use
## X-squared = 4.7793, df = 1, p-value = 0.0288
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.034248843 -0.001853269
## sample estimates:
## prop 1 prop 2
## 0.4236641 0.4417152
# Restrict to rows whose X_age65yr is present and is not "Age 18 to 64".
# Missing values are tested with is.na() rather than by comparing against
# the string "NA".
hims65 <- filter(hims2, !is.na(X_age65yr), X_age65yr != "Age 18 to 64")
table(hims65$flushot6)
##
## No Yes
## 1637 2957
table(hims65$X_age65yr, hims65$X_state)
##
## Hawaii Mississippi
## Age 65 or older 2203 2586
# Persist the subset and continue from the re-read copy.
write.csv(hims65, file = "hims65.csv", row.names = FALSE)
hims65 <- read.csv("hims65.csv", header = TRUE)
# Same flushot6 table again, now on the re-read data, followed by pneuvac3.
# table() leaves NA values out of the counts by default.
table(hims65$flushot6)
##
## No Yes
## 1637 2957
table(hims65$pneuvac3)
##
## No Yes
## 1428 2913
# vax is "Yes" when either flushot6 or pneuvac3 is "Yes". Because `|` returns
# TRUE as soon as one side is TRUE, a "Yes" on one variable counts even when
# the other is missing; when neither is "Yes" and at least one is missing the
# result is NA.
hims65 <- mutate(
  hims65,
  vax = ifelse(flushot6 == "Yes" | pneuvac3 == "Yes", "Yes", "No")
)
# Drop the rows whose vaccination status could not be determined. is.na() is
# used instead of comparing against the string "NA".
hims65 <- filter(hims65, !is.na(vax))
table(hims65$X_state, hims65$vax)
##
## No Yes
## Hawaii 363 1696
## Mississippi 489 1958
write.csv(hims65, file = "hims65.csv", row.names = FALSE)
hims65 <- read.csv("hims65.csv", header = TRUE)
# Build the 2 x 2 count matrix for prop.test() from hims65 itself instead of
# retyping the counts. Rows are the states; the first column holds the rows
# with vax == "Yes" (treated as the "successes" by prop.test), the second
# column the rows with vax == "No".
vax_counts <- table(hims65$X_state, hims65$vax)
state_order <- c("Hawaii", "Mississippi")
vaccinated <- cbind(
  "Vaxed" = vax_counts[state_order, "Yes"],
  "Not Vaxed" = vax_counts[state_order, "No"]
)
vaccinated
## Vaxed Not Vaxed
## Hawaii 1696 363
## Mississippi 1958 489
# Two-sample test for equality of the vaccinated proportions in the two
# states. Note that result.prop is reused and overwritten by each test.
result.prop <- prop.test(vaccinated)
result.prop
##
## 2-sample test for equality of proportions with continuity correction
##
## data: vaccinated
## X-squared = 3.888, df = 1, p-value = 0.04863
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## 0.0002438377 0.0468308826
## sample estimates:
## prop 1 prop 2
## 0.8237008 0.8001635
# Stacked bar chart: one bar per state, split by vaccination status.
ggplot(data = hims65, aes(x = X_state, fill = vax)) +
  geom_bar()
# Distribution of genhlth before filtering; table() leaves NA values out of
# the counts by default.
table(hims2$genhlth)
##
## Excellent Fair Good Poor Very good
## 2369 2141 4905 1056 4134
# Drop rows with a missing genhlth value (tested with is.na() rather than by
# comparing against the string "NA"), then persist and re-read.
hims2 <- filter(hims2, !is.na(genhlth))
write.csv(hims2, file = "hims2.csv", row.names = FALSE)
hims2 <- read.csv("hims2.csv", header = TRUE)
# Collapse genhlth into two groups: "Excellent", "Very good" and "Good" are
# labelled "Good"; every other value is labelled "Poor".
hims3 <- hims2 %>%
  mutate(
    health_status = ifelse(
      genhlth == "Excellent" | genhlth == "Very good" | genhlth == "Good",
      "Good",
      "Poor"
    )
  )
table(hims3[["X_state"]], hims3[["health_status"]])
##
## Good Poor
## Hawaii 6449 1141
## Mississippi 4959 2056
# Build the 2 x 2 count matrix for prop.test() from hims3 itself instead of
# retyping the counts. Rows are the states; the first column holds the rows
# with health_status == "Good" (treated as the "successes" by prop.test), the
# second column the rows with health_status == "Poor".
health_counts <- table(hims3$X_state, hims3$health_status)
state_order <- c("Hawaii", "Mississippi")
healthy <- cbind(
  "Good" = health_counts[state_order, "Good"],
  "Poor" = health_counts[state_order, "Poor"]
)
healthy
## Good Poor
## Hawaii 6449 1141
## Mississippi 4959 2056
# Two-sample test for equality of the "Good" health proportions in the two
# states. Note that result.prop is reused and overwritten by each test.
result.prop <- prop.test(healthy)
result.prop
##
## 2-sample test for equality of proportions with continuity correction
##
## data: healthy
## X-squared = 433.69, df = 1, p-value < 2.2e-16
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## 0.1292742 0.1562396
## sample estimates:
## prop 1 prop 2
## 0.8496706 0.7069138
# Stacked bar chart: one bar per state, split by health status.
ggplot(data = hims3, aes(x = X_state, fill = health_status)) +
  geom_bar()
# Persist the data that carries health_status and continue from the re-read
# copy. Every other round trip in this script reads the file back into the
# object that was written, so hims3.csv is read back into hims3.
write.csv(hims3, file = "hims3.csv", row.names = FALSE)
hims3 <- read.csv("hims3.csv", header = TRUE)
# NOTE(review): this step previously assigned the hims3.csv contents to
# hims2, which looks like a typo. The alias is kept so any later code that
# reads hims2 still sees the same data -- remove once confirmed unused.
hims2 <- hims3