Research Question: Do Smoking and Drinking Habits Affect the

Amount of Unhealthy Days a Person has and Does this Trend

Stay Constant in Different States?

The states observed in this study are New York, Minnesota, and Texas, using data from 2016.

Importing CSV Data into R

The readr package was used to import the CSV file, downloaded from Social Explorer, into R.

The head function displays the first few rows of the data set.

# Load readr, then read the CSV file into `health`, taking the column
# names from the file's first row.
library(readr)
health <- read_csv(
  file = "/Users/paulkim/Desktop/csvdata2.csv",
  col_names = TRUE
)
## Parsed with column specification:
## cols(
##   Geo_FIPS = col_integer(),
##   Geo_NAME = col_character(),
##   Geo_QNAME = col_character(),
##   Geo_STATE = col_integer(),
##   Geo_COUNTY = col_integer(),
##   SE_T001_001 = col_double(),
##   SE_T001_002 = col_double(),
##   SE_T011_001 = col_double(),
##   SE_T011_002 = col_double(),
##   SE_T012_001 = col_double(),
##   SE_T012_002 = col_double(),
##   SE_T012_003 = col_double(),
##   SE_T012_004 = col_double(),
##   SE_T012_005 = col_double(),
##   SE_T013_001 = col_double()
## )
# Preview the first six rows of the imported data.
head(health, n = 6L)
## # A tibble: 6 x 15
##   Geo_FIPS Geo_NAME Geo_QNAME Geo_STATE Geo_COUNTY SE_T001_001 SE_T001_002
##      <int> <chr>    <chr>         <int>      <int>       <dbl>       <dbl>
## 1    27001 Aitkin … Aitkin C…        27          1        3.10        2.90
## 2    27003 Anoka C… Anoka Co…        27          3        2.70        2.80
## 3    27005 Becker … Becker C…        27          5        2.90        2.90
## 4    27007 Beltram… Beltrami…        27          7        3.50        3.30
## 5    27009 Benton … Benton C…        27          9        2.80        2.90
## 6    27011 Big Sto… Big Ston…        27         11        2.80        2.80
## # ... with 8 more variables: SE_T011_001 <dbl>, SE_T011_002 <dbl>,
## #   SE_T012_001 <dbl>, SE_T012_002 <dbl>, SE_T012_003 <dbl>,
## #   SE_T012_004 <dbl>, SE_T012_005 <dbl>, SE_T013_001 <dbl>

Renaming, Selecting and Filtering the Data

The rename function was used to give the variables more suitable names.

For this study, only a few variables need to be observed. The select function was used to select the variables needed for the observation.

The filter function was used to restrict the data to the states under observation. In the code below, it keeps only the rows for Minnesota, which has the state number 27.

The original data set had physically unhealthy days and mentally unhealthy days separated, so the mutate function was used to create a new variable called total_unhealthy_days that adds the two together.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Give the Social Explorer column codes descriptive names, then keep only the
# state identifier, the physically-unhealthy-days measure, and the two habit
# columns (current_smoker and drinking_adults).
health2 <- health %>%
  rename(
    county = Geo_COUNTY,
    state = Geo_STATE,
    physically_unhealthy_days_per_month = SE_T001_001,
    mentally_unhealthy_days_per_month = SE_T001_002,
    current_smoker = SE_T011_001,
    drinking_adults = SE_T011_002,
    persons_with_limited_access_to_healthy_foods = SE_T012_001,
    persons_with_access_to_exercise_opportunities = SE_T012_002,
    obese_persons = SE_T012_003,
    physically_inactive_persons = SE_T012_004,
    children_eligible_free_lunch = SE_T012_005,
    food_environment_index = SE_T013_001
  ) %>%
  select(
    state,
    physically_unhealthy_days_per_month,
    current_smoker,
    drinking_adults
  )

# Build the Minnesota table: rename the Social Explorer codes that are
# actually used, keep those columns, restrict to state number 27, and add
# total_unhealthy_days as the sum of the physically and mentally unhealthy
# day counts.
healthMinnesota <- health %>%
  # Only the six columns kept by select() are renamed; renaming the others
  # had no effect because they were dropped immediately afterwards.
  rename(
    county = Geo_COUNTY,
    state = Geo_STATE,
    physically_unhealthy_days_per_month = SE_T001_001,
    mentally_unhealthy_days_per_month = SE_T001_002,
    current_smoker = SE_T011_001,
    drinking_adults = SE_T011_002
  ) %>%
  select(
    county,
    state,
    physically_unhealthy_days_per_month,
    mentally_unhealthy_days_per_month,
    current_smoker,
    drinking_adults
  ) %>%
  # Geo_STATE is read as an integer column, so compare against an integer
  # literal instead of the string "27", which matched only through implicit
  # integer-to-character coercion.
  filter(state == 27L) %>%
  mutate(
    total_unhealthy_days =
      physically_unhealthy_days_per_month + mentally_unhealthy_days_per_month
  )

The code below uses the ggplot2 package to draw scatterplots of the total number of unhealthy days against each of the two habit variables selected from the data set.

This scatterplot shows the total amount of unhealthy days per month and the percentage of current smokers for Minnesota.

# Minnesota scatterplot: current_smoker on the x-axis against
# total_unhealthy_days on the y-axis, one point per row of healthMinnesota.
library(ggplot2)
ggplot(
  data = healthMinnesota,
  mapping = aes(x = current_smoker, y = total_unhealthy_days)
) +
  geom_point()

This scatterplot shows the total amount of unhealthy days per month and the percentage of drinking adults for Minnesota.

# Minnesota scatterplot: drinking_adults on the x-axis against
# total_unhealthy_days on the y-axis, one point per row of healthMinnesota.
library(ggplot2)
ggplot(
  data = healthMinnesota,
  mapping = aes(x = drinking_adults, y = total_unhealthy_days)
) +
  geom_point()

# Build the New York table: rename the Social Explorer codes that are
# actually used, keep those columns, restrict to state number 36, and add
# total_unhealthy_days as the sum of the physically and mentally unhealthy
# day counts.
healthNewYork <- health %>%
  # Only the six columns kept by select() are renamed; renaming the others
  # had no effect because they were dropped immediately afterwards.
  rename(
    county = Geo_COUNTY,
    state = Geo_STATE,
    physically_unhealthy_days_per_month = SE_T001_001,
    mentally_unhealthy_days_per_month = SE_T001_002,
    current_smoker = SE_T011_001,
    drinking_adults = SE_T011_002
  ) %>%
  select(
    county,
    state,
    physically_unhealthy_days_per_month,
    mentally_unhealthy_days_per_month,
    current_smoker,
    drinking_adults
  ) %>%
  # Geo_STATE is read as an integer column, so compare against an integer
  # literal instead of the string "36", which matched only through implicit
  # integer-to-character coercion.
  filter(state == 36L) %>%
  mutate(
    total_unhealthy_days =
      physically_unhealthy_days_per_month + mentally_unhealthy_days_per_month
  )

This scatterplot shows the total amount of unhealthy days per month and the percentage of current smokers for New York.

# New York scatterplot: current_smoker on the x-axis against
# total_unhealthy_days on the y-axis, one point per row of healthNewYork.
library(ggplot2)
ggplot(
  data = healthNewYork,
  mapping = aes(x = current_smoker, y = total_unhealthy_days)
) +
  geom_point()

This scatterplot shows the total amount of unhealthy days per month and the percentage of drinking adults for New York.

# New York scatterplot: drinking_adults on the x-axis against
# total_unhealthy_days on the y-axis, one point per row of healthNewYork.
library(ggplot2)
ggplot(
  data = healthNewYork,
  mapping = aes(x = drinking_adults, y = total_unhealthy_days)
) +
  geom_point()

# Build the Texas table: rename the Social Explorer codes that are actually
# used, keep those columns, restrict to state number 48, and add
# total_unhealthy_days as the sum of the physically and mentally unhealthy
# day counts.
healthTexas <- health %>%
  # Only the six columns kept by select() are renamed; renaming the others
  # had no effect because they were dropped immediately afterwards.
  rename(
    county = Geo_COUNTY,
    state = Geo_STATE,
    physically_unhealthy_days_per_month = SE_T001_001,
    mentally_unhealthy_days_per_month = SE_T001_002,
    current_smoker = SE_T011_001,
    drinking_adults = SE_T011_002
  ) %>%
  select(
    county,
    state,
    physically_unhealthy_days_per_month,
    mentally_unhealthy_days_per_month,
    current_smoker,
    drinking_adults
  ) %>%
  # Geo_STATE is read as an integer column, so compare against an integer
  # literal instead of the string "48", which matched only through implicit
  # integer-to-character coercion.
  filter(state == 48L) %>%
  mutate(
    total_unhealthy_days =
      physically_unhealthy_days_per_month + mentally_unhealthy_days_per_month
  )

This scatterplot shows the total amount of unhealthy days per month and the percentage of current smokers for Texas.

# Texas scatterplot: current_smoker on the x-axis against
# total_unhealthy_days on the y-axis, one point per row of healthTexas.
library(ggplot2)
ggplot(
  data = healthTexas,
  mapping = aes(x = current_smoker, y = total_unhealthy_days)
) +
  geom_point()

This scatterplot shows the total amount of unhealthy days per month and the percentage of drinking adults for Texas.

# Texas scatterplot: drinking_adults on the x-axis against
# total_unhealthy_days on the y-axis, one point per row of healthTexas.
library(ggplot2)
ggplot(
  data = healthTexas,
  mapping = aes(x = drinking_adults, y = total_unhealthy_days)
) +
  geom_point()

Findings

The findings show that, in each state, the total number of unhealthy days and the percentage of current smokers have a positive trend.

For the total number of unhealthy days and the percentage of drinking adults, there was a negative trend.