Importing CSV Data into R
The readr package was used to import the CSV file, downloaded from Social Explorer, into R.
The head function was used to display the first few rows of the data.
library(readr)
# Import the Social Explorer CSV export; the first row of the file supplies
# the column names.
health <- read_csv(
  file = "/Users/paulkim/Desktop/csvdata2.csv",
  col_names = TRUE
)
## Parsed with column specification:
## cols(
## Geo_FIPS = col_integer(),
## Geo_NAME = col_character(),
## Geo_QNAME = col_character(),
## Geo_STATE = col_integer(),
## Geo_COUNTY = col_integer(),
## SE_T001_001 = col_double(),
## SE_T001_002 = col_double(),
## SE_T011_001 = col_double(),
## SE_T011_002 = col_double(),
## SE_T012_001 = col_double(),
## SE_T012_002 = col_double(),
## SE_T012_003 = col_double(),
## SE_T012_004 = col_double(),
## SE_T012_005 = col_double(),
## SE_T013_001 = col_double()
## )
# Preview the first six rows of the imported data.
head(health, n = 6L)
## # A tibble: 6 x 15
## Geo_FIPS Geo_NAME Geo_QNAME Geo_STATE Geo_COUNTY SE_T001_001 SE_T001_002
## <int> <chr> <chr> <int> <int> <dbl> <dbl>
## 1 27001 Aitkin … Aitkin C… 27 1 3.10 2.90
## 2 27003 Anoka C… Anoka Co… 27 3 2.70 2.80
## 3 27005 Becker … Becker C… 27 5 2.90 2.90
## 4 27007 Beltram… Beltrami… 27 7 3.50 3.30
## 5 27009 Benton … Benton C… 27 9 2.80 2.90
## 6 27011 Big Sto… Big Ston… 27 11 2.80 2.80
## # ... with 8 more variables: SE_T011_001 <dbl>, SE_T011_002 <dbl>,
## # SE_T012_001 <dbl>, SE_T012_002 <dbl>, SE_T012_003 <dbl>,
## # SE_T012_004 <dbl>, SE_T012_005 <dbl>, SE_T013_001 <dbl>
Renaming, Selecting and Filtering the Data
The rename function was used to give the variables more suitable names.
For this study, only a few variables need to be observed. The select function was used to select the variables needed for the observation.
The filter function was used to restrict the data to the states under observation. In the code below, it keeps only the rows for Minnesota, which has a state number of 27.
The original data set recorded physically unhealthy days and mentally unhealthy days separately, so the mutate function was used to create a new variable called total_unhealthy_days that adds the two together.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Build a reduced table: give the Social Explorer columns descriptive names,
# then keep only the state code and three of the health measures.
health2 <- health %>%
  rename(
    county = Geo_COUNTY,
    state = Geo_STATE,
    physically_unhealthy_days_per_month = SE_T001_001,
    mentally_unhealthy_days_per_month = SE_T001_002,
    current_smoker = SE_T011_001,
    drinking_adults = SE_T011_002,
    persons_with_limited_access_to_healthy_foods = SE_T012_001,
    persons_with_access_to_exercise_opportunities = SE_T012_002,
    obese_persons = SE_T012_003,
    physically_inactive_persons = SE_T012_004,
    children_eligible_free_lunch = SE_T012_005,
    food_environment_index = SE_T013_001
  ) %>%
  select(
    state,
    physically_unhealthy_days_per_month,
    current_smoker,
    drinking_adults
  )
# Minnesota subset: keep the county/state identifiers and the four health
# measures under descriptive names, restrict the rows to state code 27, and
# add the combined count of physically and mentally unhealthy days.
# select() renames while selecting, so columns that would be dropped anyway
# are no longer renamed first.
healthMinnesota <- health %>%
  select(
    county = Geo_COUNTY,
    state = Geo_STATE,
    physically_unhealthy_days_per_month = SE_T001_001,
    mentally_unhealthy_days_per_month = SE_T001_002,
    current_smoker = SE_T011_001,
    drinking_adults = SE_T011_002
  ) %>%
  # Geo_STATE is parsed as an integer column, so compare against an integer
  # literal instead of relying on implicit coercion to the string "27".
  filter(state == 27L) %>%
  mutate(
    total_unhealthy_days = physically_unhealthy_days_per_month +
      mentally_unhealthy_days_per_month
  )
The code below uses the ggplot2 package to display scatterplots of the selected variables.
This scatterplot shows the total amount of unhealthy days per month and the percentage of current smokers for Minnesota.
library(ggplot2)
# Scatterplot for Minnesota: current smokers (x) against total unhealthy
# days per month (y).
ggplot(healthMinnesota, aes(x = current_smoker, y = total_unhealthy_days)) +
  geom_point()

This scatterplot shows the total amount of unhealthy days per month and the percentage of drinking adults for Minnesota.
library(ggplot2)
# Scatterplot for Minnesota: drinking adults (x) against total unhealthy
# days per month (y).
ggplot(healthMinnesota, aes(x = drinking_adults, y = total_unhealthy_days)) +
  geom_point()

# New York subset: keep the county/state identifiers and the four health
# measures under descriptive names, restrict the rows to state code 36, and
# add the combined count of physically and mentally unhealthy days.
# select() renames while selecting, so columns that would be dropped anyway
# are no longer renamed first.
healthNewYork <- health %>%
  select(
    county = Geo_COUNTY,
    state = Geo_STATE,
    physically_unhealthy_days_per_month = SE_T001_001,
    mentally_unhealthy_days_per_month = SE_T001_002,
    current_smoker = SE_T011_001,
    drinking_adults = SE_T011_002
  ) %>%
  # Geo_STATE is parsed as an integer column, so compare against an integer
  # literal instead of relying on implicit coercion to the string "36".
  filter(state == 36L) %>%
  mutate(
    total_unhealthy_days = physically_unhealthy_days_per_month +
      mentally_unhealthy_days_per_month
  )
This scatterplot shows the total amount of unhealthy days per month and the percentage of current smokers for New York.
library(ggplot2)
# Scatterplot for New York: current smokers (x) against total unhealthy
# days per month (y).
ggplot(healthNewYork, aes(x = current_smoker, y = total_unhealthy_days)) +
  geom_point()

This scatterplot shows the total amount of unhealthy days per month and the percentage of drinking adults for New York.
library(ggplot2)
# Scatterplot for New York: drinking adults (x) against total unhealthy
# days per month (y).
ggplot(healthNewYork, aes(x = drinking_adults, y = total_unhealthy_days)) +
  geom_point()

# Texas subset: keep the county/state identifiers and the four health
# measures under descriptive names, restrict the rows to state code 48, and
# add the combined count of physically and mentally unhealthy days.
# select() renames while selecting, so columns that would be dropped anyway
# are no longer renamed first.
healthTexas <- health %>%
  select(
    county = Geo_COUNTY,
    state = Geo_STATE,
    physically_unhealthy_days_per_month = SE_T001_001,
    mentally_unhealthy_days_per_month = SE_T001_002,
    current_smoker = SE_T011_001,
    drinking_adults = SE_T011_002
  ) %>%
  # Geo_STATE is parsed as an integer column, so compare against an integer
  # literal instead of relying on implicit coercion to the string "48".
  filter(state == 48L) %>%
  mutate(
    total_unhealthy_days = physically_unhealthy_days_per_month +
      mentally_unhealthy_days_per_month
  )
This scatterplot shows the total amount of unhealthy days per month and the percentage of current smokers for Texas.
library(ggplot2)
# Scatterplot for Texas: current smokers (x) against total unhealthy days
# per month (y).
ggplot(healthTexas, aes(x = current_smoker, y = total_unhealthy_days)) +
  geom_point()

This scatterplot shows the total amount of unhealthy days per month and the percentage of drinking adults for Texas.
library(ggplot2)
# Scatterplot for Texas: drinking adults (x) against total unhealthy days
# per month (y).
ggplot(healthTexas, aes(x = drinking_adults, y = total_unhealthy_days)) +
  geom_point()
