Places CDC
library(readr)
places_23 <- read_csv("PLACES__Local_Data_for_Better_Health__Census_Tract_Data_2023_release_20240321.csv")
## Rows: 2555113 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (16): StateAbbr, StateDesc, CountyName, CountyFIPS, LocationName, DataSo...
## dbl (5): Year, Data_Value, Low_Confidence_Limit, High_Confidence_Limit, Tot...
## lgl (2): Data_Value_Footnote_Symbol, Data_Value_Footnote
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
places_22 <- read_csv("PLACES__Local_Data_for_Better_Health__Census_Tract_Data_2022_release_20240325.csv")
## Rows: 2161543 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (16): StateAbbr, StateDesc, CountyName, CountyFIPS, LocationName, DataSo...
## dbl (5): Year, Data_Value, Low_Confidence_Limit, High_Confidence_Limit, Tot...
## lgl (2): Data_Value_Footnote_Symbol, Data_Value_Footnote
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
places_21 <- read_csv("PLACES__Local_Data_for_Better_Health__Census_Tract_Data_2021_release_20240325.csv")
## Rows: 2125561 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (16): StateAbbr, StateDesc, CountyName, CountyFIPS, LocationName, DataSo...
## dbl (5): Year, Data_Value, Low_Confidence_Limit, High_Confidence_Limit, Tot...
## lgl (2): Data_Value_Footnote_Symbol, Data_Value_Footnote
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
places_20 <- read_csv("PLACES__Local_Data_for_Better_Health__Census_Tract_Data_2020_release_20240325.csv")
## Rows: 2024865 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (16): StateAbbr, StateDesc, CountyName, CountyFIPS, LocationName, DataSo...
## dbl (5): Year, Data_Value, Low_Confidence_Limit, High_Confidence_Limit, Tot...
## lgl (2): Data_Value_Footnote_Symbol, Data_Value_Footnote
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(places_23)
## # A tibble: 6 × 23
## Year StateAbbr StateDesc CountyName CountyFIPS LocationName DataSource
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 2021 AL Alabama Baldwin 01003 01003011300 BRFSS
## 2 2021 AL Alabama Chambers 01017 01017954000 BRFSS
## 3 2021 AL Alabama Cleburne 01029 01029959600 BRFSS
## 4 2021 AL Alabama Covington 01039 01039962000 BRFSS
## 5 2021 AL Alabama Fayette 01057 01057020400 BRFSS
## 6 2021 AL Alabama Houston 01069 01069040301 BRFSS
## # ℹ 16 more variables: Category <chr>, Measure <chr>, Data_Value_Unit <chr>,
## # Data_Value_Type <chr>, Data_Value <dbl>, Data_Value_Footnote_Symbol <lgl>,
## # Data_Value_Footnote <lgl>, Low_Confidence_Limit <dbl>,
## # High_Confidence_Limit <dbl>, TotalPopulation <dbl>, Geolocation <chr>,
## # LocationID <chr>, CategoryID <chr>, MeasureId <chr>, DataValueTypeID <chr>,
## # Short_Question_Text <chr>
head(places_22)
## # A tibble: 6 × 23
## Year StateAbbr StateDesc CountyName CountyFIPS LocationName DataSource
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 2020 AL Alabama Barbour 01005 01005950800 BRFSS
## 2 2020 AL Alabama Bibb 01007 01007010002 BRFSS
## 3 2020 AL Alabama Calhoun 01015 01015000900 BRFSS
## 4 2020 AL Alabama Calhoun 01015 01015002400 BRFSS
## 5 2020 AL Alabama Cherokee 01019 01019955900 BRFSS
## 6 2020 AL Alabama Chilton 01021 01021060401 BRFSS
## # ℹ 16 more variables: Category <chr>, Measure <chr>, Data_Value_Unit <chr>,
## # Data_Value_Type <chr>, Data_Value <dbl>, Data_Value_Footnote_Symbol <lgl>,
## # Data_Value_Footnote <lgl>, Low_Confidence_Limit <dbl>,
## # High_Confidence_Limit <dbl>, TotalPopulation <dbl>, Geolocation <chr>,
## # LocationID <chr>, CategoryID <chr>, MeasureId <chr>, DataValueTypeID <chr>,
## # Short_Question_Text <chr>
head(places_21)
## # A tibble: 6 × 23
## Year StateAbbr StateDesc CountyName CountyFIPS LocationName DataSource
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 2018 AL Alabama Baldwin 01003 01003011601 BRFSS
## 2 2019 ME Maine Waldo 23027 23027045000 BRFSS
## 3 2019 AL Alabama Calhoun 01015 01015002300 BRFSS
## 4 2019 LA Louisiana Tangipahoa 22105 22105953500 BRFSS
## 5 2019 ME Maine Cumberland 23005 23005014000 BRFSS
## 6 2018 LA Louisiana St. Mary 22101 22101041100 BRFSS
## # ℹ 16 more variables: Category <chr>, Measure <chr>, Data_Value_Unit <chr>,
## # Data_Value_Type <chr>, Data_Value <dbl>, Data_Value_Footnote_Symbol <lgl>,
## # Data_Value_Footnote <lgl>, Low_Confidence_Limit <dbl>,
## # High_Confidence_Limit <dbl>, TotalPopulation <dbl>, Geolocation <chr>,
## # LocationID <chr>, CategoryID <chr>, MeasureId <chr>, DataValueTypeID <chr>,
## # Short_Question_Text <chr>
head(places_20)
## # A tibble: 6 × 23
## Year StateAbbr StateDesc CountyName CountyFIPS LocationName DataSource
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 2018 AL Alabama Autauga 01001 01001020100 BRFSS
## 2 2018 AL Alabama Autauga 01001 01001020100 BRFSS
## 3 2018 AL Alabama Autauga 01001 01001020100 BRFSS
## 4 2017 AL Alabama Autauga 01001 01001020100 BRFSS
## 5 2017 AL Alabama Autauga 01001 01001020100 BRFSS
## 6 2018 AL Alabama Autauga 01001 01001020100 BRFSS
## # ℹ 16 more variables: Category <chr>, Measure <chr>, Data_Value_Unit <chr>,
## # Data_Value_Type <chr>, Data_Value <dbl>, Data_Value_Footnote_Symbol <lgl>,
## # Data_Value_Footnote <lgl>, Low_Confidence_Limit <dbl>,
## # High_Confidence_Limit <dbl>, TotalPopulation <dbl>, Geolocation <chr>,
## # LocationID <chr>, CategoryID <chr>, MeasureId <chr>, DataValueTypeID <chr>,
## # Short_Question_Text <chr>
table(places_23$Year)
##
## 2020 2021
## 578125 1976988
table(places_22$Year)
##
## 2019 2020
## 281352 1880191
table(places_21$Year)
##
## 2018 2019
## 578125 1547436
table(places_20$Year)
##
## 2017 2018
## 289348 1735517
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(purrr)
data_21 <- places_23 %>%
filter(Year == "2021")
data_20 <- places_22 %>%
filter(Year == "2020")
data_19 <- places_21 %>%
filter(Year == "2019")
data_18 <- places_20 %>%
filter(Year == "2018")
data_17 <- places_20 %>%
filter(Year == "2017")
###### 2017 data ##### : Hypertension, High Cholesterol
table(data_17$Short_Question_Text)
##
## Cholesterol Screening High Blood Pressure High Cholesterol
## 72337 72337 72337
## Taking BP Medication
## 72337
table(data_17$MeasureId)
##
## BPHIGH BPMED CHOLSCREEN HIGHCHOL
## 72337 72337 72337 72337
#### 1. Create the hypertension data frame
hypertension_data_17 <- data_17 %>%
filter(Short_Question_Text == "High Blood Pressure") %>%
rename(hypertension = Data_Value)
#### 2. Create the High Cholesterol data frame
high_cholesterol_data_17 <- data_17 %>%
filter(Short_Question_Text == "High Cholesterol") %>%
rename(high_cholesterol = Data_Value) %>%
select(high_cholesterol, LocationID)### rename the variable and Keep the two variable, which are required variable and the merge variable
#### 3. Merge data
data_list_17 <- list(hypertension_data_17, high_cholesterol_data_17) ### generate the new data list for merging
data_2017 <- reduce(data_list_17, left_join, by = "LocationID") ### merge data by the LocationID variable
data_2017 <- data_2017 %>%
select(Year, StateAbbr, StateDesc, CountyName, CountyFIPS, LocationID, hypertension, high_cholesterol, Data_Value_Unit, TotalPopulation, Short_Question_Text)
###### 2018 data ##### : Cancer, Diabetes, Obesity
#### 1. Check Data
table(data_18$Short_Question_Text)
##
## Annual Checkup
## 72337
## Arthritis
## 72337
## Binge Drinking
## 72337
## Cancer (except skin)
## 72337
## Cervical Cancer Screening
## 72320
## Chronic Kidney Disease
## 72337
## Colorectal Cancer Screening
## 72305
## COPD
## 72337
## Core preventive services for older men
## 72193
## Core preventive services for older women
## 72142
## Coronary Heart Disease
## 72337
## Current Asthma
## 72337
## Current Smoking
## 72337
## Dental Visit
## 72337
## Diabetes
## 72337
## Health Insurance
## 72337
## Mammography
## 72253
## Mental Health
## 72337
## Obesity
## 72337
## Physical Health
## 72337
## Physical Inactivity
## 72337
## Sleep <7 hours
## 72337
## Stroke
## 72337
## Teeth Loss
## 72238
table(data_18$MeasureId)
##
## ACCESS2 ARTHRITIS BINGE CANCER CASTHMA CERVICAL
## 72337 72337 72337 72337 72337 72320
## CHD CHECKUP COLON_SCREEN COPD COREM COREW
## 72337 72337 72305 72337 72193 72142
## CSMOKING DENTAL DIABETES KIDNEY LPA MAMMOUSE
## 72337 72337 72337 72337 72337 72253
## MHLTH OBESITY PHLTH SLEEP STROKE TEETHLOST
## 72337 72337 72337 72337 72337 72238
#### 2. Generate new data
cancer_data_18 <- data_18 %>%
filter(Short_Question_Text == "Cancer (except skin)") %>%
rename(cancer = Data_Value)
create_data_frame <- function(data, question_text, new_var_name) {
data %>%
filter(Short_Question_Text == question_text) %>%
rename(!!new_var_name := Data_Value) %>%
select(!!new_var_name, LocationID)
}
diabetes_data_18 <- create_data_frame(data_18, "Diabetes", "diabetes")
obesity_data_18 <- create_data_frame(data_18, "Obesity", "obesity")
#### 3. Merge data
data_list_18 <- list(cancer_data_18, diabetes_data_18, obesity_data_18) ### generate the new data list for merging
data_2018 <- reduce(data_list_18, left_join, by = "LocationID") ### merge data by the LocationID variable
data_2018 <- data_2018 %>%
select(Year, StateAbbr, StateDesc, CountyName, CountyFIPS, LocationID, cancer, diabetes, obesity, Data_Value_Unit, TotalPopulation, Short_Question_Text)
###### 2019 data ##### : Cancer, Diabetes, Hypertension, High Cholesterol, Obesity
#### 1. Check Data
table(data_19$Short_Question_Text)
##
## Annual Checkup Arthritis Binge Drinking
## 70338 70338 70338
## Cancer (except skin) Cholesterol Screening Chronic Kidney Disease
## 70338 70338 70338
## COPD Coronary Heart Disease Current Asthma
## 70338 70338 70338
## Current Smoking Depression Diabetes
## 70338 70338 70338
## General Health Health Insurance High Blood Pressure
## 70338 70338 70338
## High Cholesterol Mental Health Obesity
## 70338 70338 70338
## Physical Health Physical Inactivity Stroke
## 70338 70338 70338
## Taking BP Medication
## 70338
table(data_19$MeasureId)
##
## ACCESS2 ARTHRITIS BINGE BPHIGH BPMED CANCER CASTHMA
## 70338 70338 70338 70338 70338 70338 70338
## CHD CHECKUP CHOLSCREEN COPD CSMOKING DEPRESSION DIABETES
## 70338 70338 70338 70338 70338 70338 70338
## GHLTH HIGHCHOL KIDNEY LPA MHLTH OBESITY PHLTH
## 70338 70338 70338 70338 70338 70338 70338
## STROKE
## 70338
#### 2. Generate new data
cancer_data_19 <- data_19 %>%
filter(Short_Question_Text == "Cancer (except skin)") %>%
rename(cancer = Data_Value)
create_data_frame <- function(data, question_text, new_var_name) {
data %>%
filter(Short_Question_Text == question_text) %>%
rename(!!new_var_name := Data_Value) %>%
select(!!new_var_name, LocationID)
}
diabetes_data_19 <- create_data_frame(data_19, "Diabetes", "diabetes")
hypertension_data_19 <- create_data_frame(data_19, "High Blood Pressure", "hypertension")
high_cholesterol_data_19 <- create_data_frame(data_19, "High Cholesterol", "high_cholesterol")
obesity_data_19 <- create_data_frame(data_19, "Obesity", "obesity")
#### 3. Merge data
data_list_19 <- list(cancer_data_19, diabetes_data_19, hypertension_data_19, high_cholesterol_data_19, obesity_data_19) ### generate the new data list for merging
data_2019 <- reduce(data_list_19, left_join, by = "LocationID") ### merge data by the LocationID variable
data_2019 <- data_2019 %>%
select(Year, StateAbbr, StateDesc, CountyName, CountyFIPS, LocationID, cancer, diabetes, hypertension, high_cholesterol, obesity, Data_Value_Unit, TotalPopulation, Short_Question_Text)
###### 2020 data ##### : Cancer, Diabetes, Obesity
#### 1. Check Data
table(data_20$Short_Question_Text)
##
## All Teeth Lost
## 72238
## Annual Checkup
## 72337
## Arthritis
## 72337
## Binge Drinking
## 72337
## Cancer (except skin)
## 72337
## Cervical Cancer Screening
## 72320
## Chronic Kidney Disease
## 72337
## Colorectal Cancer Screening
## 72305
## COPD
## 72337
## Core preventive services for older men
## 72193
## Core preventive services for older women
## 72142
## Coronary Heart Disease
## 72337
## Current Asthma
## 72337
## Current Smoking
## 72337
## Dental Visit
## 72337
## Depression
## 72337
## Diabetes
## 72337
## General Health
## 72337
## Health Insurance
## 72337
## Mammography
## 72253
## Mental Health
## 72337
## Obesity
## 72337
## Physical Health
## 72337
## Physical Inactivity
## 72337
## Sleep <7 hours
## 72337
## Stroke
## 72337
table(data_20$MeasureId)
##
## ACCESS2 ARTHRITIS BINGE CANCER CASTHMA CERVICAL
## 72337 72337 72337 72337 72337 72320
## CHD CHECKUP COLON_SCREEN COPD COREM COREW
## 72337 72337 72305 72337 72193 72142
## CSMOKING DENTAL DEPRESSION DIABETES GHLTH KIDNEY
## 72337 72337 72337 72337 72337 72337
## LPA MAMMOUSE MHLTH OBESITY PHLTH SLEEP
## 72337 72253 72337 72337 72337 72337
## STROKE TEETHLOST
## 72337 72238
#### 2. Generate new data
cancer_data_20 <- data_20 %>%
filter(Short_Question_Text == "Cancer (except skin)") %>%
rename(cancer = Data_Value)
create_data_frame <- function(data, question_text, new_var_name) {
data %>%
filter(Short_Question_Text == question_text) %>%
rename(!!new_var_name := Data_Value) %>%
select(!!new_var_name, LocationID)
}
diabetes_data_20 <- create_data_frame(data_20, "Diabetes", "diabetes")
obesity_data_20 <- create_data_frame(data_20, "Obesity", "obesity")
#### 3. Merge data
data_list_20 <- list(cancer_data_20, diabetes_data_20, obesity_data_20) ### generate the new data list for merging
data_2020 <- reduce(data_list_20, left_join, by = "LocationID") ### merge data by the LocationID variable
data_2020 <- data_2020 %>%
select(Year, StateAbbr, StateDesc, CountyName, CountyFIPS, LocationID, cancer, diabetes, obesity, Data_Value_Unit, TotalPopulation, Short_Question_Text)
###### 2021 data ##### : Cancer, Diabetes, Hypertension, High Cholesterol, Obesity
#### 1. Check Data
table(data_21$Short_Question_Text)
##
## Annual Checkup Any Disability
## 68172 68172
## Arthritis Binge Drinking
## 68172 68172
## Cancer (except skin) Cholesterol Screening
## 68172 68172
## Chronic Kidney Disease Cognitive Disability
## 68172 68172
## COPD Coronary Heart Disease
## 68172 68172
## Current Asthma Current Smoking
## 68172 68172
## Depression Diabetes
## 68172 68172
## General Health Health Insurance
## 68172 68172
## Hearing Disability High Blood Pressure
## 68172 68172
## High Cholesterol Independent Living Disability
## 68172 68172
## Mental Health Mobility Disability
## 68172 68172
## Obesity Physical Health
## 68172 68172
## Physical Inactivity Self-care Disability
## 68172 68172
## Stroke Taking BP Medication
## 68172 68172
## Vision Disability
## 68172
table(data_21$MeasureId)
##
## ACCESS2 ARTHRITIS BINGE BPHIGH BPMED CANCER CASTHMA
## 68172 68172 68172 68172 68172 68172 68172
## CHD CHECKUP CHOLSCREEN COGNITION COPD CSMOKING DEPRESSION
## 68172 68172 68172 68172 68172 68172 68172
## DIABETES DISABILITY GHLTH HEARING HIGHCHOL INDEPLIVE KIDNEY
## 68172 68172 68172 68172 68172 68172 68172
## LPA MHLTH MOBILITY OBESITY PHLTH SELFCARE STROKE
## 68172 68172 68172 68172 68172 68172 68172
## VISION
## 68172
#### 2. Generate new data
cancer_data_21 <- data_21 %>%
filter(Short_Question_Text == "Cancer (except skin)") %>%
rename(cancer = Data_Value)
create_data_frame <- function(data, question_text, new_var_name) {
data %>%
filter(Short_Question_Text == question_text) %>%
rename(!!new_var_name := Data_Value) %>%
select(!!new_var_name, LocationID)
}
diabetes_data_21 <- create_data_frame(data_21, "Diabetes", "diabetes")
hypertension_data_21 <- create_data_frame(data_21, "High Blood Pressure", "hypertension")
high_cholesterol_data_21 <- create_data_frame(data_21, "High Cholesterol", "high_cholesterol")
obesity_data_21 <- create_data_frame(data_21, "Obesity", "obesity")
#### 3. Merge data
data_list_21 <- list(cancer_data_21, diabetes_data_21, hypertension_data_21, high_cholesterol_data_21, obesity_data_21) ### generate the new data list for merging
data_2021 <- reduce(data_list_21, left_join, by = "LocationID") ### merge data by the LocationID variable
data_2021 <- data_2021 %>%
select(Year, StateAbbr, StateDesc, CountyName, CountyFIPS, LocationID, cancer, diabetes, hypertension, high_cholesterol, obesity, Data_Value_Unit, TotalPopulation, Short_Question_Text)
merged_data <- bind_rows(data_2017, data_2018, data_2019, data_2020, data_2021)
summary(merged_data)
## Year StateAbbr StateDesc CountyName
## Min. :2017 Length:355521 Length:355521 Length:355521
## 1st Qu.:2018 Class :character Class :character Class :character
## Median :2019 Mode :character Mode :character Mode :character
## Mean :2019
## 3rd Qu.:2020
## Max. :2021
##
## CountyFIPS LocationID hypertension high_cholesterol
## Length:355521 Length:355521 Min. : 4.80 Min. : 6.20
## Class :character Class :character 1st Qu.:27.50 1st Qu.:30.50
## Mode :character Mode :character Median :31.80 Median :33.50
## Mean :32.31 Mean :33.16
## 3rd Qu.:36.60 3rd Qu.:36.30
## Max. :73.30 Max. :57.30
## NA's :144674 NA's :144674
## Data_Value_Unit TotalPopulation Short_Question_Text cancer
## Length:355521 Min. : 56 Length:355521 Min. : 0.50
## Class :character 1st Qu.: 2908 Class :character 1st Qu.: 5.20
## Mode :character Median : 4015 Mode :character Median : 6.50
## Mean : 4265 Mean : 6.47
## 3rd Qu.: 5330 3rd Qu.: 7.60
## Max. :37452 Max. :20.90
## NA's :72337
## diabetes obesity
## Min. : 0.60 Min. :10.70
## 1st Qu.: 8.50 1st Qu.:28.10
## Median :10.40 Median :33.00
## Mean :10.98 Mean :33.01
## 3rd Qu.:12.80 3rd Qu.:37.60
## Max. :46.10 Max. :63.90
## NA's :72337 NA's :72337
write.csv(merged_data, "data.csv", row.names = FALSE)