library(tidyverse)
library(tidyr)
library(knitr)
library(leaflet)
library(sf)
library(readr)
# Point the session at the folder that holds the CDC extract, then read the
# 500 Cities local health indicators table into `cities500`.
# NOTE(review): this absolute, user-specific path only resolves on the original
# author's machine; consider a project-relative path instead of setwd().
setwd("C:/Users/ava/Downloads/data110")
cities500 <- read_csv("500CitiesLocalHealthIndicators.cdc.csv")
# Healthy Cities GIS Assignment
libraries
The GeoLocation variable has (lat, long) format
Split GeoLocation (lat, long) into two columns: lat and long
# Strip the surrounding parentheses from GeoLocation, then split the remaining
# "lat, long" text on the comma into two columns. `convert = TRUE` asks
# separate() to type-convert the new `lat` and `long` columns.
latlong <- cities500 |>
  mutate(GeoLocation = str_replace_all(GeoLocation, "[()]", "")) |>
  separate(GeoLocation, into = c("lat", "long"), sep = ",", convert = TRUE)
# Filter the dataset
Remove the rows whose StateDesc is "United States", keep only the Prevention category (the category of interest), keep only rows that measure crude prevalence, and keep only the year 2017.
# Drop the rows labelled "United States" and keep only Prevention measures
# reported as crude prevalence for 2017. All conditions must hold for a row
# to be kept.
latlong_clean <- filter(
  latlong,
  StateDesc != "United States",
  Category == "Prevention",
  Data_Value_Type == "Crude prevalence",
  Year == 2017
)
# Preview the first rows of the filtered table. The console output captured in
# the rendered report is kept below as `#>` comments so the script still parses.
head(latlong_clean)
#> # A tibble: 6 × 25
#> Year StateAbbr StateDesc CityName GeographicLevel DataSource Category
#> <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 2017 AL Alabama Montgomery City BRFSS Prevention
#> 2 2017 CA California Concord City BRFSS Prevention
#> 3 2017 CA California Concord City BRFSS Prevention
#> 4 2017 CA California Fontana City BRFSS Prevention
#> 5 2017 CA California Richmond Census Tract BRFSS Prevention
#> 6 2017 FL Florida Davie Census Tract BRFSS Prevention
#> # ℹ 18 more variables: UniqueID <chr>, Measure <chr>, Data_Value_Unit <chr>,
#> # DataValueTypeID <chr>, Data_Value_Type <chr>, Data_Value <dbl>,
#> # Low_Confidence_Limit <dbl>, High_Confidence_Limit <dbl>,
#> # Data_Value_Footnote_Symbol <chr>, Data_Value_Footnote <chr>,
#> # PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
#> # MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
# List the column names of the filtered table. The captured console output is
# kept below as `#>` comments so the script still parses.
names(latlong_clean)
#> [1] "Year" "StateAbbr"
#> [3] "StateDesc" "CityName"
#> [5] "GeographicLevel" "DataSource"
#> [7] "Category" "UniqueID"
#> [9] "Measure" "Data_Value_Unit"
#> [11] "DataValueTypeID" "Data_Value_Type"
#> [13] "Data_Value" "Low_Confidence_Limit"
#> [15] "High_Confidence_Limit" "Data_Value_Footnote_Symbol"
#> [17] "Data_Value_Footnote" "PopulationCount"
#> [19] "lat" "long"
#> [21] "CategoryID" "MeasureId"
#> [23] "CityFIPS" "TractFIPS"
#> [25] "Short_Question_Text"
Remove the variables that will not be used in the assignment
# Drop the data-source, unit, value-type id, confidence-limit and footnote
# columns; every other column is carried over unchanged.
prevention <- select(
  latlong_clean,
  -c(
    DataSource, Data_Value_Unit, DataValueTypeID,
    Low_Confidence_Limit, High_Confidence_Limit,
    Data_Value_Footnote_Symbol, Data_Value_Footnote
  )
)
# Disabled example that subset Maryland instead:
# md <- prevention |>
#   filter(StateAbbr == "MD")
# head(md)
# The new dataset
I have filtered the dataset for the state of Texas and selected three different cities in this state to take a look at their population count and how it relates to their answers to various survey questions.
# List the distinct state abbreviations present after filtering. The captured
# console output is kept below as `#>` comments so the script still parses.
unique(latlong_clean$StateAbbr)
#> [1] "AL" "CA" "FL" "CT" "IL" "MN" "NY" "PA" "NC" "OH" "OK" "OR" "TX" "RI" "SC"
#> [16] "SD" "TN" "UT" "VA" "WA" "AK" "WI" "AZ" "AR" "CO" "DE" "NV" "DC" "GA" "ID"
#> [31] "HI" "MA" "MI" "IN" "KS" "KY" "IA" "LA" "MD" "ME" "NH" "NJ" "NM" "MO" "MS"
#> [46] "NE" "MT" "ND" "WV" "VT" "WY"
# Narrow the trimmed prevention table down to the Texas rows.
TX_data <- filter(prevention, StateAbbr == "TX")
# Next: keep only CityName, PopulationCount and Short_Question_Text.
# Keep just the city name, the population count and the short question text
# from the Texas rows.
selected_data <- TX_data |>
  select(CityName, PopulationCount, Short_Question_Text)
# Cities Austin, Houston and Dallas data
# Keep only the rows for the three Texas cities of interest.
# `%in%` tests every CityName against the whole set of names. Comparing with
# `CityName == c("Austin", "Houston", "Dallas")` instead recycles the three
# names down the column, so a row is kept only when its city happens to line
# up with the recycled position and other matching rows are silently dropped.
selected_city <- selected_data |>
  filter(CityName %in% c("Austin", "Houston", "Dallas"))
# cleaning the new data
# Coerce the coordinates to numeric. `lat` is overwritten in place, while the
# longitude is written to a new `lng` column and `long` is left as it was.
TX_data <- mutate(
  TX_data,
  lat = as.numeric(lat),
  lng = as.numeric(long)
)
# creating a map data that will later be used
# Build one row per city for mapping: the mean latitude and longitude of the
# city's rows, plus the total of PopulationCount across those rows. Missing
# values are ignored in all three aggregates.
map_data <- TX_data |>
  group_by(CityName) |>
  summarize(
    lat = mean(lat, na.rm = TRUE),
    lng = mean(long, na.rm = TRUE),
    # Sums PopulationCount over every row of the city, so repeated rows for
    # the same city are added together.
    population = sum(PopulationCount, na.rm = TRUE)
  )
# Bar Chart
In the bar charts below, the population that has answered each question is shown. The questions ask about getting annual checkups, having health insurance, taking BP medication, and cholesterol screening.
# Bar chart of PopulationCount per survey question, with one facet per city.
# Bars are filled by question; the legend is hidden because the x axis already
# names each question.
p2 <- selected_city |>
  ggplot(aes(
    x = Short_Question_Text,
    y = PopulationCount,
    fill = Short_Question_Text
  )) +
  # geom_col() is the idiomatic spelling of geom_bar(stat = "identity").
  geom_col() +
  # One panel per city in a single column, each with its own y scale.
  facet_wrap(~CityName, scales = "free_y", ncol = 1) +
  # Right-justify the x-axis labels and drop the legend.
  theme(
    axis.text.x = element_text(hjust = 1),
    legend.position = "none"
  ) +
  labs(
    title = " 3 Citys Of Texas",
    x = "Question ",
    y = " Population"
  )
p2
# First Map
The map below shows the population of each city; therefore, the bigger the bubble, the bigger the population.
# Bubble map on an Esri street basemap, centred on Texas, with one circle per
# hard-coded coordinate pair and a radius derived from PopulationCount.
# NOTE(review): the three coordinate pairs look like Austin, Dallas and Houston
# (in that order) — confirm. `selected_city` presumably holds several rows per
# city (one per question record), so the radius vector is longer than the three
# coordinates; verify which rows actually size the bubbles.
leaflet(data = TX_data) |>
  setView(lng = -98.5456116, lat = 31.2638905, zoom = 6) |>
  addProviderTiles("Esri.WorldStreetMap") |>
  addCircles(
    lng = c(-97.7431, -96.7970, -95.3698),
    lat = c(30.2672, 32.7767, 29.7604),
    # The radius formula is evaluated against this `data`, not the TX_data
    # passed to leaflet().
    data = selected_city,
    radius = ~ sqrt(PopulationCount) * 100,
    color = "blue",
    fillColor = "#c0defa",
    fillOpacity = 0.5
  )
# Second Map
The map below is the refined version of the first map: by clicking on a bubble, you can see the average value for each question in that city. To compute these averages, I first selected the rows for each question and then used the `summarise()` function to get one number per city that represents the average population for that particular question.
# Second map: the same bubbles as the first map, plus a popup per bubble that
# lists the mean PopulationCount for each of the four survey questions.

# Per-question subsets of the selected cities.
short_quetion1 <- selected_city |>
  filter(Short_Question_Text == "Annual Checkup")
short_quetion2 <- selected_city |>
  filter(Short_Question_Text == "Health Insurance")
short_quetion3 <- selected_city |>
  filter(Short_Question_Text == "Cholesterol Screening")
short_quetion4 <- selected_city |>
  filter(Short_Question_Text == "Taking BP Medication")

# Mean PopulationCount per city for each question (one row per city).
avg1 <- short_quetion1 |>
  group_by(CityName) |>
  summarise(Annual_check = mean(PopulationCount))
avg2 <- short_quetion2 |>
  group_by(CityName) |>
  summarise(Health_insurance = mean(PopulationCount))
avg3 <- short_quetion3 |>
  group_by(CityName) |>
  summarise(Cholesterol_Screening = mean(PopulationCount))
avg4 <- short_quetion4 |>
  group_by(CityName) |>
  summarise(TakingBPMedication = mean(PopulationCount))

# One HTML popup string per city, pasted element-wise from the four tables.
# NOTE(review): this pairs popups with circles purely by position, so it
# assumes all four tables list the same cities in the same order as the
# hard-coded coordinates below — confirm.
# NOTE(review): "Annuual" in the first label is a typo that users will see.
popupq <- paste0(
  "<b> Annuual check up: </b>", avg1$Annual_check, "<br>",
  "<b> Health Insurance: </b>", avg2$Health_insurance, "<br>",
  "<b> Taking BP Medication: </b>", avg4$TakingBPMedication, "<br>",
  "<b> Cholesterol Screening: </b>", avg3$Cholesterol_Screening, "<br>"
)

leaflet(data = TX_data) |>
  setView(lng = -98.5456116, lat = 31.2638905, zoom = 7) |>
  addProviderTiles("Esri.WorldStreetMap") |>
  addCircles(
    lng = c(-97.7431, -96.7970, -95.3698),
    lat = c(30.2672, 32.7767, 29.7604),
    # The radius formula is evaluated against this `data`, not the TX_data
    # passed to leaflet().
    data = selected_city,
    radius = ~ sqrt(PopulationCount) * 100,
    color = "blue",
    fillColor = "#c0defa",
    fillOpacity = 0.5,
    popup = popupq
  )
# 5. Paragraph
The map I created initially displays the population of each city in Texas. In the refined version of this map, I show the average number of people who responded to questions about health insurance, annual check-ups, taking BP medication, and cholesterol screening. It is evident that larger city populations correspond to more responses to these health-related questions. Furthermore, the related bar chart shows which health category is least followed in every city. Each category is indicated with a single color to make comparisons easier, and cholesterol screening has the lowest participation rate.