The primary objective of this exercise is to provide descriptive statistics for the project and to prepare the data for further statistical analysis. This process involves cleaning and preparing the data, as well as combining several sources of data for descriptive and subsequent statistical analysis.
setwd("C:\\Users\\anami\\OneDrive\\Documents\\Stat ll\\Folder")
dataA <- read.csv("C:\\Users\\anami\\OneDrive\\Documents\\Stat ll\\Folder\\WalkabilityIndex_Tract_2019.csv")
library(dplyr)
dataB<-dataA%>%
filter(StAbbr=="TX")
dataB
nrow(dataB)
## [1] 5265
library(ggplot2)
dataB$rank1 <- as.numeric(cut_number(dataB$D2A_EPHHM,20))
dataB$rank2 <- as.numeric(cut_number(dataB$D2B_E8MIXA,20))
dataB$rank3 <- as.numeric(cut_number(dataB$D3B,20))
library(dplyr)
dataC<-subset(dataB, select=c(TractID2019,Pop2018,rank1,rank2,rank3,D4A,NatWalkInd)) %>%
mutate(d4A =ifelse(D4A ==-99999.00,1, D4A))
dataC
dataC$rank4 <- as.numeric( cut(dataC$d4A,20, labels=F))
library(dplyr)
dataD<-dataC %>%
mutate(WI=(rank1/6)+(rank2/6)+(rank3/3)+(rank4/3))
names(dataD)
## [1] "TractID2019" "Pop2018" "rank1" "rank2" "rank3"
## [6] "D4A" "NatWalkInd" "d4A" "rank4" "WI"
library(dplyr)
dataE<-subset(dataD, select=c(TractID2019,WI)) %>%
group_by(TractID2019)
names(dataE)
## [1] "TractID2019" "WI"
file1<- read.csv("C:\\Users\\anami\\OneDrive\\Documents\\Stat ll\\Folder\\ACS2019.csv")
file2<-subset(file1, select=c(geoid,totpop,fpop,mage,hispan,nh_white,nh_black,nh_asian))
names(file2)
## [1] "geoid" "totpop" "fpop" "mage" "hispan" "nh_white" "nh_black"
## [8] "nh_asian"
nrow(file2)
## [1] 5265
combineA <-data3 %>%
left_join(dataE, by = c("TractFIPS" = "TractID2019")) %>%
select(TractFIPS,WI, BPHIGH_CrudePrev,CHD_CrudePrev,OBESITY_CrudePrev,DIABETES_CrudePrev,HIGHCHOL_CrudePrev)
names(combineA)
## [1] "TractFIPS" "WI" "BPHIGH_CrudePrev"
## [4] "CHD_CrudePrev" "OBESITY_CrudePrev" "DIABETES_CrudePrev"
## [7] "HIGHCHOL_CrudePrev"
combineB <-combineA %>%
left_join(file2, by = c("TractFIPS" = "geoid")) %>%
select(TractFIPS,WI, BPHIGH_CrudePrev,CHD_CrudePrev,OBESITY_CrudePrev,DIABETES_CrudePrev,HIGHCHOL_CrudePrev,totpop,fpop,mage,hispan,nh_white,nh_black,nh_asian)
names(combineB)
## [1] "TractFIPS" "WI" "BPHIGH_CrudePrev"
## [4] "CHD_CrudePrev" "OBESITY_CrudePrev" "DIABETES_CrudePrev"
## [7] "HIGHCHOL_CrudePrev" "totpop" "fpop"
## [10] "mage" "hispan" "nh_white"
## [13] "nh_black" "nh_asian"
library(ggplot2)
combineB$group <- as.numeric(cut_number(combineB$WI, 4))
combineB$group<-factor(combineB$group, levels = c(1, 2, 3,4),
labels = c("Q1", "Q2", "Q3","Q4"))
table(combineB$group)
##
## Q1 Q2 Q3 Q4
## 1332 1346 1246 1298
library(dplyr)
final1<-subset(combineB, select=c(WI,totpop,fpop,mage,hispan,nh_white,nh_black,nh_asian,group))%>%
mutate(percentfemale=(fpop*100)/totpop)
names(final1)
## [1] "WI" "totpop" "fpop" "mage"
## [5] "hispan" "nh_white" "nh_black" "nh_asian"
## [9] "group" "percentfemale"
final1$mage <- as.numeric(final1$mage)
final1$hispan <- as.numeric(final1$hispan)
final1$nh_white <- as.numeric(final1$nh_white)
final1$nh_black <- as.numeric(final1$nh_black)
final1$nh_asian <- as.numeric(final1$nh_asian)
library(dplyr)
final2<-subset(final1, select=c(WI,percentfemale,mage,hispan,nh_white,nh_black,nh_asian,group))%>%
group_by(group)
names(final2)
## [1] "WI" "percentfemale" "mage" "hispan"
## [5] "nh_white" "nh_black" "nh_asian" "group"
library(arsenal)
table_1 <- tableby(group ~ ., data =final1)
summary(table_1, title = "Descriptive Analysis I")
##
##
## Table: Descriptive Analysis I
##
## | | Q1 (N=1332) | Q2 (N=1346) | Q3 (N=1246) | Q4 (N=1298) | Total (N=5222) | p value|
## |:---------------------------|:-------------------:|:-------------------:|:-------------------:|:-------------------:|:-------------------:|-------:|
## |**WI** | | | | | | < 0.001|
## | Mean (SD) | 4.273 (1.175) | 7.222 (0.693) | 9.505 (0.658) | 12.629 (1.419) | 8.358 (3.243) | |
## | Range | 1.000 - 6.000 | 6.000 - 8.333 | 8.500 - 10.667 | 10.833 - 19.000 | 1.000 - 19.000 | |
## |**totpop** | | | | | | < 0.001|
## | Mean (SD) | 5333.612 (3307.716) | 6087.311 (4283.567) | 5358.671 (3096.476) | 4842.710 (2545.362) | 5411.841 (3407.698) | |
## | Range | 37.000 - 35550.000 | 16.000 - 72041.000 | 690.000 - 49119.000 | 454.000 - 41538.000 | 16.000 - 72041.000 | |
## |**fpop** | | | | | | < 0.001|
## | Mean (SD) | 2641.559 (1698.413) | 3077.238 (2209.441) | 2721.681 (1594.162) | 2446.108 (1323.460) | 2724.393 (1757.038) | |
## | Range | 0.000 - 18773.000 | 0.000 - 37626.000 | 288.000 - 26376.000 | 220.000 - 21646.000 | 0.000 - 37626.000 | |
## |**mage** | | | | | | < 0.001|
## | Mean (SD) | 38.267 (7.862) | 35.750 (6.367) | 34.822 (5.957) | 35.503 (6.186) | 36.109 (6.773) | |
## | Range | 18.800 - 73.700 | 19.200 - 60.300 | 19.300 - 64.700 | 19.800 - 61.700 | 18.800 - 73.700 | |
## |**hispan** | | | | | | < 0.001|
## | Mean (SD) | 33.441 (28.103) | 38.595 (26.960) | 44.914 (28.211) | 41.608 (27.779) | 39.537 (28.067) | |
## | Range | 0.000 - 100.000 | 0.600 - 100.000 | 0.000 - 100.000 | 0.000 - 99.900 | 0.000 - 100.000 | |
## |**nh_white** | | | | | | < 0.001|
## | Mean (SD) | 54.212 (28.551) | 42.359 (26.463) | 34.538 (25.284) | 38.847 (25.508) | 42.643 (27.490) | |
## | Range | 0.000 - 100.000 | 0.000 - 94.400 | 0.000 - 89.800 | 0.000 - 98.400 | 0.000 - 100.000 | |
## |**nh_black** | | | | | | < 0.001|
## | Mean (SD) | 8.759 (13.499) | 12.935 (16.512) | 14.032 (17.724) | 11.219 (13.401) | 11.705 (15.497) | |
## | Range | 0.000 - 95.600 | 0.000 - 96.800 | 0.000 - 92.100 | 0.000 - 81.300 | 0.000 - 96.800 | |
## |**nh_asian** | | | | | | < 0.001|
## | Mean (SD) | 1.520 (3.444) | 3.956 (6.988) | 4.533 (7.174) | 6.041 (8.705) | 3.991 (7.027) | |
## | Range | 0.000 - 42.800 | 0.000 - 61.200 | 0.000 - 54.700 | 0.000 - 75.800 | 0.000 - 75.800 | |
## |**percentfemale** | | | | | | < 0.001|
## | Mean (SD) | 49.449 (6.519) | 50.423 (4.538) | 50.816 (3.939) | 50.389 (4.000) | 50.260 (4.906) | |
## | Range | 0.000 - 82.975 | 0.000 - 63.081 | 17.187 - 71.375 | 26.809 - 68.496 | 0.000 - 82.975 | |
final3<-subset(combineB, select=c(BPHIGH_CrudePrev,CHD_CrudePrev,OBESITY_CrudePrev,DIABETES_CrudePrev,HIGHCHOL_CrudePrev,group))
names(final3)
## [1] "BPHIGH_CrudePrev" "CHD_CrudePrev" "OBESITY_CrudePrev"
## [4] "DIABETES_CrudePrev" "HIGHCHOL_CrudePrev" "group"
library(arsenal)
table_2 <- tableby(group ~ ., data =final1)
summary(table_2, title = "Descriptive Analysis II")
##
##
## Table: Descriptive Analysis II
##
## | | Q1 (N=1332) | Q2 (N=1346) | Q3 (N=1246) | Q4 (N=1298) | Total (N=5222) | p value|
## |:---------------------------|:-------------------:|:-------------------:|:-------------------:|:-------------------:|:-------------------:|-------:|
## |**WI** | | | | | | < 0.001|
## | Mean (SD) | 4.273 (1.175) | 7.222 (0.693) | 9.505 (0.658) | 12.629 (1.419) | 8.358 (3.243) | |
## | Range | 1.000 - 6.000 | 6.000 - 8.333 | 8.500 - 10.667 | 10.833 - 19.000 | 1.000 - 19.000 | |
## |**totpop** | | | | | | < 0.001|
## | Mean (SD) | 5333.612 (3307.716) | 6087.311 (4283.567) | 5358.671 (3096.476) | 4842.710 (2545.362) | 5411.841 (3407.698) | |
## | Range | 37.000 - 35550.000 | 16.000 - 72041.000 | 690.000 - 49119.000 | 454.000 - 41538.000 | 16.000 - 72041.000 | |
## |**fpop** | | | | | | < 0.001|
## | Mean (SD) | 2641.559 (1698.413) | 3077.238 (2209.441) | 2721.681 (1594.162) | 2446.108 (1323.460) | 2724.393 (1757.038) | |
## | Range | 0.000 - 18773.000 | 0.000 - 37626.000 | 288.000 - 26376.000 | 220.000 - 21646.000 | 0.000 - 37626.000 | |
## |**mage** | | | | | | < 0.001|
## | Mean (SD) | 38.267 (7.862) | 35.750 (6.367) | 34.822 (5.957) | 35.503 (6.186) | 36.109 (6.773) | |
## | Range | 18.800 - 73.700 | 19.200 - 60.300 | 19.300 - 64.700 | 19.800 - 61.700 | 18.800 - 73.700 | |
## |**hispan** | | | | | | < 0.001|
## | Mean (SD) | 33.441 (28.103) | 38.595 (26.960) | 44.914 (28.211) | 41.608 (27.779) | 39.537 (28.067) | |
## | Range | 0.000 - 100.000 | 0.600 - 100.000 | 0.000 - 100.000 | 0.000 - 99.900 | 0.000 - 100.000 | |
## |**nh_white** | | | | | | < 0.001|
## | Mean (SD) | 54.212 (28.551) | 42.359 (26.463) | 34.538 (25.284) | 38.847 (25.508) | 42.643 (27.490) | |
## | Range | 0.000 - 100.000 | 0.000 - 94.400 | 0.000 - 89.800 | 0.000 - 98.400 | 0.000 - 100.000 | |
## |**nh_black** | | | | | | < 0.001|
## | Mean (SD) | 8.759 (13.499) | 12.935 (16.512) | 14.032 (17.724) | 11.219 (13.401) | 11.705 (15.497) | |
## | Range | 0.000 - 95.600 | 0.000 - 96.800 | 0.000 - 92.100 | 0.000 - 81.300 | 0.000 - 96.800 | |
## |**nh_asian** | | | | | | < 0.001|
## | Mean (SD) | 1.520 (3.444) | 3.956 (6.988) | 4.533 (7.174) | 6.041 (8.705) | 3.991 (7.027) | |
## | Range | 0.000 - 42.800 | 0.000 - 61.200 | 0.000 - 54.700 | 0.000 - 75.800 | 0.000 - 75.800 | |
## |**percentfemale** | | | | | | < 0.001|
## | Mean (SD) | 49.449 (6.519) | 50.423 (4.538) | 50.816 (3.939) | 50.389 (4.000) | 50.260 (4.906) | |
## | Range | 0.000 - 82.975 | 0.000 - 63.081 | 17.187 - 71.375 | 26.809 - 68.496 | 0.000 - 82.975 | |
library(tigris)
## To enable caching of data, set `options(tigris_use_cache = TRUE)`
## in your R script or .Rprofile.
library(leaflet)
tx_censustracts <- tracts(state = "TX")
## Retrieving data for the year 2020
tx_censustracts_cb <- tracts("TX", cb = TRUE)
## Retrieving data for the year 2020
plot(tx_censustracts$geometry)
plot(tx_censustracts_cb$geometry)
dmap <- tx_censustracts_cb %>%
mutate(GEOID = as.numeric(GEOID))
dmap1 <-dmap %>%
left_join(combineB, by = c("GEOID"="TractFIPS")) %>%
select(GEOID, BPHIGH_CrudePrev,CHD_CrudePrev,OBESITY_CrudePrev,DIABETES_CrudePrev,HIGHCHOL_CrudePrev)
names(dmap1)
## [1] "GEOID" "BPHIGH_CrudePrev" "CHD_CrudePrev"
## [4] "OBESITY_CrudePrev" "DIABETES_CrudePrev" "HIGHCHOL_CrudePrev"
## [7] "geometry"
library(tmap)
tm_shape(dmap1) +
tm_fill(col = "BPHIGH_CrudePrev", style = "quantile",n=4,palette = "Blues",
title = "% High BP")
tm_shape(dmap1) +
tm_fill(col = "CHD_CrudePrev", style = "quantile",n=4,palette = "Reds",
title = "% Coronary Artery Disease")
tm_shape(dmap1) +
tm_fill(col = "DIABETES_CrudePrev", style = "quantile",n=4,palette = "Greens",
title = "% of Diabetes")
tm_shape(dmap1) +
tm_fill(col = "OBESITY_CrudePrev", style = "quantile",n=4,palette = "Oranges",
title = "% of Obesity")
tm_shape(dmap1) +
tm_fill(col = "HIGHCHOL_CrudePrev", style = "quantile",n=4, palette = "Purples",
title = "% of High Cholestrol")