##Read in and join data from Arc We also dropped 8 Census Tracts that are not included in the CDC data and have very limited ACS data, to avoid skewing the results.
CDC_ACS_Dat <- read.csv("PhillyDataCombined.csv")
MEP_Dat <- read.csv("PhillyTractsMEP.csv")
#creating the link to join the two datasets together
MEP_Dat$Geo_FIPS <- MEP_Dat$GEOID10
#joining all data
dat <- full_join(CDC_ACS_Dat, MEP_Dat, by = "Geo_FIPS")
#this is the joined, base dataset without the empty rows
dat2 <- dat[complete.cases(dat[90]),]
Total Pop: “A00001” Pop by Gender: “A02001_001” Pop by Age: A01001 Pop by Race: A03001 Average household size: A10003 Median income: A14006 Nativity by Citizenship status: A06001 Children living with single parents: A10065
#Total Population (continuous)
dat2$TotalPop <- dat2$SE_A00001_001
dat2$Log_TotalPop <- log(dat2$TotalPop)
#Percent female (maybe binary)
dat2$PctFem <- (dat2$SE_A02001_003/dat2$SE_A00002_001)*100
dat2$Log_PctFem <- log(dat2$PctFem)
#Percent male
dat2$PctMale <- (dat2$SE_A02001_002/dat2$SE_A00002_001)*100
dat2$Log_PctMale <- log(dat2$PctMale)
#Pct Pop by Age - up to 17
dat2$PopUnd17 <- ((dat2$SE_A01001_002+dat2$SE_A01001_003+dat2$SE_A01001_004+dat2$SE_A01001_005)/dat2$SE_A01001_001)*100
dat2$Log_PopUnd17 <- log(dat2$PopUnd17)
#Pct Pop Between 18-54
dat2$Pop18to54 <- ((dat2$SE_A01001_006+dat2$SE_A01001_007+dat2$SE_A01001_008+dat2$SE_A01001_009)/dat2$SE_A01001_001)*100
dat2$Log_Pop18to54 <- log(dat2$Pop18to54)
#Pct Pop 55+
dat2$PopOver55 <- ((dat2$SE_A01001_010+dat2$SE_A01001_011+dat2$SE_A01001_012+dat2$SE_A01001_013)/dat2$SE_A01001_001)*100
dat2$Log_PopOver55 <- log(dat2$PopOver55)
#Pct pop that is white
dat2$PctWhite <- (dat2$SE_A03001_002/dat2$SE_A03001_001)*100
dat2$Log_PctWhite <- log(dat2$PctWhite)
#Pct pop that is black
dat2$PctBlack <- (dat2$SE_A03001_003/dat2$SE_A03001_001)*100
dat2$Log_PctBlack <- log(dat2$PctBlack)
#Pct pop American Indian, Asian, Hawaiian or PI, other race
dat2$PctAAPI <- ((dat2$SE_A03001_004+dat2$SE_A03001_005+dat2$SE_A03001_006+dat2$SE_A03001_007+dat2$SE_A03001_008)/dat2$SE_A03001_001)*100
dat2$Log_PctAAPI <- log(dat2$PctAAPI)
#Majority white (binary)
dat2$MajorityWhite[dat2$PctWhite > "0.50"] <- 1
dat2$MajorityWhite[dat2$PctWhite <= "0.50"] <- 0
#Avg Household size
dat2$AvgHHSize <- dat2$SE_A10003_001
dat2$Log_AvgHHSize <- log(dat2$AvgHHSize)
#Unemployment rate
dat2$UnemploymentRate <- (dat2$SE_A17002_006/dat2$SE_A17002_002)*100
dat2$Log_UnemploymentRate <- log(dat2$UnemploymentRate)
#Median income
dat2$MedHHInc <- dat2$SE_A14006_001
dat2$Log_MedHHInc <- log(dat2$MedHHInc)
#Pct non-citizens
dat2$PctNonCitizen <- (dat2$SE_A06001_005/dat2$SE_A06001_001)*100
dat2$Log_PctNonCitizen <- log(dat2$PctNonCitizen)
#Pct children of single parents of total children
dat2$PctSingleParent <- (dat2$SE_A10065_001/dat2$SE_A10065_002)*100
dat2$Log_PctSingleParent <- log(dat2$PctSingleParent)
#Pct Unwell for 14 Days
dat2$PctUnwellAdults <- dat2$Physical_health_not_good_for___14_days_among_adults_aged___18_ye
dat2$Log_PctUnwellAdults <- log(dat2$PctUnwellAdults)
#Adult asthma rate
dat2$AsthmaRate <- dat2$Current_asthma_among_adults_aged___18_years
dat2$Log_AsthmaRate <- log(dat2$AsthmaRate)
#Routine doctor visit rate
dat2$PctRoutineDrVisit <- dat2$Visits_to_doctor_for_routine_checkup_within_the_past_year_among
dat2$Log_PctRoutineDrVisit <- log(dat2$PctRoutineDrVisit)
#Cancer rate
dat2$CancerRate <- dat2$Cancer__excluding_skin_cancer__among_adults_aged___18_years
dat2$Log_CancerRate <- log(dat2$CancerRate)
#Uninsured Rate
dat2$PctUninsured <- dat2$Current_lack_of_health_insurance_among_adults_aged_18_64_years
dat2$Log_PctUninsured <- log(dat2$PctUninsured)
#Density visit rate
dat2$PctDentistVisit <- dat2$Visits_to_dentist_or_dental_clinic_among_adults_aged___18_years
dat2$Log_PctDentistVisit <- log(dat2$PctDentistVisit)
#Mental health not good for 14 days rate
dat2$PctMentallyUnwell <- dat2$Mental_health_not_good_for___14_days_among_adults_aged___18_year
dat2$Log_PctMentallyUnwell <- log(dat2$PctMentallyUnwell)
#MEP
dat2$AvgMEP <- dat2$MEAN_mep
dat2$Log_AvgMEP <- log(dat2$AvgMEP)
is.na(dat2) <- sapply(dat2, is.infinite)
#Categorized variables for t-tests and chi-squared
#Majority female
dat2$MajFemale <- 0
dat2$MajFemale[dat2$PctFem >= "50"] <- 1
#Avg Household size
summary(dat2$AvgHHSize)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.350 2.225 2.540 2.539 2.875 3.950 1
#categorized by above the median =1, others =0
dat2$BigHH <- 0
dat2$BigHH[dat2$AvgHHSize > "2.540"] <- 1
#Median income
summary(dat2$MedHHInc)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 10793 29766 44184 49394 62033 143646 2
#categorized by above the median=1, others=0
dat2$HighIncome <- 0
dat2$HighIncome[dat2$MedHHInc > "44184"] <- 1
##histograms of key variables
hist(dat2$AvgHHSize,
main="Histogram of Average Household Size",
xlab="Avg. Household Size")
hist(dat2$PctBlack,
main="Histogram of Black Percentage of Population",
xlab="% of Census Population that is Black")
hist(dat2$Pop18to54,
main="Histogram of Population Percentage Between 18 and 54",
xlab="% of Census Population that is 18-54 Years Old")
##table showing data stats for variables in the final model
varsInterest <- c("AvgMEP", "AvgHHSize", "Pop18to54", "CancerRate", "PctUnwellAdults", "PctBlack")
datviz<-dat2[ , which(names(dat2) %in% varsInterest)]
head(datviz)
## Pop18to54 PctBlack AvgHHSize PctUnwellAdults CancerRate AvgMEP
## 1 76.05634 6.653715 1.64 5.9 3.9 470.3547
## 2 70.34965 4.300699 2.46 11.6 3.6 493.3073
## 3 60.88626 7.090103 1.70 7.3 5.6 448.2941
## 4 67.17011 11.367250 1.52 8.4 5.7 439.0412
## 5 51.98926 5.223334 1.40 8.7 10.6 478.3530
## 6 88.51750 32.913881 1.47 10.5 2.7 498.1745
stargazer(datviz, type = "text")
##
## ======================================================================
## Statistic N Mean St. Dev. Min Pctl(25) Pctl(75) Max
## ----------------------------------------------------------------------
## Pop18to54 376 53.589 11.150 18.231 46.512 58.552 98.078
## PctBlack 376 42.995 34.350 0.000 10.394 80.430 98.516
## AvgHHSize 375 2.539 0.470 1.350 2.225 2.875 3.950
## PctUnwellAdults 376 14.815 4.708 4.500 11.575 18.025 27.200
## CancerRate 376 5.902 1.810 1.000 4.700 6.700 18.300
## AvgMEP 376 297.769 84.261 145.438 232.310 357.976 499.603
## ----------------------------------------------------------------------
T-Tests
t.test (AvgMEP ~ MajFemale, data = dat2, paired = FALSE)#AvgMEP & Majority Female
##
## Welch Two Sample t-test
##
## data: AvgMEP by MajFemale
## t = 3.0567, df = 164.41, p-value = 0.002612
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 10.79203 50.17157
## sample estimates:
## mean in group 0 mean in group 1
## 320.3062 289.8244
t.test (AvgMEP ~ BigHH, data = dat2, paired = FALSE)#AvgMEP & HH Size
##
## Welch Two Sample t-test
##
## data: AvgMEP by BigHH
## t = 4.9632, df = 330.97, p-value = 0.000001112
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 25.15968 58.19892
## sample estimates:
## mean in group 0 mean in group 1
## 318.3871 276.7078
t.test (AvgMEP ~ HighIncome, data = dat2, paired = FALSE)#AvgMEP & HHI
##
## Welch Two Sample t-test
##
## data: AvgMEP by HighIncome
## t = 1.6802, df = 275.27, p-value = 0.09405
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -2.632101 33.300514
## sample estimates:
## mean in group 0 mean in group 1
## 304.5390 289.2048
Cor Tests
cor.test(dat2$AvgMEP, dat2$MajFemale)
##
## Pearson's product-moment correlation
##
## data: dat2$AvgMEP and dat2$MajFemale
## t = -3.1149, df = 374, p-value = 0.001982
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.2560348 -0.0588262
## sample estimates:
## cor
## -0.1590163
cor.test(dat2$AvgMEP, dat2$BigHH)
##
## Pearson's product-moment correlation
##
## data: dat2$AvgMEP and dat2$BigHH
## t = -4.9431, df = 374, p-value = 0.000001163
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.3402535 -0.1502664
## sample estimates:
## cor
## -0.2476391
cor.test(dat2$AvgMEP, dat2$HighIncome)
##
## Pearson's product-moment correlation
##
## data: dat2$AvgMEP and dat2$HighIncome
## t = -1.7572, df = 374, p-value = 0.07971
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.18988627 0.01074658
## sample estimates:
## cor
## -0.0904879
CrossTable(dat2$AvgMEP, dat2$MajFemale, fisher = FALSE, chisq = TRUE,
expected = TRUE, sresid = FALSE, format="SPSS")#needs to be separated by quartiles. Do we split AvgMEP or other variable?
Correlation tests
Total Pop: small p-value, negatively correlated Pct Fem: 0.06 p-value, negatively correlated Pct Male: 0.062 p-value, positively correlated Pop Under 17: small p-value, negatively correlated Pop 18-54: small p-value, positively correlated Pop 55+: small p-value, negatively correlated Pct White: 0.049 p-value, positively correlated Pct Black: 0.006 p-value, negatively correlated Pct AAPI: 0.012 p-value, positively correlated Majority WHite: 0.5821, minor correlation - ACCEPT NULL Avg HH Size: small p-value, negatively correlated Unemployment Rate: 0.045 p-value, negatively correlated Median HH Income: 0.084 p-value, positively correlated Pct Non Citizen: 0.79 p-value, minor correlation - ACCEPT NULL Pct Single Parent: data issue Pct Unwell Adults: small p-value, negatively correlated Asthma Rate: 0.044 p-value, negatively correlated Pct Routine Dr Visit: small p-value, negatively correlated Cancer Rate: small p-value, negatively correlated Pct Uninsured: 0.88 p-value, minor correlation - ACCEPT NULL Pct Dentist Visits: 0.42 p-value, minor correlation - ACCEPT NULL Pct Mentally Unwell: 0.88 p-value, minor correlation - ACCEPT NULL
cor.test(dat2$AvgMEP, dat2$TotalPop)
##
## Pearson's product-moment correlation
##
## data: dat2$AvgMEP and dat2$TotalPop
## t = -2.9414, df = 374, p-value = 0.003471
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.24773626 -0.04999172
## sample estimates:
## cor
## -0.1503676
cor.test(dat2$AvgMEP, dat2$PctFem)
##
## Pearson's product-moment correlation
##
## data: dat2$AvgMEP and dat2$PctFem
## t = -1.8703, df = 374, p-value = 0.06223
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.195493539 0.004923445
## sample estimates:
## cor
## -0.09626062
cor.test(dat2$AvgMEP, dat2$PctMale)
##
## Pearson's product-moment correlation
##
## data: dat2$AvgMEP and dat2$PctMale
## t = 1.8703, df = 374, p-value = 0.06223
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.004923445 0.195493539
## sample estimates:
## cor
## 0.09626062
cor.test(dat2$AvgMEP, dat2$PopUnd17)
##
## Pearson's product-moment correlation
##
## data: dat2$AvgMEP and dat2$PopUnd17
## t = -6.1361, df = 374, p-value = 0.000000002156
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.3915923 -0.2076490
## sample estimates:
## cor
## -0.3024338
cor.test(dat2$AvgMEP, dat2$Pop18to54)
##
## Pearson's product-moment correlation
##
## data: dat2$AvgMEP and dat2$Pop18to54
## t = 11.519, df = 374, p-value < 0.00000000000000022
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.4330079 0.5827118
## sample estimates:
## cor
## 0.5117338
cor.test(dat2$AvgMEP, dat2$PopOver55)
##
## Pearson's product-moment correlation
##
## data: dat2$AvgMEP and dat2$PopOver55
## t = -6.8634, df = 374, p-value = 0.00000000002805
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.4213428 -0.2414915
## sample estimates:
## cor
## -0.334459
cor.test(dat2$AvgMEP, dat2$PctWhite)
##
## Pearson's product-moment correlation
##
## data: dat2$AvgMEP and dat2$PctWhite
## t = 1.9785, df = 374, p-value = 0.0486
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.000646634 0.200844897
## sample estimates:
## cor
## 0.1017761
cor.test(dat2$AvgMEP, dat2$PctBlack)
##
## Pearson's product-moment correlation
##
## data: dat2$AvgMEP and dat2$PctBlack
## t = -2.7435, df = 374, p-value = 0.006372
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.23820873 -0.03988687
## sample estimates:
## cor
## -0.1404564
cor.test(dat2$AvgMEP, dat2$PctAAPI)
##
## Pearson's product-moment correlation
##
## data: dat2$AvgMEP and dat2$PctAAPI
## t = 2.5167, df = 374, p-value = 0.01226
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.02828103 0.22721866
## sample estimates:
## cor
## 0.1290481
cor.test(dat2$AvgMEP, dat2$MajorityWhite)
##
## Pearson's product-moment correlation
##
## data: dat2$AvgMEP and dat2$MajorityWhite
## t = 0.55078, df = 374, p-value = 0.5821
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.07287755 0.12923246
## sample estimates:
## cor
## 0.02846841
cor.test(dat2$AvgMEP, dat2$AvgHHSize)
##
## Pearson's product-moment correlation
##
## data: dat2$AvgMEP and dat2$AvgHHSize
## t = -7.2594, df = 373, p-value = 0.000000000002278
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.4375267 -0.2598327
## sample estimates:
## cor
## -0.3518455
cor.test(dat2$AvgMEP, dat2$UnemploymentRate)
##
## Pearson's product-moment correlation
##
## data: dat2$AvgMEP and dat2$UnemploymentRate
## t = -2.0137, df = 374, p-value = 0.04476
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.202578925 -0.002454204
## sample estimates:
## cor
## -0.1035646
cor.test(dat2$AvgMEP, dat2$MedHHInc)
##
## Pearson's product-moment correlation
##
## data: dat2$AvgMEP and dat2$MedHHInc
## t = 1.7307, df = 372, p-value = 0.08433
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.01214194 0.18906755
## sample estimates:
## cor
## 0.0893746
cor.test(dat2$AvgMEP, dat2$PctNonCitizen)
##
## Pearson's product-moment correlation
##
## data: dat2$AvgMEP and dat2$PctNonCitizen
## t = 0.26569, df = 374, p-value = 0.7906
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.08752041 0.11471407
## sample estimates:
## cor
## 0.01373732
cor.test(dat2$AvgMEP, dat2$PctSingleParent)
##
## Pearson's product-moment correlation
##
## data: dat2$AvgMEP and dat2$PctSingleParent
## t = -0.31531, df = 364, p-value = 0.7527
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.11883329 0.08613148
## sample estimates:
## cor
## -0.0165245
cor.test(dat2$AvgMEP, dat2$PctUnwellAdults)
##
## Pearson's product-moment correlation
##
## data: dat2$AvgMEP and dat2$PctUnwellAdults
## t = -3.6245, df = 374, p-value = 0.0003296
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.28012904 -0.08465283
## sample estimates:
## cor
## -0.1842118
cor.test(dat2$AvgMEP, dat2$AsthmaRate)
##
## Pearson's product-moment correlation
##
## data: dat2$AvgMEP and dat2$AsthmaRate
## t = -2.0246, df = 374, p-value = 0.04362
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.203119198 -0.003017657
## sample estimates:
## cor
## -0.104122
cor.test(dat2$AvgMEP, dat2$PctRoutineDrVisit)
##
## Pearson's product-moment correlation
##
## data: dat2$AvgMEP and dat2$PctRoutineDrVisit
## t = -6.3347, df = 374, p-value = 0.0000000006832
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.3998347 -0.2169812
## sample estimates:
## cor
## -0.3112863
cor.test(dat2$AvgMEP, dat2$CancerRate)
##
## Pearson's product-moment correlation
##
## data: dat2$AvgMEP and dat2$CancerRate
## t = -8.8248, df = 374, p-value < 0.00000000000000022
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.4954731 -0.3277650
## sample estimates:
## cor
## -0.4151397
cor.test(dat2$AvgMEP, dat2$PctUninsured)
##
## Pearson's product-moment correlation
##
## data: dat2$AvgMEP and dat2$PctUninsured
## t = -0.15328, df = 374, p-value = 0.8783
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.10897466 0.09328503
## sample estimates:
## cor
## -0.007925876
cor.test(dat2$AvgMEP, dat2$PctDentistVisit)
##
## Pearson's product-moment correlation
##
## data: dat2$AvgMEP and dat2$PctDentistVisit
## t = 0.80416, df = 374, p-value = 0.4218
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.05984116 0.14208553
## sample estimates:
## cor
## 0.04154642
cor.test(dat2$AvgMEP, dat2$PctMentallyUnwell)
##
## Pearson's product-moment correlation
##
## data: dat2$AvgMEP and dat2$PctMentallyUnwell
## t = -0.13853, df = 374, p-value = 0.8899
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.10822078 0.09404121
## sample estimates:
## cor
## -0.00716305
Plots!
boxplot((AvgMEP) ~ MajFemale, data= dat2)
boxplot((AvgMEP) ~ AvgHHSize, data= dat2)
boxplot((AvgMEP) ~ MedHHInc, data= dat2)
boxplot((Log_AvgMEP) ~ Log_MedHHInc, data= dat2)
More plots!!
Total Pop: small p-value, negatively correlated Pct Fem: 0.06 p-value, negatively correlated Pct Male: 0.062 p-value, positively correlated Pop Under 17: small p-value, negatively correlated Pop 18-54: small p-value, positively correlated Pop 55+: small p-value, negatively correlated Pct White: 0.049 p-value, positively correlated Pct Black: 0.006 p-value, negatively correlated Pct AAPI: 0.012 p-value, positively correlated Majority WHite: 0.5821, minor correlation - ACCEPT NULL Avg HH Size: small p-value, negatively correlated Unemployment Rate: 0.045 p-value, negatively correlated Median HH Income: 0.084 p-value, positively correlated Pct Non Citizen: 0.79 p-value, minor correlation - ACCEPT NULL Pct Single Parent: data issue Pct Unwell Adults: small p-value, negatively correlated Asthma Rate: 0.044 p-value, negatively correlated Pct Routine Dr Visit: small p-value, negatively correlated Cancer Rate: small p-value, negatively correlated Pct Uninsured: 0.88 p-value, minor correlation - ACCEPT NULL Pct Dentist Visits: 0.42 p-value, minor correlation - ACCEPT NULL Pct Mentally Unwell: 0.88 p-value, minor correlation - ACCEPT NULL
ggplot(data=dat2, aes(AvgMEP)) +
geom_histogram() +
labs(title="Average MEP Distribution", x="Value of MEP", y="Number of Tracts")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(data=dat2, aes(Log_AvgMEP)) +
geom_histogram() +
labs(title="Log Average MEP Distribution", x="Log Value of MEP", y="Number of Tracts")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(data=dat2, aes(Log_MedHHInc, Log_AvgMEP)) + geom_point()+
ggtitle("Log Median Income Against Log Avg MEP")
## Warning: Removed 2 rows containing missing values (geom_point).
ggplot(data=dat2, aes(MedHHInc, AvgMEP)) + geom_point()+
ggtitle("Median Income Against Avg MEP")
## Warning: Removed 2 rows containing missing values (geom_point).
ggplot(data=dat2, aes(AvgHHSize, AvgMEP)) + geom_point()+
ggtitle("Avg Household Size Against Avg MEP")
## Warning: Removed 1 rows containing missing values (geom_point).
ggplot(data=dat2, aes(Log_AvgHHSize, Log_AvgMEP)) + geom_point()+
ggtitle("Log Avg Household Size Against Log Avg MEP")
## Warning: Removed 1 rows containing missing values (geom_point).
ggplot(data=dat2, aes(Log_TotalPop, Log_AvgMEP)) + geom_point()+
ggtitle("Log Total Pop Against Log Avg MEP")
ggplot(data=dat2, aes(Log_Pop18to54, Log_AvgMEP)) + geom_point()+
ggtitle("Log Population 18-54 Against Log Avg MEP")
ggplot(data=dat2, aes(Log_PctUnwellAdults, Log_AvgMEP)) + geom_point()+
ggtitle("Log Unwell Adults Against Log Avg MEP")
ggplot(data=dat2, aes(Log_CancerRate, Log_AvgMEP)) + geom_point()+
ggtitle("Log Cancer Rate Against Log Avg MEP")
ggplot(data=dat2, aes(PctBlack, AvgMEP)) + geom_point()+
ggtitle("Log Cancer Rate Against Log Avg MEP")
ggplot(data=dat2, aes(Log_PctBlack, Log_AvgMEP)) + geom_point()+
ggtitle("Log Cancer Rate Against Log Avg MEP")
## Warning: Removed 1 rows containing missing values (geom_point).
Total Pop: small p-value, negatively correlated Pop Under 17: small p-value, negatively correlated Pop 18-54: small p-value, positively correlated Pop 55+: small p-value, negatively correlated Pct White: 0.049 p-value, positively correlated Pct Black: 0.006 p-value, negatively correlated Pct AAPI: 0.012 p-value, positively correlated Avg HH Size: small p-value, negatively correlated Unemployment Rate: 0.045 p-value, negatively correlated Pct Unwell Adults: small p-value, negatively correlated Asthma Rate: 0.044 p-value, negatively correlated Pct Routine Dr Visit: small p-value, negatively correlated Cancer Rate: small p-value, negatively correlated
cor.test(dat2$AvgMEP, dat2$TotalPop)
##
## Pearson's product-moment correlation
##
## data: dat2$AvgMEP and dat2$TotalPop
## t = -2.9414, df = 374, p-value = 0.003471
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.24773626 -0.04999172
## sample estimates:
## cor
## -0.1503676
cor.test(dat2$AvgMEP, dat2$PopUnd17)
##
## Pearson's product-moment correlation
##
## data: dat2$AvgMEP and dat2$PopUnd17
## t = -6.1361, df = 374, p-value = 0.000000002156
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.3915923 -0.2076490
## sample estimates:
## cor
## -0.3024338
cor.test(dat2$AvgMEP, dat2$Pop18to54)
##
## Pearson's product-moment correlation
##
## data: dat2$AvgMEP and dat2$Pop18to54
## t = 11.519, df = 374, p-value < 0.00000000000000022
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.4330079 0.5827118
## sample estimates:
## cor
## 0.5117338
cor.test(dat2$AvgMEP, dat2$PopOver55)
##
## Pearson's product-moment correlation
##
## data: dat2$AvgMEP and dat2$PopOver55
## t = -6.8634, df = 374, p-value = 0.00000000002805
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.4213428 -0.2414915
## sample estimates:
## cor
## -0.334459
cor.test(dat2$AvgMEP, dat2$PctWhite)
##
## Pearson's product-moment correlation
##
## data: dat2$AvgMEP and dat2$PctWhite
## t = 1.9785, df = 374, p-value = 0.0486
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.000646634 0.200844897
## sample estimates:
## cor
## 0.1017761
cor.test(dat2$AvgMEP, dat2$PctBlack)
##
## Pearson's product-moment correlation
##
## data: dat2$AvgMEP and dat2$PctBlack
## t = -2.7435, df = 374, p-value = 0.006372
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.23820873 -0.03988687
## sample estimates:
## cor
## -0.1404564
cor.test(dat2$AvgMEP, dat2$PctAAPI)
##
## Pearson's product-moment correlation
##
## data: dat2$AvgMEP and dat2$PctAAPI
## t = 2.5167, df = 374, p-value = 0.01226
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.02828103 0.22721866
## sample estimates:
## cor
## 0.1290481
cor.test(dat2$AvgMEP, dat2$AvgHHSize)
##
## Pearson's product-moment correlation
##
## data: dat2$AvgMEP and dat2$AvgHHSize
## t = -7.2594, df = 373, p-value = 0.000000000002278
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.4375267 -0.2598327
## sample estimates:
## cor
## -0.3518455
cor.test(dat2$AvgMEP, dat2$UnemploymentRate)
##
## Pearson's product-moment correlation
##
## data: dat2$AvgMEP and dat2$UnemploymentRate
## t = -2.0137, df = 374, p-value = 0.04476
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.202578925 -0.002454204
## sample estimates:
## cor
## -0.1035646
cor.test(dat2$AvgMEP, dat2$PctUnwellAdults)
##
## Pearson's product-moment correlation
##
## data: dat2$AvgMEP and dat2$PctUnwellAdults
## t = -3.6245, df = 374, p-value = 0.0003296
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.28012904 -0.08465283
## sample estimates:
## cor
## -0.1842118
cor.test(dat2$AvgMEP, dat2$AsthmaRate)
##
## Pearson's product-moment correlation
##
## data: dat2$AvgMEP and dat2$AsthmaRate
## t = -2.0246, df = 374, p-value = 0.04362
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.203119198 -0.003017657
## sample estimates:
## cor
## -0.104122
cor.test(dat2$AvgMEP, dat2$PctRoutineDrVisit)
##
## Pearson's product-moment correlation
##
## data: dat2$AvgMEP and dat2$PctRoutineDrVisit
## t = -6.3347, df = 374, p-value = 0.0000000006832
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.3998347 -0.2169812
## sample estimates:
## cor
## -0.3112863
cor.test(dat2$AvgMEP, dat2$CancerRate)
##
## Pearson's product-moment correlation
##
## data: dat2$AvgMEP and dat2$CancerRate
## t = -8.8248, df = 374, p-value < 0.00000000000000022
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.4954731 -0.3277650
## sample estimates:
## cor
## -0.4151397
mod <- lm(Log_AvgMEP ~ Log_AvgHHSize, data = dat2)
summary (mod)
##
## Call:
## lm(formula = Log_AvgMEP ~ Log_AvgHHSize, data = dat2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.65162 -0.18810 0.01651 0.22363 0.53666
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.08632 0.06575 92.566 < 0.0000000000000002 ***
## Log_AvgHHSize -0.46863 0.07038 -6.659 0.0000000000991 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2651 on 373 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.1062, Adjusted R-squared: 0.1038
## F-statistic: 44.34 on 1 and 373 DF, p-value: 0.00000000009909
par(mfrow=c(1,1))
plot (dat2$Log_AvgHHSize, dat2$Log_AvgMEP )
abline (mod, col="red")
mod1 <- lm(Log_AvgMEP ~ Log_AvgHHSize + Log_PctBlack, data = dat2)
summary (mod1)
##
## Call:
## lm(formula = Log_AvgMEP ~ Log_AvgHHSize + Log_PctBlack, data = dat2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.65706 -0.18373 0.02349 0.22347 0.53368
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.085266 0.069546 87.500 < 0.0000000000000002 ***
## Log_AvgHHSize -0.461901 0.071563 -6.454 0.00000000034 ***
## Log_PctBlack -0.001388 0.010093 -0.137 0.891
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2655 on 371 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.1039, Adjusted R-squared: 0.09903
## F-statistic: 21.5 on 2 and 371 DF, p-value: 0.000000001464
mod2 <- lm(Log_AvgMEP ~ Log_UnemploymentRate, data = dat2)
summary (mod2)
##
## Call:
## lm(formula = Log_AvgMEP ~ Log_UnemploymentRate, data = dat2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.68590 -0.21165 -0.01259 0.24020 0.55371
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.76218 0.03999 144.102 < 0.0000000000000002 ***
## Log_UnemploymentRate -0.05167 0.01849 -2.794 0.00548 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2769 on 371 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.0206, Adjusted R-squared: 0.01796
## F-statistic: 7.805 on 1 and 371 DF, p-value: 0.005482
par(mfrow=c(1,1))
plot (dat2$Log_UnemploymentRate, dat2$Log_AvgMEP )
abline (mod2, col="red")
mod3 <- lm(Log_AvgMEP ~ Log_AvgHHSize + Log_Pop18to54, data = dat2)
summary (mod3)
##
## Call:
## lm(formula = Log_AvgMEP ~ Log_AvgHHSize + Log_Pop18to54, data = dat2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.62106 -0.16890 -0.00087 0.18742 0.66977
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.33009 0.27794 11.981 < 0.0000000000000002 ***
## Log_AvgHHSize -0.27654 0.06519 -4.242 0.000028 ***
## Log_Pop18to54 0.65174 0.06426 10.142 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.235 on 372 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.2998, Adjusted R-squared: 0.2961
## F-statistic: 79.65 on 2 and 372 DF, p-value: < 0.00000000000000022
mod4 <- lm(Log_AvgMEP ~ Log_AvgHHSize + Log_Pop18to54 + Log_PctRoutineDrVisit, data = dat2)
summary (mod4)
##
## Call:
## lm(formula = Log_AvgMEP ~ Log_AvgHHSize + Log_Pop18to54 + Log_PctRoutineDrVisit,
## data = dat2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.6193 -0.1676 -0.0026 0.1879 0.6721
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.15748 1.61010 1.961 0.0506 .
## Log_AvgHHSize -0.27507 0.06666 -4.127 0.0000454684977027 ***
## Log_Pop18to54 0.65761 0.08397 7.831 0.0000000000000512 ***
## Log_PctRoutineDrVisit 0.03388 0.31128 0.109 0.9134
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2353 on 371 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.2999, Adjusted R-squared: 0.2942
## F-statistic: 52.97 on 3 and 371 DF, p-value: < 0.00000000000000022
mod5 <- lm(Log_AvgMEP ~ Log_PctRoutineDrVisit, data = dat2)
summary (mod5)
##
## Call:
## lm(formula = Log_AvgMEP ~ Log_PctRoutineDrVisit, data = dat2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.74360 -0.17974 -0.01809 0.21574 0.56659
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.3896 1.1887 10.423 < 0.0000000000000002 ***
## Log_PctRoutineDrVisit -1.5412 0.2721 -5.664 0.0000000295 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2694 on 374 degrees of freedom
## Multiple R-squared: 0.07901, Adjusted R-squared: 0.07655
## F-statistic: 32.08 on 1 and 374 DF, p-value: 0.00000002948
mod6 <- lm(Log_AvgMEP ~ Log_AvgHHSize + Log_Pop18to54 + Log_CancerRate, data = dat2)
summary (mod6)
##
## Call:
## lm(formula = Log_AvgMEP ~ Log_AvgHHSize + Log_Pop18to54 + Log_CancerRate,
## data = dat2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.60560 -0.16157 -0.02097 0.18568 0.60398
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.43758 0.50214 10.829 < 0.0000000000000002 ***
## Log_AvgHHSize -0.41499 0.06906 -6.009 0.00000000446 ***
## Log_Pop18to54 0.27475 0.09811 2.800 0.00537 **
## Log_CancerRate -0.28242 0.05678 -4.974 0.00000100566 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2278 on 371 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.3436, Adjusted R-squared: 0.3383
## F-statistic: 64.74 on 3 and 371 DF, p-value: < 0.00000000000000022
mod7 <- lm(Log_AvgMEP ~ Log_AvgHHSize + Log_Pop18to54 + Log_CancerRate + Log_PctUnwellAdults, data = dat2)
summary (mod7)
##
## Call:
## lm(formula = Log_AvgMEP ~ Log_AvgHHSize + Log_Pop18to54 + Log_CancerRate +
## Log_PctUnwellAdults, data = dat2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.60568 -0.15872 -0.02106 0.15081 0.74977
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.03273 0.58652 6.876 0.000000000026353 ***
## Log_AvgHHSize -0.54635 0.07385 -7.398 0.000000000000934 ***
## Log_Pop18to54 0.49969 0.10879 4.593 0.000005989620398 ***
## Log_CancerRate -0.22527 0.05697 -3.954 0.000092125428930 ***
## Log_PctUnwellAdults 0.20293 0.04648 4.366 0.000016477385806 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2225 on 370 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.3758, Adjusted R-squared: 0.369
## F-statistic: 55.68 on 4 and 370 DF, p-value: < 0.00000000000000022
The Variance Inflation Factor (VIF) detects multicollinearity in regression analysis. The VIF for these variables is relatively small (with anything less than 10 considered appropriate), meaning there are no problems of collinearity.
mod8 <- lm(Log_AvgMEP ~ Log_AvgHHSize + Log_Pop18to54 + Log_CancerRate + Log_PctUnwellAdults + Log_PctBlack, data = dat2)
summary (mod8)
##
## Call:
## lm(formula = Log_AvgMEP ~ Log_AvgHHSize + Log_Pop18to54 + Log_CancerRate +
## Log_PctUnwellAdults + Log_PctBlack, data = dat2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.70344 -0.14362 -0.00223 0.15156 0.71918
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.14022 0.58164 7.118 0.0000000000057677 ***
## Log_AvgHHSize -0.59840 0.07564 -7.912 0.0000000000000301 ***
## Log_Pop18to54 0.47692 0.10794 4.419 0.0000130857529105 ***
## Log_CancerRate -0.26638 0.05807 -4.587 0.0000061710619077 ***
## Log_PctUnwellAdults 0.27889 0.05242 5.320 0.0000001804957684 ***
## Log_PctBlack -0.03109 0.01006 -3.090 0.00215 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2202 on 368 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.3885, Adjusted R-squared: 0.3802
## F-statistic: 46.76 on 5 and 368 DF, p-value: < 0.00000000000000022
vif(mod8)
## Log_AvgHHSize Log_Pop18to54 Log_CancerRate Log_PctUnwellAdults
## 1.664222 3.502726 2.777506 2.582808
## Log_PctBlack
## 1.480532
stargazer(mod6, mod7, mod8, type = "text")
##
## ===========================================================================================
## Dependent variable:
## -----------------------------------------------------------------------
## Log_AvgMEP
## (1) (2) (3)
## -------------------------------------------------------------------------------------------
## Log_AvgHHSize -0.415*** -0.546*** -0.598***
## (0.069) (0.074) (0.076)
##
## Log_Pop18to54 0.275*** 0.500*** 0.477***
## (0.098) (0.109) (0.108)
##
## Log_CancerRate -0.282*** -0.225*** -0.266***
## (0.057) (0.057) (0.058)
##
## Log_PctUnwellAdults 0.203*** 0.279***
## (0.046) (0.052)
##
## Log_PctBlack -0.031***
## (0.010)
##
## Constant 5.438*** 4.033*** 4.140***
## (0.502) (0.587) (0.582)
##
## -------------------------------------------------------------------------------------------
## Observations 375 375 374
## R2 0.344 0.376 0.389
## Adjusted R2 0.338 0.369 0.380
## Residual Std. Error 0.228 (df = 371) 0.222 (df = 370) 0.220 (df = 368)
## F Statistic 64.738*** (df = 3; 371) 55.681*** (df = 4; 370) 46.763*** (df = 5; 368)
## ===========================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
##Correlation between final variables and MEP
par(mfrow=c(2, 3))
plot(dat2$Log_AvgHHSize, dat2$Log_AvgMEP, main=paste("Corr=", cor(dat2$Log_AvgHHSize, dat2$Log_AvgMEP, use="complete.obs", method="pearson")))
plot(dat2$Log_Pop18to54, dat2$Log_AvgMEP, main=paste("Corr=", cor(dat2$Log_Pop18to54, dat2$Log_AvgMEP, use="complete.obs", method="pearson")))
plot(dat2$Log_CancerRate, dat2$Log_AvgMEP, main=paste("Corr=", cor(dat2$Log_CancerRate, dat2$Log_AvgMEP, use="complete.obs", method="pearson")))
plot(dat2$Log_PctUnwellAdults, dat2$Log_AvgMEP, main=paste("Corr=", cor(dat2$Log_PctUnwellAdults, dat2$Log_AvgMEP, use="complete.obs", method="pearson")))
plot(dat2$Log_PctBlack, dat2$Log_AvgMEP, main=paste("Corr=", cor(dat2$Log_PctBlack, dat2$Log_AvgMEP, use="complete.obs", method="pearson")))
mod9 <- lm(Log_AvgMEP ~ Log_MedHHInc, data=dat2)
summary (mod9)
##
## Call:
## lm(formula = Log_AvgMEP ~ Log_MedHHInc, data = dat2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.67988 -0.20508 -0.04325 0.19902 0.58192
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.09748 0.30435 20.034 <0.0000000000000002 ***
## Log_MedHHInc -0.04104 0.02846 -1.442 0.15
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2793 on 372 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.005558, Adjusted R-squared: 0.002885
## F-statistic: 2.079 on 1 and 372 DF, p-value: 0.1502
par(mfrow=c(1,1))
plot (dat2$Log_MedHHInc, dat2$Log_AvgMEP )
abline (mod9, col="red")