505 Final

Vars of Interest

Total Pop: “A00001” Pop by Gender: “A02001_001” Pop by Age: A01001 Pop by Race: A03001 Average household size: A10003 Median income: A14006 Nativity by Citizenship status: A06001 Children living with single parents: A10065

#Total Population (continuous)
dat2$TotalPop <- dat2$SE_A00001_001
dat2$Log_TotalPop <- log(dat2$TotalPop)

#Percent female (maybe binary)
dat2$PctFem <- (dat2$SE_A02001_003/dat2$SE_A00002_001)*100
dat2$Log_PctFem <- log(dat2$PctFem)

#Percent male
dat2$PctMale <- (dat2$SE_A02001_002/dat2$SE_A00002_001)*100
dat2$Log_PctMale <- log(dat2$PctMale)

#Pct Pop by Age - up to 17
dat2$PopUnd17 <- ((dat2$SE_A01001_002+dat2$SE_A01001_003+dat2$SE_A01001_004+dat2$SE_A01001_005)/dat2$SE_A01001_001)*100
dat2$Log_PopUnd17 <- log(dat2$PopUnd17)

#Pct Pop Between 18-54
dat2$Pop18to54 <- ((dat2$SE_A01001_006+dat2$SE_A01001_007+dat2$SE_A01001_008+dat2$SE_A01001_009)/dat2$SE_A01001_001)*100
dat2$Log_Pop18to54 <- log(dat2$Pop18to54)

#Pct Pop 55+
dat2$PopOver55 <- ((dat2$SE_A01001_010+dat2$SE_A01001_011+dat2$SE_A01001_012+dat2$SE_A01001_013)/dat2$SE_A01001_001)*100
dat2$Log_PopOver55 <- log(dat2$PopOver55)

#Pct pop that is white
dat2$PctWhite <- (dat2$SE_A03001_002/dat2$SE_A03001_001)*100
dat2$Log_PctWhite <- log(dat2$PctWhite)

#Pct pop that is black
dat2$PctBlack <- (dat2$SE_A03001_003/dat2$SE_A03001_001)*100
dat2$Log_PctBlack <- log(dat2$PctBlack)

#Pct pop American Indian, Asian, Hawaiian or PI, other race
dat2$PctAAPI <- ((dat2$SE_A03001_004+dat2$SE_A03001_005+dat2$SE_A03001_006+dat2$SE_A03001_007+dat2$SE_A03001_008)/dat2$SE_A03001_001)*100
dat2$Log_PctAAPI <- log(dat2$PctAAPI)

#Majority white (binary)
dat2$MajorityWhite[dat2$PctWhite > "0.50"] <- 1
dat2$MajorityWhite[dat2$PctWhite <= "0.50"] <- 0

#Avg Household size
dat2$AvgHHSize <- dat2$SE_A10003_001
dat2$Log_AvgHHSize <- log(dat2$AvgHHSize)

#Unemployment rate
dat2$UnemploymentRate <- (dat2$SE_A17002_006/dat2$SE_A17002_002)*100
dat2$Log_UnemploymentRate <- log(dat2$UnemploymentRate)

#Median income
dat2$MedHHInc <- dat2$SE_A14006_001
dat2$Log_MedHHInc <- log(dat2$MedHHInc)

#Pct non-citizens
dat2$PctNonCitizen <- (dat2$SE_A06001_005/dat2$SE_A06001_001)*100
dat2$Log_PctNonCitizen <- log(dat2$PctNonCitizen)

#Pct children of single parents of total children
dat2$PctSingleParent <- (dat2$SE_A10065_001/dat2$SE_A10065_002)*100
dat2$Log_PctSingleParent <- log(dat2$PctSingleParent)

#Pct Unwell for 14 Days
dat2$PctUnwellAdults <- dat2$Physical_health_not_good_for___14_days_among_adults_aged___18_ye
dat2$Log_PctUnwellAdults <- log(dat2$PctUnwellAdults)

#Adult asthma rate
dat2$AsthmaRate <- dat2$Current_asthma_among_adults_aged___18_years
dat2$Log_AsthmaRate <- log(dat2$AsthmaRate)

#Routine doctor visit rate
dat2$PctRoutineDrVisit <- dat2$Visits_to_doctor_for_routine_checkup_within_the_past_year_among
dat2$Log_PctRoutineDrVisit <- log(dat2$PctRoutineDrVisit)

#Cancer rate
dat2$CancerRate <- dat2$Cancer__excluding_skin_cancer__among_adults_aged___18_years
dat2$Log_CancerRate <- log(dat2$CancerRate)

#Uninsured Rate
dat2$PctUninsured <- dat2$Current_lack_of_health_insurance_among_adults_aged_18_64_years
dat2$Log_PctUninsured <- log(dat2$PctUninsured)

#Density visit rate
dat2$PctDentistVisit <- dat2$Visits_to_dentist_or_dental_clinic_among_adults_aged___18_years
dat2$Log_PctDentistVisit <- log(dat2$PctDentistVisit)

#Mental health not good for 14 days rate
dat2$PctMentallyUnwell <- dat2$Mental_health_not_good_for___14_days_among_adults_aged___18_year
dat2$Log_PctMentallyUnwell <- log(dat2$PctMentallyUnwell)

#MEP
dat2$AvgMEP <- dat2$MEAN_mep
dat2$Log_AvgMEP <- log(dat2$AvgMEP)

is.na(dat2) <- sapply(dat2, is.infinite)

#Categorized variables for t-tests and chi-squared

#Majority female
dat2$MajFemale <- 0
dat2$MajFemale[dat2$PctFem >= "50"] <- 1

#Avg Household size
summary(dat2$AvgHHSize)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   1.350   2.225   2.540   2.539   2.875   3.950       1

#categorized by above the median =1, others =0
dat2$BigHH <- 0
dat2$BigHH[dat2$AvgHHSize > "2.540"] <- 1


#Median income
summary(dat2$MedHHInc)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   10793   29766   44184   49394   62033  143646       2

#categorized by above the median=1, others=0
dat2$HighIncome <- 0
dat2$HighIncome[dat2$MedHHInc > "44184"] <- 1

##histograms of key variables

hist(dat2$AvgHHSize,
     main="Histogram of Average Household Size", 
     xlab="Avg. Household Size")

hist(dat2$PctBlack,
     main="Histogram of Black Percentage of Population", 
     xlab="% of Census Population that is Black")

hist(dat2$Pop18to54,
     main="Histogram of Population Percentage Between 18 and 54", 
     xlab="% of Census Population that is 18-54 Years Old")

##table showing data stats for variables in the final model

varsInterest <- c("AvgMEP", "AvgHHSize", "Pop18to54", "CancerRate", "PctUnwellAdults", "PctBlack")
datviz<-dat2[ , which(names(dat2) %in% varsInterest)]
head(datviz)

##   Pop18to54  PctBlack AvgHHSize PctUnwellAdults CancerRate   AvgMEP
## 1  76.05634  6.653715      1.64             5.9        3.9 470.3547
## 2  70.34965  4.300699      2.46            11.6        3.6 493.3073
## 3  60.88626  7.090103      1.70             7.3        5.6 448.2941
## 4  67.17011 11.367250      1.52             8.4        5.7 439.0412
## 5  51.98926  5.223334      1.40             8.7       10.6 478.3530
## 6  88.51750 32.913881      1.47            10.5        2.7 498.1745

stargazer(datviz, type = "text")

## 
## ======================================================================
## Statistic        N   Mean   St. Dev.   Min   Pctl(25) Pctl(75)   Max  
## ----------------------------------------------------------------------
## Pop18to54       376 53.589   11.150  18.231   46.512   58.552  98.078 
## PctBlack        376 42.995   34.350   0.000   10.394   80.430  98.516 
## AvgHHSize       375  2.539   0.470    1.350   2.225    2.875    3.950 
## PctUnwellAdults 376 14.815   4.708    4.500   11.575   18.025  27.200 
## CancerRate      376  5.902   1.810    1.000   4.700    6.700   18.300 
## AvgMEP          376 297.769  84.261  145.438 232.310  357.976  499.603
## ----------------------------------------------------------------------

T-Tests

t.test (AvgMEP ~ MajFemale, data = dat2, paired = FALSE)#AvgMEP & Majority Female

## 
##  Welch Two Sample t-test
## 
## data:  AvgMEP by MajFemale
## t = 3.0567, df = 164.41, p-value = 0.002612
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  10.79203 50.17157
## sample estimates:
## mean in group 0 mean in group 1 
##        320.3062        289.8244

t.test (AvgMEP ~ BigHH, data = dat2, paired = FALSE)#AvgMEP & HH Size

## 
##  Welch Two Sample t-test
## 
## data:  AvgMEP by BigHH
## t = 4.9632, df = 330.97, p-value = 0.000001112
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  25.15968 58.19892
## sample estimates:
## mean in group 0 mean in group 1 
##        318.3871        276.7078

t.test (AvgMEP ~ HighIncome, data = dat2, paired = FALSE)#AvgMEP & HHI

## 
##  Welch Two Sample t-test
## 
## data:  AvgMEP by HighIncome
## t = 1.6802, df = 275.27, p-value = 0.09405
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -2.632101 33.300514
## sample estimates:
## mean in group 0 mean in group 1 
##        304.5390        289.2048

Cor Tests

cor.test(dat2$AvgMEP, dat2$MajFemale)

## 
##  Pearson's product-moment correlation
## 
## data:  dat2$AvgMEP and dat2$MajFemale
## t = -3.1149, df = 374, p-value = 0.001982
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2560348 -0.0588262
## sample estimates:
##        cor 
## -0.1590163

cor.test(dat2$AvgMEP, dat2$BigHH)

## 
##  Pearson's product-moment correlation
## 
## data:  dat2$AvgMEP and dat2$BigHH
## t = -4.9431, df = 374, p-value = 0.000001163
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3402535 -0.1502664
## sample estimates:
##        cor 
## -0.2476391

cor.test(dat2$AvgMEP, dat2$HighIncome)

## 
##  Pearson's product-moment correlation
## 
## data:  dat2$AvgMEP and dat2$HighIncome
## t = -1.7572, df = 374, p-value = 0.07971
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.18988627  0.01074658
## sample estimates:
##        cor 
## -0.0904879

CrossTable(dat2$AvgMEP, dat2$MajFemale, fisher = FALSE, chisq = TRUE, 
            expected = TRUE, sresid = FALSE, format="SPSS")#needs to be separated by quartiles. Do we split AvgMEP or other variable?

Correlation tests

Total Pop: small p-value, negatively correlated Pct Fem: 0.06 p-value, negatively correlated Pct Male: 0.062 p-value, positively correlated Pop Under 17: small p-value, negatively correlated Pop 18-54: small p-value, positively correlated Pop 55+: small p-value, negatively correlated Pct White: 0.049 p-value, positively correlated Pct Black: 0.006 p-value, negatively correlated Pct AAPI: 0.012 p-value, positively correlated Majority WHite: 0.5821, minor correlation - ACCEPT NULL Avg HH Size: small p-value, negatively correlated Unemployment Rate: 0.045 p-value, negatively correlated Median HH Income: 0.084 p-value, positively correlated Pct Non Citizen: 0.79 p-value, minor correlation - ACCEPT NULL Pct Single Parent: data issue Pct Unwell Adults: small p-value, negatively correlated Asthma Rate: 0.044 p-value, negatively correlated Pct Routine Dr Visit: small p-value, negatively correlated Cancer Rate: small p-value, negatively correlated Pct Uninsured: 0.88 p-value, minor correlation - ACCEPT NULL Pct Dentist Visits: 0.42 p-value, minor correlation - ACCEPT NULL Pct Mentally Unwell: 0.88 p-value, minor correlation - ACCEPT NULL

cor.test(dat2$AvgMEP, dat2$TotalPop)

## 
##  Pearson's product-moment correlation
## 
## data:  dat2$AvgMEP and dat2$TotalPop
## t = -2.9414, df = 374, p-value = 0.003471
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.24773626 -0.04999172
## sample estimates:
##        cor 
## -0.1503676

cor.test(dat2$AvgMEP, dat2$PctFem)

## 
##  Pearson's product-moment correlation
## 
## data:  dat2$AvgMEP and dat2$PctFem
## t = -1.8703, df = 374, p-value = 0.06223
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.195493539  0.004923445
## sample estimates:
##         cor 
## -0.09626062

cor.test(dat2$AvgMEP, dat2$PctMale)

## 
##  Pearson's product-moment correlation
## 
## data:  dat2$AvgMEP and dat2$PctMale
## t = 1.8703, df = 374, p-value = 0.06223
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.004923445  0.195493539
## sample estimates:
##        cor 
## 0.09626062

cor.test(dat2$AvgMEP, dat2$PopUnd17)

## 
##  Pearson's product-moment correlation
## 
## data:  dat2$AvgMEP and dat2$PopUnd17
## t = -6.1361, df = 374, p-value = 0.000000002156
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3915923 -0.2076490
## sample estimates:
##        cor 
## -0.3024338

cor.test(dat2$AvgMEP, dat2$Pop18to54)

## 
##  Pearson's product-moment correlation
## 
## data:  dat2$AvgMEP and dat2$Pop18to54
## t = 11.519, df = 374, p-value < 0.00000000000000022
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4330079 0.5827118
## sample estimates:
##       cor 
## 0.5117338

cor.test(dat2$AvgMEP, dat2$PopOver55)

## 
##  Pearson's product-moment correlation
## 
## data:  dat2$AvgMEP and dat2$PopOver55
## t = -6.8634, df = 374, p-value = 0.00000000002805
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.4213428 -0.2414915
## sample estimates:
##       cor 
## -0.334459

cor.test(dat2$AvgMEP, dat2$PctWhite)

## 
##  Pearson's product-moment correlation
## 
## data:  dat2$AvgMEP and dat2$PctWhite
## t = 1.9785, df = 374, p-value = 0.0486
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.000646634 0.200844897
## sample estimates:
##       cor 
## 0.1017761

cor.test(dat2$AvgMEP, dat2$PctBlack)

## 
##  Pearson's product-moment correlation
## 
## data:  dat2$AvgMEP and dat2$PctBlack
## t = -2.7435, df = 374, p-value = 0.006372
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.23820873 -0.03988687
## sample estimates:
##        cor 
## -0.1404564

cor.test(dat2$AvgMEP, dat2$PctAAPI)

## 
##  Pearson's product-moment correlation
## 
## data:  dat2$AvgMEP and dat2$PctAAPI
## t = 2.5167, df = 374, p-value = 0.01226
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.02828103 0.22721866
## sample estimates:
##       cor 
## 0.1290481

cor.test(dat2$AvgMEP, dat2$MajorityWhite)

## 
##  Pearson's product-moment correlation
## 
## data:  dat2$AvgMEP and dat2$MajorityWhite
## t = 0.55078, df = 374, p-value = 0.5821
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.07287755  0.12923246
## sample estimates:
##        cor 
## 0.02846841

cor.test(dat2$AvgMEP, dat2$AvgHHSize)

## 
##  Pearson's product-moment correlation
## 
## data:  dat2$AvgMEP and dat2$AvgHHSize
## t = -7.2594, df = 373, p-value = 0.000000000002278
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.4375267 -0.2598327
## sample estimates:
##        cor 
## -0.3518455

cor.test(dat2$AvgMEP, dat2$UnemploymentRate)

## 
##  Pearson's product-moment correlation
## 
## data:  dat2$AvgMEP and dat2$UnemploymentRate
## t = -2.0137, df = 374, p-value = 0.04476
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.202578925 -0.002454204
## sample estimates:
##        cor 
## -0.1035646

cor.test(dat2$AvgMEP, dat2$MedHHInc)

## 
##  Pearson's product-moment correlation
## 
## data:  dat2$AvgMEP and dat2$MedHHInc
## t = 1.7307, df = 372, p-value = 0.08433
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.01214194  0.18906755
## sample estimates:
##       cor 
## 0.0893746

cor.test(dat2$AvgMEP, dat2$PctNonCitizen)

## 
##  Pearson's product-moment correlation
## 
## data:  dat2$AvgMEP and dat2$PctNonCitizen
## t = 0.26569, df = 374, p-value = 0.7906
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.08752041  0.11471407
## sample estimates:
##        cor 
## 0.01373732

cor.test(dat2$AvgMEP, dat2$PctSingleParent)

## 
##  Pearson's product-moment correlation
## 
## data:  dat2$AvgMEP and dat2$PctSingleParent
## t = -0.31531, df = 364, p-value = 0.7527
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.11883329  0.08613148
## sample estimates:
##        cor 
## -0.0165245

cor.test(dat2$AvgMEP, dat2$PctUnwellAdults)

## 
##  Pearson's product-moment correlation
## 
## data:  dat2$AvgMEP and dat2$PctUnwellAdults
## t = -3.6245, df = 374, p-value = 0.0003296
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.28012904 -0.08465283
## sample estimates:
##        cor 
## -0.1842118

cor.test(dat2$AvgMEP, dat2$AsthmaRate)

## 
##  Pearson's product-moment correlation
## 
## data:  dat2$AvgMEP and dat2$AsthmaRate
## t = -2.0246, df = 374, p-value = 0.04362
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.203119198 -0.003017657
## sample estimates:
##       cor 
## -0.104122

cor.test(dat2$AvgMEP, dat2$PctRoutineDrVisit)

## 
##  Pearson's product-moment correlation
## 
## data:  dat2$AvgMEP and dat2$PctRoutineDrVisit
## t = -6.3347, df = 374, p-value = 0.0000000006832
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3998347 -0.2169812
## sample estimates:
##        cor 
## -0.3112863

cor.test(dat2$AvgMEP, dat2$CancerRate)

## 
##  Pearson's product-moment correlation
## 
## data:  dat2$AvgMEP and dat2$CancerRate
## t = -8.8248, df = 374, p-value < 0.00000000000000022
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.4954731 -0.3277650
## sample estimates:
##        cor 
## -0.4151397

cor.test(dat2$AvgMEP, dat2$PctUninsured)

## 
##  Pearson's product-moment correlation
## 
## data:  dat2$AvgMEP and dat2$PctUninsured
## t = -0.15328, df = 374, p-value = 0.8783
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.10897466  0.09328503
## sample estimates:
##          cor 
## -0.007925876

cor.test(dat2$AvgMEP, dat2$PctDentistVisit)

## 
##  Pearson's product-moment correlation
## 
## data:  dat2$AvgMEP and dat2$PctDentistVisit
## t = 0.80416, df = 374, p-value = 0.4218
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.05984116  0.14208553
## sample estimates:
##        cor 
## 0.04154642

cor.test(dat2$AvgMEP, dat2$PctMentallyUnwell)

## 
##  Pearson's product-moment correlation
## 
## data:  dat2$AvgMEP and dat2$PctMentallyUnwell
## t = -0.13853, df = 374, p-value = 0.8899
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.10822078  0.09404121
## sample estimates:
##         cor 
## -0.00716305

Plots!

boxplot((AvgMEP) ~ MajFemale, data= dat2)

boxplot((AvgMEP) ~ AvgHHSize, data= dat2)

boxplot((AvgMEP) ~ MedHHInc, data= dat2)

boxplot((Log_AvgMEP) ~ Log_MedHHInc, data= dat2)

More plots!!

ggplot(data=dat2, aes(AvgMEP)) + 
  geom_histogram() +
  labs(title="Average MEP Distribution", x="Value of MEP", y="Number of Tracts")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(data=dat2, aes(Log_AvgMEP)) + 
  geom_histogram() +
  labs(title="Log Average MEP Distribution", x="Log Value of MEP", y="Number of Tracts")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(data=dat2, aes(Log_MedHHInc, Log_AvgMEP)) + geom_point()+ 
   ggtitle("Log Median Income Against Log Avg MEP")

## Warning: Removed 2 rows containing missing values (geom_point).

ggplot(data=dat2, aes(MedHHInc, AvgMEP)) + geom_point()+ 
   ggtitle("Median Income Against Avg MEP")

## Warning: Removed 2 rows containing missing values (geom_point).

ggplot(data=dat2, aes(AvgHHSize, AvgMEP)) + geom_point()+ 
   ggtitle("Avg Household Size Against Avg MEP")

## Warning: Removed 1 rows containing missing values (geom_point).

ggplot(data=dat2, aes(Log_AvgHHSize, Log_AvgMEP)) + geom_point()+ 
   ggtitle("Log Avg Household Size Against Log Avg MEP")

## Warning: Removed 1 rows containing missing values (geom_point).

ggplot(data=dat2, aes(Log_TotalPop, Log_AvgMEP)) + geom_point()+ 
   ggtitle("Log Total Pop Against Log Avg MEP")

ggplot(data=dat2, aes(Log_Pop18to54, Log_AvgMEP)) + geom_point()+ 
   ggtitle("Log Population 18-54 Against Log Avg MEP")

ggplot(data=dat2, aes(Log_PctUnwellAdults, Log_AvgMEP)) + geom_point()+ 
   ggtitle("Log Unwell Adults Against Log Avg MEP")

ggplot(data=dat2, aes(Log_CancerRate, Log_AvgMEP)) + geom_point()+ 
   ggtitle("Log Cancer Rate Against Log Avg MEP")

ggplot(data=dat2, aes(PctBlack, AvgMEP)) + geom_point()+ 
   ggtitle("Log Cancer Rate Against Log Avg MEP")

ggplot(data=dat2, aes(Log_PctBlack, Log_AvgMEP)) + geom_point()+ 
   ggtitle("Log Cancer Rate Against Log Avg MEP")

## Warning: Removed 1 rows containing missing values (geom_point).

Total Pop: small p-value, negatively correlated Pop Under 17: small p-value, negatively correlated Pop 18-54: small p-value, positively correlated Pop 55+: small p-value, negatively correlated Pct White: 0.049 p-value, positively correlated Pct Black: 0.006 p-value, negatively correlated Pct AAPI: 0.012 p-value, positively correlated Avg HH Size: small p-value, negatively correlated Unemployment Rate: 0.045 p-value, negatively correlated Pct Unwell Adults: small p-value, negatively correlated Asthma Rate: 0.044 p-value, negatively correlated Pct Routine Dr Visit: small p-value, negatively correlated Cancer Rate: small p-value, negatively correlated

cor.test(dat2$AvgMEP, dat2$TotalPop)

## 
##  Pearson's product-moment correlation
## 
## data:  dat2$AvgMEP and dat2$TotalPop
## t = -2.9414, df = 374, p-value = 0.003471
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.24773626 -0.04999172
## sample estimates:
##        cor 
## -0.1503676

cor.test(dat2$AvgMEP, dat2$PopUnd17)

## 
##  Pearson's product-moment correlation
## 
## data:  dat2$AvgMEP and dat2$PopUnd17
## t = -6.1361, df = 374, p-value = 0.000000002156
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3915923 -0.2076490
## sample estimates:
##        cor 
## -0.3024338

cor.test(dat2$AvgMEP, dat2$Pop18to54)

## 
##  Pearson's product-moment correlation
## 
## data:  dat2$AvgMEP and dat2$Pop18to54
## t = 11.519, df = 374, p-value < 0.00000000000000022
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4330079 0.5827118
## sample estimates:
##       cor 
## 0.5117338

cor.test(dat2$AvgMEP, dat2$PopOver55)

## 
##  Pearson's product-moment correlation
## 
## data:  dat2$AvgMEP and dat2$PopOver55
## t = -6.8634, df = 374, p-value = 0.00000000002805
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.4213428 -0.2414915
## sample estimates:
##       cor 
## -0.334459

cor.test(dat2$AvgMEP, dat2$PctWhite)

## 
##  Pearson's product-moment correlation
## 
## data:  dat2$AvgMEP and dat2$PctWhite
## t = 1.9785, df = 374, p-value = 0.0486
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.000646634 0.200844897
## sample estimates:
##       cor 
## 0.1017761

cor.test(dat2$AvgMEP, dat2$PctBlack)

## 
##  Pearson's product-moment correlation
## 
## data:  dat2$AvgMEP and dat2$PctBlack
## t = -2.7435, df = 374, p-value = 0.006372
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.23820873 -0.03988687
## sample estimates:
##        cor 
## -0.1404564

cor.test(dat2$AvgMEP, dat2$PctAAPI)

## 
##  Pearson's product-moment correlation
## 
## data:  dat2$AvgMEP and dat2$PctAAPI
## t = 2.5167, df = 374, p-value = 0.01226
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.02828103 0.22721866
## sample estimates:
##       cor 
## 0.1290481

cor.test(dat2$AvgMEP, dat2$AvgHHSize)

## 
##  Pearson's product-moment correlation
## 
## data:  dat2$AvgMEP and dat2$AvgHHSize
## t = -7.2594, df = 373, p-value = 0.000000000002278
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.4375267 -0.2598327
## sample estimates:
##        cor 
## -0.3518455

cor.test(dat2$AvgMEP, dat2$UnemploymentRate)

## 
##  Pearson's product-moment correlation
## 
## data:  dat2$AvgMEP and dat2$UnemploymentRate
## t = -2.0137, df = 374, p-value = 0.04476
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.202578925 -0.002454204
## sample estimates:
##        cor 
## -0.1035646

cor.test(dat2$AvgMEP, dat2$PctUnwellAdults)

## 
##  Pearson's product-moment correlation
## 
## data:  dat2$AvgMEP and dat2$PctUnwellAdults
## t = -3.6245, df = 374, p-value = 0.0003296
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.28012904 -0.08465283
## sample estimates:
##        cor 
## -0.1842118

cor.test(dat2$AvgMEP, dat2$AsthmaRate)

## 
##  Pearson's product-moment correlation
## 
## data:  dat2$AvgMEP and dat2$AsthmaRate
## t = -2.0246, df = 374, p-value = 0.04362
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.203119198 -0.003017657
## sample estimates:
##       cor 
## -0.104122

cor.test(dat2$AvgMEP, dat2$PctRoutineDrVisit)

## 
##  Pearson's product-moment correlation
## 
## data:  dat2$AvgMEP and dat2$PctRoutineDrVisit
## t = -6.3347, df = 374, p-value = 0.0000000006832
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3998347 -0.2169812
## sample estimates:
##        cor 
## -0.3112863

cor.test(dat2$AvgMEP, dat2$CancerRate)

## 
##  Pearson's product-moment correlation
## 
## data:  dat2$AvgMEP and dat2$CancerRate
## t = -8.8248, df = 374, p-value < 0.00000000000000022
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.4954731 -0.3277650
## sample estimates:
##        cor 
## -0.4151397

mod <- lm(Log_AvgMEP ~ Log_AvgHHSize, data = dat2)
summary (mod)

## 
## Call:
## lm(formula = Log_AvgMEP ~ Log_AvgHHSize, data = dat2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.65162 -0.18810  0.01651  0.22363  0.53666 
## 
## Coefficients:
##               Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)    6.08632    0.06575  92.566 < 0.0000000000000002 ***
## Log_AvgHHSize -0.46863    0.07038  -6.659      0.0000000000991 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2651 on 373 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.1062, Adjusted R-squared:  0.1038 
## F-statistic: 44.34 on 1 and 373 DF,  p-value: 0.00000000009909

par(mfrow=c(1,1))           
plot (dat2$Log_AvgHHSize, dat2$Log_AvgMEP )
abline (mod, col="red")

mod1 <- lm(Log_AvgMEP ~ Log_AvgHHSize + Log_PctBlack, data = dat2)
summary (mod1)

## 
## Call:
## lm(formula = Log_AvgMEP ~ Log_AvgHHSize + Log_PctBlack, data = dat2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.65706 -0.18373  0.02349  0.22347  0.53368 
## 
## Coefficients:
##                Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)    6.085266   0.069546  87.500 < 0.0000000000000002 ***
## Log_AvgHHSize -0.461901   0.071563  -6.454        0.00000000034 ***
## Log_PctBlack  -0.001388   0.010093  -0.137                0.891    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2655 on 371 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.1039, Adjusted R-squared:  0.09903 
## F-statistic:  21.5 on 2 and 371 DF,  p-value: 0.000000001464

mod2 <- lm(Log_AvgMEP ~ Log_UnemploymentRate, data = dat2)
summary (mod2)

## 
## Call:
## lm(formula = Log_AvgMEP ~ Log_UnemploymentRate, data = dat2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.68590 -0.21165 -0.01259  0.24020  0.55371 
## 
## Coefficients:
##                      Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)           5.76218    0.03999 144.102 < 0.0000000000000002 ***
## Log_UnemploymentRate -0.05167    0.01849  -2.794              0.00548 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2769 on 371 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.0206, Adjusted R-squared:  0.01796 
## F-statistic: 7.805 on 1 and 371 DF,  p-value: 0.005482

par(mfrow=c(1,1))           
plot (dat2$Log_UnemploymentRate, dat2$Log_AvgMEP )
abline (mod2, col="red")

mod3 <- lm(Log_AvgMEP ~ Log_AvgHHSize + Log_Pop18to54, data = dat2)
summary (mod3)

## 
## Call:
## lm(formula = Log_AvgMEP ~ Log_AvgHHSize + Log_Pop18to54, data = dat2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.62106 -0.16890 -0.00087  0.18742  0.66977 
## 
## Coefficients:
##               Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)    3.33009    0.27794  11.981 < 0.0000000000000002 ***
## Log_AvgHHSize -0.27654    0.06519  -4.242             0.000028 ***
## Log_Pop18to54  0.65174    0.06426  10.142 < 0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.235 on 372 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.2998, Adjusted R-squared:  0.2961 
## F-statistic: 79.65 on 2 and 372 DF,  p-value: < 0.00000000000000022

mod4 <- lm(Log_AvgMEP ~ Log_AvgHHSize + Log_Pop18to54 + Log_PctRoutineDrVisit, data = dat2)
summary (mod4)

## 
## Call:
## lm(formula = Log_AvgMEP ~ Log_AvgHHSize + Log_Pop18to54 + Log_PctRoutineDrVisit, 
##     data = dat2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.6193 -0.1676 -0.0026  0.1879  0.6721 
## 
## Coefficients:
##                       Estimate Std. Error t value           Pr(>|t|)    
## (Intercept)            3.15748    1.61010   1.961             0.0506 .  
## Log_AvgHHSize         -0.27507    0.06666  -4.127 0.0000454684977027 ***
## Log_Pop18to54          0.65761    0.08397   7.831 0.0000000000000512 ***
## Log_PctRoutineDrVisit  0.03388    0.31128   0.109             0.9134    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2353 on 371 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.2999, Adjusted R-squared:  0.2942 
## F-statistic: 52.97 on 3 and 371 DF,  p-value: < 0.00000000000000022

mod5 <- lm(Log_AvgMEP ~ Log_PctRoutineDrVisit, data = dat2)
summary (mod5)

## 
## Call:
## lm(formula = Log_AvgMEP ~ Log_PctRoutineDrVisit, data = dat2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.74360 -0.17974 -0.01809  0.21574  0.56659 
## 
## Coefficients:
##                       Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)            12.3896     1.1887  10.423 < 0.0000000000000002 ***
## Log_PctRoutineDrVisit  -1.5412     0.2721  -5.664         0.0000000295 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2694 on 374 degrees of freedom
## Multiple R-squared:  0.07901,    Adjusted R-squared:  0.07655 
## F-statistic: 32.08 on 1 and 374 DF,  p-value: 0.00000002948

younger people more likely to desire living in accessible places, also core of working age population so have more of a need to be mobile
more speculative discussion about poverty rates/wealth among seniors - old people move to less accessible locations in order to save money
less accessible places may be near the TRI facilities (toxic release inventory) that contribute to higher levels of adverse health impacts
in philly a lot of the best medical facilities are near transit (Penn Medicine)

mod6 <- lm(Log_AvgMEP ~ Log_AvgHHSize + Log_Pop18to54 + Log_CancerRate, data = dat2)
summary (mod6)

## 
## Call:
## lm(formula = Log_AvgMEP ~ Log_AvgHHSize + Log_Pop18to54 + Log_CancerRate, 
##     data = dat2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.60560 -0.16157 -0.02097  0.18568  0.60398 
## 
## Coefficients:
##                Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)     5.43758    0.50214  10.829 < 0.0000000000000002 ***
## Log_AvgHHSize  -0.41499    0.06906  -6.009        0.00000000446 ***
## Log_Pop18to54   0.27475    0.09811   2.800              0.00537 ** 
## Log_CancerRate -0.28242    0.05678  -4.974        0.00000100566 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2278 on 371 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.3436, Adjusted R-squared:  0.3383 
## F-statistic: 64.74 on 3 and 371 DF,  p-value: < 0.00000000000000022

mod7 <- lm(Log_AvgMEP ~ Log_AvgHHSize + Log_Pop18to54 + Log_CancerRate + Log_PctUnwellAdults, data = dat2)
summary (mod7)

## 
## Call:
## lm(formula = Log_AvgMEP ~ Log_AvgHHSize + Log_Pop18to54 + Log_CancerRate + 
##     Log_PctUnwellAdults, data = dat2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.60568 -0.15872 -0.02106  0.15081  0.74977 
## 
## Coefficients:
##                     Estimate Std. Error t value          Pr(>|t|)    
## (Intercept)          4.03273    0.58652   6.876 0.000000000026353 ***
## Log_AvgHHSize       -0.54635    0.07385  -7.398 0.000000000000934 ***
## Log_Pop18to54        0.49969    0.10879   4.593 0.000005989620398 ***
## Log_CancerRate      -0.22527    0.05697  -3.954 0.000092125428930 ***
## Log_PctUnwellAdults  0.20293    0.04648   4.366 0.000016477385806 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2225 on 370 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.3758, Adjusted R-squared:  0.369 
## F-statistic: 55.68 on 4 and 370 DF,  p-value: < 0.00000000000000022

The Variance Inflation Factor (VIF) detects multicollinearity in regression analysis. The VIF for these variables is relatively small (with anything less than 10 considered appropriate), meaning there are no problems of collinearity.

mod8 <- lm(Log_AvgMEP ~ Log_AvgHHSize + Log_Pop18to54 + Log_CancerRate + Log_PctUnwellAdults + Log_PctBlack, data = dat2)
summary (mod8)

## 
## Call:
## lm(formula = Log_AvgMEP ~ Log_AvgHHSize + Log_Pop18to54 + Log_CancerRate + 
##     Log_PctUnwellAdults + Log_PctBlack, data = dat2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.70344 -0.14362 -0.00223  0.15156  0.71918 
## 
## Coefficients:
##                     Estimate Std. Error t value           Pr(>|t|)    
## (Intercept)          4.14022    0.58164   7.118 0.0000000000057677 ***
## Log_AvgHHSize       -0.59840    0.07564  -7.912 0.0000000000000301 ***
## Log_Pop18to54        0.47692    0.10794   4.419 0.0000130857529105 ***
## Log_CancerRate      -0.26638    0.05807  -4.587 0.0000061710619077 ***
## Log_PctUnwellAdults  0.27889    0.05242   5.320 0.0000001804957684 ***
## Log_PctBlack        -0.03109    0.01006  -3.090            0.00215 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2202 on 368 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.3885, Adjusted R-squared:  0.3802 
## F-statistic: 46.76 on 5 and 368 DF,  p-value: < 0.00000000000000022

vif(mod8)

##       Log_AvgHHSize       Log_Pop18to54      Log_CancerRate Log_PctUnwellAdults 
##            1.664222            3.502726            2.777506            2.582808 
##        Log_PctBlack 
##            1.480532

stargazer(mod6, mod7, mod8, type = "text")

## 
## ===========================================================================================
##                                               Dependent variable:                          
##                     -----------------------------------------------------------------------
##                                                   Log_AvgMEP                               
##                               (1)                     (2)                     (3)          
## -------------------------------------------------------------------------------------------
## Log_AvgHHSize              -0.415***               -0.546***               -0.598***       
##                             (0.069)                 (0.074)                 (0.076)        
##                                                                                            
## Log_Pop18to54              0.275***                0.500***                0.477***        
##                             (0.098)                 (0.109)                 (0.108)        
##                                                                                            
## Log_CancerRate             -0.282***               -0.225***               -0.266***       
##                             (0.057)                 (0.057)                 (0.058)        
##                                                                                            
## Log_PctUnwellAdults                                0.203***                0.279***        
##                                                     (0.046)                 (0.052)        
##                                                                                            
## Log_PctBlack                                                               -0.031***       
##                                                                             (0.010)        
##                                                                                            
## Constant                   5.438***                4.033***                4.140***        
##                             (0.502)                 (0.587)                 (0.582)        
##                                                                                            
## -------------------------------------------------------------------------------------------
## Observations                  375                     375                     374          
## R2                           0.344                   0.376                   0.389         
## Adjusted R2                  0.338                   0.369                   0.380         
## Residual Std. Error    0.228 (df = 371)        0.222 (df = 370)        0.220 (df = 368)    
## F Statistic         64.738*** (df = 3; 371) 55.681*** (df = 4; 370) 46.763*** (df = 5; 368)
## ===========================================================================================
## Note:                                                           *p<0.1; **p<0.05; ***p<0.01

##Correlation between final variables and MEP

par(mfrow=c(2, 3))           
plot(dat2$Log_AvgHHSize, dat2$Log_AvgMEP, main=paste("Corr=", cor(dat2$Log_AvgHHSize, dat2$Log_AvgMEP, use="complete.obs", method="pearson")))
plot(dat2$Log_Pop18to54, dat2$Log_AvgMEP, main=paste("Corr=", cor(dat2$Log_Pop18to54, dat2$Log_AvgMEP, use="complete.obs", method="pearson")))
plot(dat2$Log_CancerRate, dat2$Log_AvgMEP, main=paste("Corr=", cor(dat2$Log_CancerRate, dat2$Log_AvgMEP, use="complete.obs", method="pearson")))
plot(dat2$Log_PctUnwellAdults, dat2$Log_AvgMEP, main=paste("Corr=", cor(dat2$Log_PctUnwellAdults, dat2$Log_AvgMEP, use="complete.obs", method="pearson")))
plot(dat2$Log_PctBlack, dat2$Log_AvgMEP, main=paste("Corr=", cor(dat2$Log_PctBlack, dat2$Log_AvgMEP, use="complete.obs", method="pearson")))

mod9 <- lm(Log_AvgMEP ~ Log_MedHHInc, data=dat2)
summary (mod9)

## 
## Call:
## lm(formula = Log_AvgMEP ~ Log_MedHHInc, data = dat2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.67988 -0.20508 -0.04325  0.19902  0.58192 
## 
## Coefficients:
##              Estimate Std. Error t value            Pr(>|t|)    
## (Intercept)   6.09748    0.30435  20.034 <0.0000000000000002 ***
## Log_MedHHInc -0.04104    0.02846  -1.442                0.15    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2793 on 372 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.005558,   Adjusted R-squared:  0.002885 
## F-statistic: 2.079 on 1 and 372 DF,  p-value: 0.1502

par(mfrow=c(1,1))           
plot (dat2$Log_MedHHInc, dat2$Log_AvgMEP )
abline (mod9, col="red")

505 Final

Madeline Csere and Jonah Garnick

4/20/2021

R Markdown

Vars of Interest