title: “AdvancedR_JEDSI_2023” author: “Ivy Ortiz” date: “2023-08-10” output: html_document: toc: yes toc_float: true code_folding: hide —
LCover <- read.csv("WPF_County_LCover.csv")
#View(LCover)
FAccess <- read.csv("GR_FoodAccess2.csv")
#View(FAccess)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(GGally)
## Loading required package: ggplot2
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(ggfortify)
library(corrplot)
## corrplot 0.92 loaded
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggthemes)
library(ggdist)
library(ggplot2)
#LCover$ReLCover[((LCover$Landcover == 'Woody Wetlands')|(LCover$Landcover=='Evergreen Forest')|(LCover$Landcover=='Deciduous Forest')|(LCover$Landcover=='Mixed Forest'))] <- "Forest"
#LCover$ReLCover[((LCover$Landcover == 'Developed, Medium Intensity')|(LCover$Landcover=='Developed, High Intensity'))] <- "HDevelop"
#LCover$ReLCover[((LCover$Landcover == 'Hay/Pasture')|(LCover$Landcover=='Developed, Open Space')|(LCover$Landcover=='Developed, Low Intensity')|(LCover$Landcover=='Cultivated Crops'))] <- "LDevelop"
#LCover$ReLCover[((LCover$Landcover == 'Barren Land')|(LCover$Landcover== 'Emergent Herbaceous Wetlands')|(LCover$Landcover=='Herbaceous')|(LCover$Landcover=='Open Water')|(LCover$Landcover=='Shrub/Scrub'))] <- "Conserve"
#write.csv(LCover,'DRWS_LCover_Recode.csv',row.names=FALSE)
ReLC <- read.csv("DRWS_LCover_Recode.csv")
arNum <- as.numeric(ReLC$Area)
## Warning: NAs introduced by coercion
aggLCA <- aggregate(arNum~ReLC$COUNTY+ReLC$STATE+ReLC$ReLCover, FUN=sum)
#View(aggLCA)
ReLC2 <- aggLCA
st <- ReLC2$`ReLC$STATE`
ct <- ReLC2$`ReLC$COUNTY`
ReLC2$Location <- paste(ReLC2$`ReLC$COUNTY`,ReLC2$`ReLC$STATE`,sep="_")
#View(ReLC2)
aggReLC2 <- aggregate(ReLC2$arNum~ReLC2$`ReLC$COUNTY`+ReLC2$`ReLC$STATE`,FUN=sum)
aggReLC2$Location <- paste(aggReLC2$`ReLC2$\`ReLC$COUNTY\``,aggReLC2$`ReLC2$\`ReLC$STATE\``,sep="_")
#View(aggReLC2)
dfx <- ReLC2 %>% inner_join( aggReLC2,
by=c('Location'='Location'))
#View(dfx)
dfx$paLC <- (dfx$arNum/dfx$`ReLC2$arNum`)*100
dfx$cpaLC[dfx$paLC<=30] <- "low"
dfx$cpaLC[30>dfx$paLC&dfx$palLC<50] <- "medium"
dfx$cpaLC[dfx$paLC>=50] <- "high"
PAsian <- (FAccess$Asian/FAccess$TotPop)*100
PBIPOC <- ((FAccess$TotPop-FAccess$White)/FAccess$TotPop)*100
PBlack <- (FAccess$Black/FAccess$TotPop)*100
PHawaiianPI <- (FAccess$HawaiianPI/FAccess$TotPop)*100
PHispanic <- (FAccess$Hispanic/FAccess$TotPop)*100
PNativeAmerican <- (FAccess$NativeAmerican/FAccess$TotPop)*100
POtherRace <- (FAccess$OtherRace/FAccess$TotPop)*100
PWhite <- (FAccess$White/FAccess$TotPop)*100
Incomeadj <- FAccess$MedianHHIncome^(1/2)
PSNAP <- FAccess$HHSNAP/FAccess$TotalHouseholds
PGED <- FAccess$GED/FAccess$X25pTotal
PBachelor <- FAccess$Bachelor/FAccess$X25pTotal
FAccessBind <- cbind(FAccess,PAsian,PBIPOC,PBlack,PHawaiianPI,PHispanic,PNativeAmerican,POtherRace,PWhite,Incomeadj,PSNAP,PGED,PBachelor)
FAccessBind$CBlack[FAccessBind$PBlack<=30] <- "low"
FAccessBind$CBlack[FAccessBind$PBlack>30] <- "high"
FAccessBind$CHispanic[FAccessBind$PHispanic<=30] <- "low"
FAccessBind$CHispanic[FAccessBind$PHispanic>30] <- "high"
FAccessBind$CBIPOC[FAccessBind$PBIPOC<=40] <- "low"
FAccessBind$CBIPOC[FAccessBind$PBIPOC>40] <- "high"
#(FAccessBind)
OutletsZip <- read.csv("GR_FoodAccess_OutletsZip.csv")
dfy <- FAccessBind %>% inner_join( OutletsZip,
by=c('GEOID'='GEOID'))
#View(dfy)
GRdf0 <- dfy[complete.cases(dfy),]
GRdf1 <- GRdf0[-c(1:19,32:40,42:56,58:68,70:72,74:83)]
#View(GRdf0)
CorGRdf1 <- cor(GRdf1)
corrplot(CorGRdf1, is.corr = FALSE, method = "square")
fit1 <- lm(dfy$Coffee ~ dfy$PBIPOC + dfy$PBachelor + dfy$Incomeadj)
summary(fit1)
##
## Call:
## lm(formula = dfy$Coffee ~ dfy$PBIPOC + dfy$PBachelor + dfy$Incomeadj)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.8757 -0.7816 -0.2667 0.4414 5.1822
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.852394 1.053370 2.708 0.008004 **
## dfy$PBIPOC -0.016084 0.008939 -1.799 0.075077 .
## dfy$PBachelor 6.187661 1.547019 4.000 0.000124 ***
## dfy$Incomeadj -0.012714 0.003979 -3.195 0.001887 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.293 on 97 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.1939, Adjusted R-squared: 0.169
## F-statistic: 7.779 on 3 and 97 DF, p-value: 0.0001044
par(mfrow=c(2,2))
plot(fit1)
x1 <- dfy[c(21,22,24,27,28,29,30,31)]
ggpairs(x1)
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Removed 1 rows containing missing values (`geom_point()`).
## Removed 1 rows containing missing values (`geom_point()`).
## Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing non-finite values (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Removed 1 rows containing missing values (`geom_point()`).
## Removed 1 rows containing missing values (`geom_point()`).
OutletType <- read.csv("Outlets_Type.csv")
#View(OutletType)
gg1 <- ggplot(OutletType,
aes(x = OutletCode,
y = FREQUENCY,
fill = FoodOutletType))+
geom_col(stat = "identity")+
scale_fill_viridis_d(name = "Group", option = "turbo")+
labs(title = "Outlet", y = "Count", x = "Group")
## Warning in geom_col(stat = "identity"): Ignoring unknown parameters: `stat`
gg1 + theme_dark()
cor.test(dfy$PBachelor, dfy$Pharmacy, use = "complete obs", method = "pearson")
##
## Pearson's product-moment correlation
##
## data: dfy$PBachelor and dfy$Pharmacy
## t = 3.4631, df = 100, p-value = 0.0007874
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1417919 0.4905021
## sample estimates:
## cor
## 0.3272431
gg2 <- ggplot(dfy,
aes(x = PBachelor,
y = Pharmacy,
color = TotPop))+
geom_point()+
geom_smooth()+
labs(title = "Pharmacy ~ PBachelor %",
x = "PBachelor %",
y = "Pharmacy")
gg2 + theme_dark()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: colour
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
ttest1 <- t.test(Coffee~CBlack, data=dfy, paired=F)
ttest1
##
## Welch Two Sample t-test
##
## data: Coffee by CBlack
## t = -2.5654, df = 64.324, p-value = 0.01265
## alternative hypothesis: true difference in means between group high and group low is not equal to 0
## 95 percent confidence interval:
## -0.9268659 -0.1153418
## sample estimates:
## mean in group high mean in group low
## 0.2857143 0.8068182
gg3 <- ggplot(dfy,
aes(x = CBlack,
y = Coffee,
fill = CBlack))+
geom_col(stat = "identity")+
scale_fill_viridis_d(name = "Group", option = "turbo")+
labs(title = "Coffee in Tracts with High and Low Pct Black Residents", y = "Count", x = "Group")
## Warning in geom_col(stat = "identity"): Ignoring unknown parameters: `stat`
gg3+theme_dark()
ttest2 <- t.test(Pharmacy~CHispanic, data=dfy, paired=F)
ttest2
##
## Welch Two Sample t-test
##
## data: Pharmacy by CHispanic
## t = -5.6145, df = 90, p-value = 2.165e-07
## alternative hypothesis: true difference in means between group high and group low is not equal to 0
## 95 percent confidence interval:
## -0.7885049 -0.3763303
## sample estimates:
## mean in group high mean in group low
## 0.0000000 0.5824176
gg4 <- ggplot(dfy,
aes(x = CHispanic,
y = Pharmacy,
fill = CHispanic))+
geom_col(stat = "identity")+
scale_fill_viridis_d(name = "Group", option = "turbo")+
labs(title = "Pharmacy in Tracts with High and Low Pct Hispanic Residents", y = "Count", x = "Group")
## Warning in geom_col(stat = "identity"): Ignoring unknown parameters: `stat`
gg4+theme_dark()
p1 <- dfy %>%
plot_ly(x = ~Shape_Area,
z = ~Incomeadj,
y = ~TotPop,
color = ~TotPop,
colors = c("red", "blue")) %>%
layout(title = 'Income by Area and Total Population',
scene = list(xaxis = list(title = 'Area'),
yaxis = list(title = 'Total Population'),
zaxis = list(title = 'Income Adj ^1/2')))
p1
## No trace type specified:
## Based on info supplied, a 'scatter3d' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
## No scatter3d mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
## Warning: Ignoring 1 observations
p2 <- dfy %>%
plot_ly(x = ~PBlack,
z = ~Incomeadj,
y = ~PBachelor,
color = ~TotPop,
colors = c("green", "blue")) %>%
layout(title = 'Income by Pct Black Residents and Pct Bachelor Degrees',
scene = list(xaxis = list(title = 'Pct Black'),
yaxis = list(title = 'Pct Bachor Degrees'),
zaxis = list(title = 'Income Adj ^1/2')))
p2
## No trace type specified:
## Based on info supplied, a 'scatter3d' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
## No scatter3d mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
## Warning: Ignoring 1 observations
p3 <- dfy %>%
plot_ly(x = ~PHispanic,
z = ~Incomeadj,
y = ~PBachelor,
color = ~TotPop,
colors = c("red", "blue")) %>%
layout(title = 'Income by Pct Hispanic Residents and Pct Bachelor Degrees',
scene = list(xaxis = list(title = 'Pct Hispanic'),
yaxis = list(title = 'Pct Bachor Degrees'),
zaxis = list(title = 'Income Adj ^1/2')))
p3
## No trace type specified:
## Based on info supplied, a 'scatter3d' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
## No scatter3d mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
## Warning: Ignoring 1 observations
x2 <- dfy[c(21,22,24,27,28,29,30,31,41,57,69,73)]
x3 <- x2[complete.cases(x2),]
pca_df <- prcomp(x3,scale. = TRUE)
autoplot(pca_df)
autoplot(pca_df, data = x3, label = TRUE)
biplot(pca_df)