# Load the input table. Later steps in this script read its Profit.Margin,
# Cost, state, county and zip columns.
df <- read.csv(file = "heatmap2_df.csv")
Data to produce:
Get the percentage of zip codes per county that are in groups 1, 2, or 3. Then, get the percentage of counties per state that were in groups 1, 2, or 3. The groups are based on ROI (Profit Margin) groups: 0-33, 34-66, 67+; underperforming, average, above average respectively.
Next, list which whole counties to keep (if a high percentage of zip codes within the county perform well). Lastly, list which counties to keep if the county does not perform well.
Denser counties which need to be plotted:
For now we’ll group the Profit Margin as:
# Bucket every record by profit margin and turn Cost into a number.
#
# Group levels: "1" = margin in [0, 34), "2" = [34, 67), "3" = 67 and above.
# For whole-number margins this is exactly the 0-33 / 34-66 / 67+ banding;
# fractional margins (e.g. 33.5) land in a band instead of falling through
# the gaps. Negative or missing margins stay NA.

# A factor must be converted through its printed value; as.numeric() on a
# factor would return the internal level codes instead.
if (is.factor(df$Profit.Margin)) {
  df$Profit.Margin <- as.character(df$Profit.Margin)
}
df$Profit.Margin <- as.numeric(df$Profit.Margin)

# Assign by column name (not by position) so the result does not depend on
# how many columns the CSV happens to have. cut() also tolerates NA margins.
df$Group <- cut(
  df$Profit.Margin,
  breaks = c(0, 34, 67, Inf),
  labels = c("1", "2", "3"),
  right = FALSE,
  include.lowest = TRUE
)

# Replace every "-" with 0, strip "$" and ",", then parse as numeric.
# NOTE(review): presumably "-" marks an empty cost; be aware this also turns
# a leading minus sign into a 0 -- confirm no negative costs are expected.
df$Cost <- gsub("-", 0, df$Cost)
df$Cost <- as.numeric(gsub("[$,]", "", df$Cost))
The % of counties per state that were in group 1, 2, 3.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Share (in percent) of all records costing at least 1000 that fall into each
# state / county / Group combination.
# The denominator is taken from the same filtered table as the numerator:
# base subsetting with `df$Cost >= 1000` would also count rows whose Cost is
# NA, which filter() drops, so the two would disagree.
# NOTE(review): the denominator is the whole filtered data set, not the
# per-state total -- confirm this is the intended "percentage".
eligible_rows <- df %>% filter(Cost >= 1000)
counties <- eligible_rows %>%
  group_by(state, county, Group) %>%
  summarise(percentage = n() / nrow(eligible_rows) * 100) %>%
  ungroup()  # do not leak the state/county grouping into later steps
## Warning: Factor `Group` contains implicit NA, consider using
## `forcats::fct_explicit_na`
The % of zipcodes per county that were in group 1, 2, 3.
# Share (in percent) of all records costing at least 1000 that fall into each
# county / zip / Group combination.
# The denominator comes from the same filtered table as the numerator, so
# rows with a missing Cost are excluded from both.
# NOTE(review): county is not qualified by state here, so same-named counties
# in different states share one label in the output -- confirm acceptable.
zip_rows <- df %>% filter(Cost >= 1000)
zip <- zip_rows %>%
  group_by(county, zip, Group) %>%
  summarise(percentage = n() / nrow(zip_rows) * 100) %>%
  ungroup()  # do not leak the county/zip grouping into later steps
## Warning: Factor `Group` contains implicit NA, consider using
## `forcats::fct_explicit_na`
Output in csv sorted by state and then county.
# Sort both summaries and write them out as comma-separated files.
# The tie-break keys are spelled out so the row order is state-then-county
# (and county-then-zip) regardless of how the tables were ordered before.
counties <- counties[order(counties$state, counties$county), ]
zip <- zip[order(zip$county, zip$zip), ]

# FALSE rather than F: F is an ordinary variable that can be reassigned.
write.table(zip, "zip.csv", sep = ",", row.names = FALSE)
write.table(counties, "counties.csv", sep = ",", row.names = FALSE)
library(ggplot2)
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
# Attach the per-county percentage of each profit-margin group to the county
# polygons, producing one map table per group (c1, c2, c3).
countiesmap <- map_data("county")
counties$county <- tolower(counties$county)

# The polygons identify a county by lower-case state name (`region`) plus
# county name (`subregion`), while `counties$state` holds two-letter codes.
# Translate the codes so the join can use BOTH keys: joining on county name
# alone gives same-named counties in other states the wrong percentage and
# duplicates polygon rows.
state_codes <- c(state.abb, "DC")
state_names <- tolower(c(state.name, "District of Columbia"))
county_pct <- data.frame(
  region = state_names[
    match(toupper(trimws(as.character(counties$state))), state_codes)
  ],
  subregion = as.character(counties$county),
  Group = as.character(counties$Group),
  percentage = counties$percentage,
  stringsAsFactors = FALSE
)

# Left-join one group's percentages onto the polygons. %in% (unlike ==)
# never yields NA, so records without a Group do not add all-NA rows.
join_group <- function(group_id) {
  keep <- county_pct$Group %in% as.character(group_id)
  left_join(
    countiesmap,
    county_pct[keep, c("region", "subregion", "percentage")],
    by = c("region", "subregion")
  )
}

c1 <- join_group(1)
c2 <- join_group(2)
c3 <- join_group(3)
# Add latitude/longitude to every record by looking its zip code up in the
# reference table shipped with the `zipcode` package.
library(zipcode)
data("zipcode")
library(dplyr)

# Clean the zip codes with clean.zipcodes() before joining on them.
df$zip <- clean.zipcodes(df$zip)
coord <- left_join(
  df,
  zipcode[, c("zip", "latitude", "longitude")],
  by = "zip"
)
California
# California county choropleths, one per profit-margin group; each county is
# shaded by its joined `percentage` value.
c1 %>%
  filter(region == "california") %>%
  ggplot(mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(mapping = aes(fill = percentage)) +
  scale_fill_continuous(low = "#56B1F7", high = "#132B43") +
  labs(title = "California Counties (Profit Margin: 0-33)")

c2 %>%
  filter(region == "california") %>%
  ggplot(mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(mapping = aes(fill = percentage)) +
  scale_fill_continuous(low = "#56B1F7", high = "#132B43") +
  labs(title = "California Counties (Profit Margin: 34-66)")

c3 %>%
  filter(region == "california") %>%
  ggplot(mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(mapping = aes(fill = percentage)) +
  scale_fill_continuous(low = "#56B1F7", high = "#132B43") +
  labs(title = "California Counties (Profit Margin: 67+)")
Zip codes
# California county outlines with one point per record costing at least
# 1000, coloured by profit-margin group.
ggplot(countiesmap[countiesmap$region == "california", ]) +
  # Constant white fill set outside aes(): no fill scale or legend is needed.
  geom_polygon(
    aes(x = long, y = lat, group = group),
    fill = "white", color = "black"
  ) +
  ggtitle("California") +
  # filter() drops rows with a missing Cost instead of turning them into
  # all-NA points.
  geom_point(
    data = filter(coord, state == "CA", Cost >= 1000),
    aes(x = longitude, y = latitude, color = Group),
    size = 0.9
  ) +
  # Labels are keyed by Group level, so a state that lacks one of the groups
  # still gets the right text. Group 3 starts at 67, hence "67+".
  scale_color_hue(labels = c("1" = "0-33", "2" = "34-66", "3" = "67+"))
Los Angeles
library(leaflet)
## Warning: package 'leaflet' was built under R version 3.6.1
# Clustered marker map of the records in Los Angeles County, CA.
LA <- filter(coord, county == "Los Angeles" & state == "CA")
leaflet(data = LA, width = 900, height = 300) %>%
  addTiles() %>%  # default OpenStreetMap map tiles
  addMarkers(
    lng = ~longitude,
    lat = ~latitude,
    clusterOptions = markerClusterOptions()
  )
Illinois
# Illinois county choropleths, one per profit-margin group; each county is
# shaded by its joined `percentage` value.
c1 %>%
  filter(region == "illinois") %>%
  ggplot(mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(mapping = aes(fill = percentage)) +
  scale_fill_continuous(low = "#56B1F7", high = "#132B43") +
  labs(title = "Illinois Counties (Profit Margin: 0-33)")

c2 %>%
  filter(region == "illinois") %>%
  ggplot(mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(mapping = aes(fill = percentage)) +
  scale_fill_continuous(low = "#56B1F7", high = "#132B43") +
  labs(title = "Illinois Counties (Profit Margin: 34-66)")

c3 %>%
  filter(region == "illinois") %>%
  ggplot(mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(mapping = aes(fill = percentage)) +
  scale_fill_continuous(low = "#56B1F7", high = "#132B43") +
  labs(title = "Illinois Counties (Profit Margin: 67+)")
Zip codes
# Illinois county outlines with one point per record costing at least 1000,
# coloured by profit-margin group.
ggplot(countiesmap[countiesmap$region == "illinois", ]) +
  # Constant white fill set outside aes(): no fill scale or legend is needed.
  geom_polygon(
    aes(x = long, y = lat, group = group),
    fill = "white", color = "black"
  ) +
  ggtitle("Illinois") +
  # filter() drops rows with a missing Cost instead of turning them into
  # all-NA points.
  geom_point(
    data = filter(coord, state == "IL", Cost >= 1000),
    aes(x = longitude, y = latitude, color = Group),
    size = 0.9
  ) +
  # Labels are keyed by Group level, so a state that lacks one of the groups
  # still gets the right text. Group 3 starts at 67, hence "67+".
  scale_color_hue(labels = c("1" = "0-33", "2" = "34-66", "3" = "67+"))
Cook
# Clustered marker map of the records in Cook County, IL.
Cook <- coord %>% filter(county == "Cook", state == "IL")
# The widget is built from Cook itself (it was previously handed the Los
# Angeles table by mistake), so map data and markers come from one table.
leaflet(Cook, height = 300, width = 900) %>%
  addTiles() %>%  # default OpenStreetMap map tiles
  addMarkers(
    lng = ~longitude,
    lat = ~latitude,
    clusterOptions = markerClusterOptions()
  )
## Warning in validateCoords(lng, lat, funcName): Data contains 1 rows with
## either missing or invalid lat/lon values and will be ignored
Arizona
# Arizona county choropleths, one per profit-margin group; each county is
# shaded by its joined `percentage` value.
c1 %>%
  filter(region == "arizona") %>%
  ggplot(mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(mapping = aes(fill = percentage)) +
  scale_fill_continuous(low = "#56B1F7", high = "#132B43") +
  labs(title = "Arizona Counties (Profit Margin: 0-33)")

c2 %>%
  filter(region == "arizona") %>%
  ggplot(mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(mapping = aes(fill = percentage)) +
  scale_fill_continuous(low = "#56B1F7", high = "#132B43") +
  labs(title = "Arizona Counties (Profit Margin: 34-66)")

c3 %>%
  filter(region == "arizona") %>%
  ggplot(mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(mapping = aes(fill = percentage)) +
  scale_fill_continuous(low = "#56B1F7", high = "#132B43") +
  labs(title = "Arizona Counties (Profit Margin: 67+)")
Zip codes
# Arizona county outlines with one point per record costing at least 1000,
# coloured by profit-margin group.
ggplot(countiesmap[countiesmap$region == "arizona", ]) +
  # Constant white fill set outside aes(): no fill scale or legend is needed.
  geom_polygon(
    aes(x = long, y = lat, group = group),
    fill = "white", color = "black"
  ) +
  ggtitle("Arizona") +
  # filter() drops rows with a missing Cost instead of turning them into
  # all-NA points.
  geom_point(
    data = filter(coord, state == "AZ", Cost >= 1000),
    aes(x = longitude, y = latitude, color = Group),
    size = 0.9
  ) +
  # Labels are keyed by Group level, so a state that lacks one of the groups
  # still gets the right text. Group 3 starts at 67, hence "67+".
  scale_color_hue(labels = c("1" = "0-33", "2" = "34-66", "3" = "67+"))
Maricopa
# Clustered marker map of the records in Maricopa County, AZ.
Maricopa <- filter(coord, county == "Maricopa" & state == "AZ")
leaflet(data = Maricopa, width = 900, height = 300) %>%
  addTiles() %>%  # default OpenStreetMap map tiles
  addMarkers(
    lng = ~longitude,
    lat = ~latitude,
    clusterOptions = markerClusterOptions()
  )
Texas
# Texas county choropleths, one per profit-margin group; each county is
# shaded by its joined `percentage` value.
c1 %>%
  filter(region == "texas") %>%
  ggplot(mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(mapping = aes(fill = percentage)) +
  scale_fill_continuous(low = "#56B1F7", high = "#132B43") +
  labs(title = "Texas Counties (Profit Margin: 0-33)")

c2 %>%
  filter(region == "texas") %>%
  ggplot(mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(mapping = aes(fill = percentage)) +
  scale_fill_continuous(low = "#56B1F7", high = "#132B43") +
  labs(title = "Texas Counties (Profit Margin: 34-66)")

c3 %>%
  filter(region == "texas") %>%
  ggplot(mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(mapping = aes(fill = percentage)) +
  scale_fill_continuous(low = "#56B1F7", high = "#132B43") +
  labs(title = "Texas Counties (Profit Margin: 67+)")
Zip codes
# Texas county outlines with one point per record costing at least 1000,
# coloured by profit-margin group.
ggplot(countiesmap[countiesmap$region == "texas", ]) +
  # Constant white fill set outside aes(): no fill scale or legend is needed.
  geom_polygon(
    aes(x = long, y = lat, group = group),
    fill = "white", color = "black"
  ) +
  ggtitle("Texas") +
  # filter() drops rows with a missing Cost instead of turning them into
  # all-NA points.
  geom_point(
    data = filter(coord, state == "TX", Cost >= 1000),
    aes(x = longitude, y = latitude, color = Group),
    size = 0.9
  ) +
  # Labels are keyed by Group level, so a state that lacks one of the groups
  # still gets the right text. Group 3 starts at 67, hence "67+".
  scale_color_hue(labels = c("1" = "0-33", "2" = "34-66", "3" = "67+"))
Harris
# Clustered marker map of the records in Harris County, TX.
Harris <- filter(coord, county == "Harris" & state == "TX")
leaflet(data = Harris, width = 900, height = 300) %>%
  addTiles() %>%  # default OpenStreetMap map tiles
  addMarkers(
    lng = ~longitude,
    lat = ~latitude,
    clusterOptions = markerClusterOptions()
  )
Florida
# Florida county choropleths, one per profit-margin group; each county is
# shaded by its joined `percentage` value.
c1 %>%
  filter(region == "florida") %>%
  ggplot(mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(mapping = aes(fill = percentage)) +
  scale_fill_continuous(low = "#56B1F7", high = "#132B43") +
  labs(title = "Florida Counties (Profit Margin: 0-33)")

c2 %>%
  filter(region == "florida") %>%
  ggplot(mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(mapping = aes(fill = percentage)) +
  scale_fill_continuous(low = "#56B1F7", high = "#132B43") +
  labs(title = "Florida Counties (Profit Margin: 34-66)")

c3 %>%
  filter(region == "florida") %>%
  ggplot(mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(mapping = aes(fill = percentage)) +
  scale_fill_continuous(low = "#56B1F7", high = "#132B43") +
  labs(title = "Florida Counties (Profit Margin: 67+)")
Zip codes
# Florida county outlines with one point per record costing at least 1000,
# coloured by profit-margin group.
ggplot(countiesmap[countiesmap$region == "florida", ]) +
  # Constant white fill set outside aes(): no fill scale or legend is needed.
  geom_polygon(
    aes(x = long, y = lat, group = group),
    fill = "white", color = "black"
  ) +
  ggtitle("Florida") +
  # filter() drops rows with a missing Cost instead of turning them into
  # all-NA points.
  geom_point(
    data = filter(coord, state == "FL", Cost >= 1000),
    aes(x = longitude, y = latitude, color = Group),
    size = 0.9
  ) +
  # Labels are keyed by Group level, so a state that lacks one of the groups
  # still gets the right text. Group 3 starts at 67, hence "67+".
  scale_color_hue(labels = c("1" = "0-33", "2" = "34-66", "3" = "67+"))
Miami-Dade
# Clustered marker map of the records in Miami-Dade County, FL.
MiamiDade <- filter(coord, county == "Miami-Dade" & state == "FL")
leaflet(data = MiamiDade, width = 900, height = 300) %>%
  addTiles() %>%  # default OpenStreetMap map tiles
  addMarkers(
    lng = ~longitude,
    lat = ~latitude,
    clusterOptions = markerClusterOptions()
  )
New York
# New York county choropleths, one per profit-margin group; each county is
# shaded by its joined `percentage` value.
c1 %>%
  filter(region == "new york") %>%
  ggplot(mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(mapping = aes(fill = percentage)) +
  scale_fill_continuous(low = "#56B1F7", high = "#132B43") +
  labs(title = "New York Counties (Profit Margin: 0-33)")

c2 %>%
  filter(region == "new york") %>%
  ggplot(mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(mapping = aes(fill = percentage)) +
  scale_fill_continuous(low = "#56B1F7", high = "#132B43") +
  labs(title = "New York Counties (Profit Margin: 34-66)")

c3 %>%
  filter(region == "new york") %>%
  ggplot(mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(mapping = aes(fill = percentage)) +
  scale_fill_continuous(low = "#56B1F7", high = "#132B43") +
  labs(title = "New York Counties (Profit Margin: 67+)")
Zip codes
# New York county outlines with one point per record costing at least 1000,
# coloured by profit-margin group.
ggplot(countiesmap[countiesmap$region == "new york", ]) +
  # Constant white fill set outside aes(): no fill scale or legend is needed.
  geom_polygon(
    aes(x = long, y = lat, group = group),
    fill = "white", color = "black"
  ) +
  ggtitle("New York") +
  # filter() drops rows with a missing Cost instead of turning them into
  # all-NA points.
  geom_point(
    data = filter(coord, state == "NY", Cost >= 1000),
    aes(x = longitude, y = latitude, color = Group),
    size = 0.9
  ) +
  # Labels are keyed by Group level, so a state that lacks one of the groups
  # still gets the right text. Group 3 starts at 67, hence "67+".
  scale_color_hue(labels = c("1" = "0-33", "2" = "34-66", "3" = "67+"))
New York
# Clustered marker map of the records in New York County, NY.
NY <- filter(coord, county == "New York" & state == "NY")
leaflet(data = NY, width = 900, height = 300) %>%
  addTiles() %>%  # default OpenStreetMap map tiles
  addMarkers(
    lng = ~longitude,
    lat = ~latitude,
    clusterOptions = markerClusterOptions()
  )
Kings
# Clustered marker map of the records in Kings County, NY.
Kings <- filter(coord, county == "Kings" & state == "NY")
leaflet(data = Kings, width = 900, height = 300) %>%
  addTiles() %>%  # default OpenStreetMap map tiles
  addMarkers(
    lng = ~longitude,
    lat = ~latitude,
    clusterOptions = markerClusterOptions()
  )