# Load the raw records that every later step reads from `df`.
df <- read.csv(file = "heatmap2_df.csv")

Goal: determine which zip codes and/or counties our marketing efforts should focus on, based on past performance and density.

Data to produce:

Get the percentage of zip codes per county that are in groups 1, 2, or 3. Then, get the percentage of counties per state that are in groups 1, 2, or 3. The groups are defined by ROI (Profit Margin): group 1 is 0-33 (underperforming), group 2 is 34-66 (average), and group 3 is 67+ (above average).

Next, list which whole counties to keep (those where a high percentage of zip codes within the county perform well). Lastly, list which zip codes to keep when the county as a whole does not perform well.

Denser counties which need to be plotted:

For now we’ll group the Profit Margin as:

# Profit-margin groups: 1 = 0-33, 2 = 34-66, 3 = 67+.
# Convert via character first so a factor column yields its printed values
# rather than its internal level codes.
margin_raw <- df$Profit.Margin
if (is.factor(margin_raw)) {
  margin_raw <- as.character(margin_raw)
}
df$Profit.Margin <- as.numeric(margin_raw)

# cut() assigns the group by column name (no hard-coded column index), leaves
# NA and negative margins as NA instead of failing, and puts fractional
# margins such as 33.5 or 66.2 into the lower band instead of leaving them
# ungrouped. Bands are [0, 34), [34, 67), [67, Inf).
df$Group <- cut(
  df$Profit.Margin,
  breaks = c(0, 34, 67, Inf),
  labels = c(1, 2, 3),
  right = FALSE
)

# Cost arrives as formatted text: strip "$", "," and whitespace, then treat a
# lone "-" placeholder as zero. Only a bare dash is replaced, so the minus
# sign of a negative amount is preserved.
cost_text <- gsub("[$,[:space:]]", "", as.character(df$Cost))
cost_text[cost_text %in% "-"] <- "0"
df$Cost <- as.numeric(cost_text)

The % of counties per state that were in group 1, 2, 3.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Share (in percent) of all qualifying records (Cost >= 1000) that falls in
# each state / county / profit-margin group combination.
# The filtered table is computed once and reused as the denominator, so rows
# with a missing Cost (dropped by filter()) are not counted in the total.
# NOTE(review): the denominator is the overall number of qualifying records,
# not a per-state total; confirm this is the intended "per state" figure.
county_rows <- df %>% filter(Cost >= 1000)
counties <- county_rows %>%
  group_by(state, county, Group) %>%
  summarise(percentage = n() / nrow(county_rows) * 100) %>%
  ungroup()
## Warning: Factor `Group` contains implicit NA, consider using
## `forcats::fct_explicit_na`

The % of zipcodes per county that were in group 1, 2, 3.

# Share (in percent) of all qualifying records (Cost >= 1000) that falls in
# each county / zip code / profit-margin group combination.
# The filtered table is computed once and reused as the denominator, so rows
# with a missing Cost (dropped by filter()) are not counted in the total.
# NOTE(review): the denominator is the overall number of qualifying records,
# not a per-county total; confirm this is the intended "per county" figure.
zip_rows <- df %>% filter(Cost >= 1000)
zip <- zip_rows %>%
  group_by(county, zip, Group) %>%
  summarise(percentage = n() / nrow(zip_rows) * 100) %>%
  ungroup()
## Warning: Factor `Group` contains implicit NA, consider using
## `forcats::fct_explicit_na`

Output in csv sorted by state and then county.

# Sort the county table by state and then county; the zip table has no state
# column, so it is sorted by county and then zip code.
counties <- counties[order(counties$state, counties$county), ]
zip <- zip[order(zip$county, zip$zip), ]

# Write both tables as comma-separated files without row names.
write.table(zip, "zip.csv", sep = ",", row.names = FALSE)
write.table(counties, "counties.csv", sep = ",", row.names = FALSE)
library(ggplot2)
## Registered S3 methods overwritten by 'ggplot2':
##   method         from 
##   [.quosures     rlang
##   c.quosures     rlang
##   print.quosures rlang
# County polygons for plotting; `region` holds the lower-case state name and
# `subregion` the lower-case county name.
countiesmap <- map_data("county")
counties$county <- tolower(counties$county)

# Translate the two-letter state codes used in the data into the lower-case
# state names used by the map, so the join can match on state AND county.
# Joining on county name alone attaches one state's percentage to every
# same-named county in other states and duplicates polygon rows.
state_lookup <- c(
  setNames(tolower(state.name), state.abb),
  DC = "district of columbia"
)
county_shares <- data.frame(
  region = unname(state_lookup[as.character(counties$state)]),
  subregion = counties$county,
  Group = counties$Group,
  percentage = counties$percentage,
  stringsAsFactors = FALSE
)

# Attach the `percentage` of one profit-margin group to the map polygons.
# `%in%` (rather than `==`) drops rows whose Group is NA.
join_group_share <- function(group_id) {
  keep <- county_shares$Group %in% group_id
  left_join(
    countiesmap,
    county_shares[keep, c("region", "subregion", "percentage")],
    by = c("region", "subregion")
  )
}

c1 <- join_group_share(1)
c2 <- join_group_share(2)
c3 <- join_group_share(3)

library(zipcode)
library(dplyr)

# Normalise the zip codes in `df`, then load the `zipcode` reference table.
df$zip <- clean.zipcodes(df$zip)
data("zipcode")

# Keep columns 1, 4 and 5 of the reference table (presumably zip, latitude and
# longitude; later code reads `latitude` / `longitude` from `coord`) and
# attach them to every record by zip code.
zip_lookup <- zipcode[, c(1, 4:5)]
coord <- left_join(df, zip_lookup, by = "zip")

California

# California county choropleths: one map per profit-margin group, each county
# filled by its `percentage` value from the joined c1 / c2 / c3 tables.
ca_fill <- scale_fill_continuous(high = "#132B43", low = "#56B1F7")

ggplot(
  data = c1[c1$region == "california", ],
  mapping = aes(x = long, y = lat, group = group)
) +
  geom_polygon(aes(fill = percentage)) +
  ca_fill +
  ggtitle("California Counties (Profit Margin: 0-33)")

ggplot(
  data = c2[c2$region == "california", ],
  mapping = aes(x = long, y = lat, group = group)
) +
  geom_polygon(aes(fill = percentage)) +
  ca_fill +
  ggtitle("California Counties (Profit Margin: 34-66)")

ggplot(
  data = c3[c3$region == "california", ],
  mapping = aes(x = long, y = lat, group = group)
) +
  geom_polygon(aes(fill = percentage)) +
  ca_fill +
  ggtitle("California Counties (Profit Margin: 67+)")

Zip codes

# California zip-code scatter: white county outlines with one point per record
# whose Cost is at least 1000, coloured by profit-margin group.
# which() drops records with a missing state or Cost instead of turning them
# into all-NA rows.
ggplot(countiesmap[countiesmap$region == "california", ]) +
  geom_polygon(
    aes(x = long, y = lat, group = group),
    fill = "white", color = "black"
  ) +
  ggtitle("California") +
  geom_point(
    data = coord[which(coord$state == "CA" & coord$Cost >= 1000), ],
    aes(x = longitude, y = latitude, color = Group),
    size = 0.9
  ) +
  # Group 3 covers margins of 67 and above, so its legend label is "67+".
  scale_color_hue(labels = c("0-33", "34-66", "67+"))

Los Angeles

library(leaflet)
## Warning: package 'leaflet' was built under R version 3.6.1
# Clustered marker map of every record in Los Angeles County, CA.
LA <- filter(coord, county == "Los Angeles", state == "CA")

la_map <- leaflet(LA, height = 300, width = 900)
la_map <- addTiles(la_map)  # default OpenStreetMap map tiles
addMarkers(
  la_map,
  lng = LA$longitude,
  lat = LA$latitude,
  clusterOptions = markerClusterOptions()
)

Illinois

# Illinois county choropleths: one map per profit-margin group, each county
# filled by its `percentage` value from the joined c1 / c2 / c3 tables.
il_fill <- scale_fill_continuous(high = "#132B43", low = "#56B1F7")

ggplot(
  data = c1[c1$region == "illinois", ],
  mapping = aes(x = long, y = lat, group = group)
) +
  geom_polygon(aes(fill = percentage)) +
  il_fill +
  ggtitle("Illinois Counties (Profit Margin: 0-33)")

ggplot(
  data = c2[c2$region == "illinois", ],
  mapping = aes(x = long, y = lat, group = group)
) +
  geom_polygon(aes(fill = percentage)) +
  il_fill +
  ggtitle("Illinois Counties (Profit Margin: 34-66)")

ggplot(
  data = c3[c3$region == "illinois", ],
  mapping = aes(x = long, y = lat, group = group)
) +
  geom_polygon(aes(fill = percentage)) +
  il_fill +
  ggtitle("Illinois Counties (Profit Margin: 67+)")

Zip codes

# Illinois zip-code scatter: white county outlines with one point per record
# whose Cost is at least 1000, coloured by profit-margin group.
# which() drops records with a missing state or Cost instead of turning them
# into all-NA rows.
ggplot(countiesmap[countiesmap$region == "illinois", ]) +
  geom_polygon(
    aes(x = long, y = lat, group = group),
    fill = "white", color = "black"
  ) +
  ggtitle("Illinois") +
  geom_point(
    data = coord[which(coord$state == "IL" & coord$Cost >= 1000), ],
    aes(x = longitude, y = latitude, color = Group),
    size = 0.9
  ) +
  # Group 3 covers margins of 67 and above, so its legend label is "67+".
  scale_color_hue(labels = c("0-33", "34-66", "67+"))

Cook

# Clustered marker map of every record in Cook County, IL.
Cook <- coord %>% filter(county == "Cook", state == "IL")

# The widget is built from the Cook data itself (not the Los Angeles table),
# so this map no longer depends on another county's object.
leaflet(Cook, height = 300, width = 900) %>%
  addTiles() %>%  # default OpenStreetMap map tiles
  addMarkers(
    lng = Cook$longitude,
    lat = Cook$latitude,
    clusterOptions = markerClusterOptions()
  )
## Warning in validateCoords(lng, lat, funcName): Data contains 1 rows with
## either missing or invalid lat/lon values and will be ignored

Arizona

# Arizona county choropleths: one map per profit-margin group, each county
# filled by its `percentage` value from the joined c1 / c2 / c3 tables.
az_fill <- scale_fill_continuous(high = "#132B43", low = "#56B1F7")

ggplot(
  data = c1[c1$region == "arizona", ],
  mapping = aes(x = long, y = lat, group = group)
) +
  geom_polygon(aes(fill = percentage)) +
  az_fill +
  ggtitle("Arizona Counties (Profit Margin: 0-33)")

ggplot(
  data = c2[c2$region == "arizona", ],
  mapping = aes(x = long, y = lat, group = group)
) +
  geom_polygon(aes(fill = percentage)) +
  az_fill +
  ggtitle("Arizona Counties (Profit Margin: 34-66)")

ggplot(
  data = c3[c3$region == "arizona", ],
  mapping = aes(x = long, y = lat, group = group)
) +
  geom_polygon(aes(fill = percentage)) +
  az_fill +
  ggtitle("Arizona Counties (Profit Margin: 67+)")

Zip codes

# Arizona zip-code scatter: white county outlines with one point per record
# whose Cost is at least 1000, coloured by profit-margin group.
# which() drops records with a missing state or Cost instead of turning them
# into all-NA rows.
ggplot(countiesmap[countiesmap$region == "arizona", ]) +
  geom_polygon(
    aes(x = long, y = lat, group = group),
    fill = "white", color = "black"
  ) +
  ggtitle("Arizona") +
  geom_point(
    data = coord[which(coord$state == "AZ" & coord$Cost >= 1000), ],
    aes(x = longitude, y = latitude, color = Group),
    size = 0.9
  ) +
  # Group 3 covers margins of 67 and above, so its legend label is "67+".
  scale_color_hue(labels = c("0-33", "34-66", "67+"))

Maricopa

# Clustered marker map of every record in Maricopa County, AZ.
Maricopa <- filter(coord, county == "Maricopa", state == "AZ")

maricopa_map <- leaflet(Maricopa, height = 300, width = 900)
maricopa_map <- addTiles(maricopa_map)  # default OpenStreetMap map tiles
addMarkers(
  maricopa_map,
  lng = Maricopa$longitude,
  lat = Maricopa$latitude,
  clusterOptions = markerClusterOptions()
)

Texas

# Texas county choropleths: one map per profit-margin group, each county
# filled by its `percentage` value from the joined c1 / c2 / c3 tables.
tx_fill <- scale_fill_continuous(high = "#132B43", low = "#56B1F7")

ggplot(
  data = c1[c1$region == "texas", ],
  mapping = aes(x = long, y = lat, group = group)
) +
  geom_polygon(aes(fill = percentage)) +
  tx_fill +
  ggtitle("Texas Counties (Profit Margin: 0-33)")

ggplot(
  data = c2[c2$region == "texas", ],
  mapping = aes(x = long, y = lat, group = group)
) +
  geom_polygon(aes(fill = percentage)) +
  tx_fill +
  ggtitle("Texas Counties (Profit Margin: 34-66)")

ggplot(
  data = c3[c3$region == "texas", ],
  mapping = aes(x = long, y = lat, group = group)
) +
  geom_polygon(aes(fill = percentage)) +
  tx_fill +
  ggtitle("Texas Counties (Profit Margin: 67+)")

Zip codes

# Texas zip-code scatter: white county outlines with one point per record
# whose Cost is at least 1000, coloured by profit-margin group.
# which() drops records with a missing state or Cost instead of turning them
# into all-NA rows.
ggplot(countiesmap[countiesmap$region == "texas", ]) +
  geom_polygon(
    aes(x = long, y = lat, group = group),
    fill = "white", color = "black"
  ) +
  ggtitle("Texas") +
  geom_point(
    data = coord[which(coord$state == "TX" & coord$Cost >= 1000), ],
    aes(x = longitude, y = latitude, color = Group),
    size = 0.9
  ) +
  # Group 3 covers margins of 67 and above, so its legend label is "67+".
  scale_color_hue(labels = c("0-33", "34-66", "67+"))

Harris

# Clustered marker map of every record in Harris County, TX.
Harris <- filter(coord, county == "Harris", state == "TX")

harris_map <- leaflet(Harris, height = 300, width = 900)
harris_map <- addTiles(harris_map)  # default OpenStreetMap map tiles
addMarkers(
  harris_map,
  lng = Harris$longitude,
  lat = Harris$latitude,
  clusterOptions = markerClusterOptions()
)

Florida

# Florida county choropleths: one map per profit-margin group, each county
# filled by its `percentage` value from the joined c1 / c2 / c3 tables.
fl_fill <- scale_fill_continuous(high = "#132B43", low = "#56B1F7")

ggplot(
  data = c1[c1$region == "florida", ],
  mapping = aes(x = long, y = lat, group = group)
) +
  geom_polygon(aes(fill = percentage)) +
  fl_fill +
  ggtitle("Florida Counties (Profit Margin: 0-33)")

ggplot(
  data = c2[c2$region == "florida", ],
  mapping = aes(x = long, y = lat, group = group)
) +
  geom_polygon(aes(fill = percentage)) +
  fl_fill +
  ggtitle("Florida Counties (Profit Margin: 34-66)")

ggplot(
  data = c3[c3$region == "florida", ],
  mapping = aes(x = long, y = lat, group = group)
) +
  geom_polygon(aes(fill = percentage)) +
  fl_fill +
  ggtitle("Florida Counties (Profit Margin: 67+)")

Zip codes

# Florida zip-code scatter: white county outlines with one point per record
# whose Cost is at least 1000, coloured by profit-margin group.
# which() drops records with a missing state or Cost instead of turning them
# into all-NA rows.
ggplot(countiesmap[countiesmap$region == "florida", ]) +
  geom_polygon(
    aes(x = long, y = lat, group = group),
    fill = "white", color = "black"
  ) +
  ggtitle("Florida") +
  geom_point(
    data = coord[which(coord$state == "FL" & coord$Cost >= 1000), ],
    aes(x = longitude, y = latitude, color = Group),
    size = 0.9
  ) +
  # Group 3 covers margins of 67 and above, so its legend label is "67+".
  scale_color_hue(labels = c("0-33", "34-66", "67+"))

Miami-Dade

# Clustered marker map of every record in Miami-Dade County, FL.
MiamiDade <- filter(coord, county == "Miami-Dade", state == "FL")

miami_dade_map <- leaflet(MiamiDade, height = 300, width = 900)
miami_dade_map <- addTiles(miami_dade_map)  # default OpenStreetMap map tiles
addMarkers(
  miami_dade_map,
  lng = MiamiDade$longitude,
  lat = MiamiDade$latitude,
  clusterOptions = markerClusterOptions()
)

New York

# New York county choropleths: one map per profit-margin group, each county
# filled by its `percentage` value from the joined c1 / c2 / c3 tables.
ny_fill <- scale_fill_continuous(high = "#132B43", low = "#56B1F7")

ggplot(
  data = c1[c1$region == "new york", ],
  mapping = aes(x = long, y = lat, group = group)
) +
  geom_polygon(aes(fill = percentage)) +
  ny_fill +
  ggtitle("New York Counties (Profit Margin: 0-33)")

ggplot(
  data = c2[c2$region == "new york", ],
  mapping = aes(x = long, y = lat, group = group)
) +
  geom_polygon(aes(fill = percentage)) +
  ny_fill +
  ggtitle("New York Counties (Profit Margin: 34-66)")

ggplot(
  data = c3[c3$region == "new york", ],
  mapping = aes(x = long, y = lat, group = group)
) +
  geom_polygon(aes(fill = percentage)) +
  ny_fill +
  ggtitle("New York Counties (Profit Margin: 67+)")

Zip codes

# New York zip-code scatter: white county outlines with one point per record
# whose Cost is at least 1000, coloured by profit-margin group.
# which() drops records with a missing state or Cost instead of turning them
# into all-NA rows.
ggplot(countiesmap[countiesmap$region == "new york", ]) +
  geom_polygon(
    aes(x = long, y = lat, group = group),
    fill = "white", color = "black"
  ) +
  ggtitle("New York") +
  geom_point(
    data = coord[which(coord$state == "NY" & coord$Cost >= 1000), ],
    aes(x = longitude, y = latitude, color = Group),
    size = 0.9
  ) +
  # Group 3 covers margins of 67 and above, so its legend label is "67+".
  scale_color_hue(labels = c("0-33", "34-66", "67+"))

New York County

# Clustered marker map of every record in New York County, NY.
NY <- filter(coord, county == "New York", state == "NY")

ny_map <- leaflet(NY, height = 300, width = 900)
ny_map <- addTiles(ny_map)  # default OpenStreetMap map tiles
addMarkers(
  ny_map,
  lng = NY$longitude,
  lat = NY$latitude,
  clusterOptions = markerClusterOptions()
)

Kings

# Clustered marker map of every record in Kings County, NY.
Kings <- filter(coord, county == "Kings", state == "NY")

kings_map <- leaflet(Kings, height = 300, width = 900)
kings_map <- addTiles(kings_map)  # default OpenStreetMap map tiles
addMarkers(
  kings_map,
  lng = Kings$longitude,
  lat = Kings$latitude,
  clusterOptions = markerClusterOptions()
)