Video link https://youtu.be/MZ6Ki3eHQOg
Our YouTube channel has lots of videos on data visualisation in R.
Visit our youtube channel https://www.youtube.com/c/TechAnswers88
We need shapefiles to draw the boundaries required for a choropleth map. The following website has various boundary files for Egypt:
https://data.humdata.org/dataset/cod-ab-egy?
Here is a link to the first-level (adm1) data files.
Here is a link to the adm2 level data files, which have suburb level details under each area.
Extract the data from each zip file. We are mainly interested in the .shp file, but do not delete any of the other extracted files: the .shp file is read together with its companion files (such as .dbf and .shx).
If you do not have these packages then please install them using the install.packages command or follow the install option in your RStudio GUI.
Here are the install.packages commands for each package.
# One-time setup: install the packages used in this tutorial.
# Package names must be wrapped in straight quotes; typographic quotes
# (as produced by some word processors and web pages) are a syntax error in R.
install.packages("ggplot2")
install.packages("sf")
install.packages("rvest")
install.packages("dplyr")
install.packages("viridis")
install.packages("ggrepel")
install.packages("ggthemes")
library(ggplot2)
library(sf)
library(rvest)
library(dplyr)
library(viridis)
library(ggrepel)
library(ggthemes)
# Load the adm1-level and adm2-level boundary shapefiles as sf objects.
# The paths point at the folders where the downloaded zip files were extracted.
shp1 <- sf::read_sf(
  dsn = "d:\\tmp\\egy_admbnda_adm1_capmas_20170421\\egy_admbnda_adm1_capmas_20170421.shp"
)
shp2 <- sf::read_sf(
  dsn = "d:\\tmp\\egy_admbnda_adm2_capmas_20170421\\egy_admbnda_adm2_capmas_20170421.shp"
)
A very basic chart to see how the plot works.
# Minimal map: draw the adm1 boundaries with default styling.
pl <- ggplot(shp1) + geom_sf()
pl
See the wikipedia for more information on the Provincial divisions
# Print the structure of shp1 (its columns and their types) to see which
# fields are available for fills and labels.
str(shp1)
FALSE sf [27 x 17] (S3: sf/tbl_df/tbl/data.frame)
FALSE $ ADM1_EN : chr [1:27] "Alexandria" "Assiut" "Aswan" "Behera" ...
FALSE $ ADM1_AR : chr [1:27] "<U+0627><U+0644><U+0627><U+0633><U+0643><U+0646><U+062F><U+0631><U+064A><U+0629>\n" "<U+0623><U+0633><U+064A><U+0648><U+0637>" "<U+0623><U+0633><U+0648><U+0627><U+0646>" "<U+0627><U+0644><U+0628><U+062D><U+064A><U+0631><U+0629>\n" ...
FALSE $ ADM1_PCODE: chr [1:27] "EG02" "EG25" "EG28" "EG18" ...
FALSE $ ADM1_REF : chr [1:27] "Alexandria" "Assiut" "Aswan" "Behera" ...
FALSE $ ADM1ALT1EN: chr [1:27] NA NA NA NA ...
FALSE $ ADM1ALT2EN: chr [1:27] NA NA NA NA ...
FALSE $ ADM1ALT1AR: chr [1:27] NA NA NA NA ...
FALSE $ ADM1ALT2AR: chr [1:27] NA NA NA NA ...
FALSE $ ADM0_EN : chr [1:27] "Egypt" "Egypt" "Egypt" "Egypt" ...
FALSE $ ADM0_AR : chr [1:27] "<U+0645><U+0650><U+0635><U+0631>" "<U+0645><U+0650><U+0635><U+0631>" "<U+0645><U+0650><U+0635><U+0631>" "<U+0645><U+0650><U+0635><U+0631>" ...
FALSE $ ADM0_PCODE: chr [1:27] "EG" "EG" "EG" "EG" ...
FALSE $ date : Date[1:27], format: "2006-01-01" "2006-01-01" ...
FALSE $ validOn : Date[1:27], format: "2017-04-21" "2017-04-21" ...
FALSE $ validTo : Date[1:27], format: NA NA ...
FALSE $ Shape_Leng: num [1:27] 4.27 8 38.13 6.51 7.44 ...
FALSE $ Shape_Area: num [1:27] 0.237 1.498 5.102 1.068 0.981 ...
FALSE $ geometry :sfc_MULTIPOLYGON of length 27; first list element: List of 1
FALSE ..$ :List of 2
FALSE .. ..$ : num [1:50689, 1:2] 30.1 30.1 30.1 30.1 30.1 ...
FALSE .. ..$ : num [1:99, 1:2] 29.9 29.9 29.9 29.9 29.9 ...
FALSE ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
FALSE - attr(*, "sf_column")= chr "geometry"
FALSE - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
FALSE ..- attr(*, "names")= chr [1:16] "ADM1_EN" "ADM1_AR" "ADM1_PCODE" "ADM1_REF" ...
# Colour each adm1 area by its name, drop the legend, and annotate every
# area with a repelled label box placed via the "sf_coordinates" stat.
pl <- ggplot(shp1) +
  geom_sf(aes(fill = ADM1_REF), color = "white") +
  ggthemes::theme_map() +
  theme(legend.position = "none") +
  ggrepel::geom_label_repel(
    aes(label = ADM1_REF, geometry = geometry),
    stat = "sf_coordinates",
    box.padding = 0.5,
    max.overlaps = Inf,
    size = 4
  )
pl
# Same map as the label-box version, but the area names are drawn as plain
# repelled text (no surrounding box).
pl <- ggplot(shp1) +
  geom_sf(aes(fill = ADM1_REF), color = "white") +
  ggthemes::theme_map() +
  theme(legend.position = "none") +
  ggrepel::geom_text_repel(
    aes(label = ADM1_REF, geometry = geometry),
    stat = "sf_coordinates",
    box.padding = 0.5,
    max.overlaps = Inf,
    size = 4
  )
pl
We will create a new column in our shp1 data, as shown below, so that we can generate a heatmap or choropleth chart. As there are 27 governorates, we add 27 values to our sales column.
# Illustrative sales figures, one per row of shp1 (27 values in total),
# stored in a new `sales` column.
shp1$sales <- c(
  7261, 18100, 31282, 45900, 7170, 6110, 987000, 71700, 61110, 987000,
  143000, 162000, 8200, 1990, 65700, 36876, 89000, 45000, 35000, 870000,
  19900, 56000, 3400, 2800, 6500, 5600, 45000
)
# Choropleth: shade each adm1 area by its `sales` value using the continuous
# viridis scale (option "D") and add repelled text labels with the area names.
pl <- ggplot(shp1) +
  geom_sf(aes(fill = sales), color = "white") +
  ggthemes::theme_map() +
  scale_fill_viridis_c(option = "D") +
  ggrepel::geom_text_repel(
    aes(label = ADM1_REF, geometry = geometry),
    stat = "sf_coordinates",
    box.padding = 0.5,
    max.overlaps = Inf,
    size = 3
  )
pl
Too many labels, so we should avoid the labelling or do only selective labelling.
# adm2-level map: colour every area by its name with the discrete viridis
# scale (option "C"), hide the legend, and label every single area.
pl <- ggplot(shp2) +
  geom_sf(aes(fill = ADM2_REF), color = "white") +
  ggthemes::theme_map() +
  theme(legend.position = "none") +
  ggrepel::geom_label_repel(
    aes(label = ADM2_REF, geometry = geometry),
    stat = "sf_coordinates",
    box.padding = 0.5,
    max.overlaps = Inf,
    size = 4
  ) +
  scale_fill_viridis_d(option = "C")
pl
Note that we have defined a filter on the data passed to the label layer, so that labels are shown only for the areas under Aswan:
ggrepel::geom_label_repel(data = shp2%>%dplyr::filter(ADM1_EN %in% c('Aswan')), ...)
#str(shp2)
#(shp2$ADM1_EN)
# adm2-level map with selective labelling: all areas are drawn and filled,
# but the label layer receives only the rows whose ADM1_EN is "Aswan".
pl <- ggplot(shp2) +
  geom_sf(aes(fill = ADM2_REF), color = "white") +
  ggthemes::theme_map() +
  theme(legend.position = "none") +
  ggrepel::geom_label_repel(
    data = dplyr::filter(shp2, ADM1_EN %in% c("Aswan")),
    mapping = aes(label = ADM2_REF, geometry = geometry),
    stat = "sf_coordinates",
    box.padding = 0.5,
    max.overlaps = Inf,
    size = 4
  ) +
  scale_fill_viridis_d(option = "C")
pl
Subscribe to our channel for various data visualisation and statistics videos in R.