Number of Schools

This document presents the code and graphs for the number of schools and pupils by municipality in Georgia.

#Loading necessary libraries
library(tidyverse) # For "ggplot" - graphs

## Registered S3 methods overwritten by 'ggplot2':
##   method         from 
##   [.quosures     rlang
##   c.quosures     rlang
##   print.quosures rlang

## Registered S3 method overwritten by 'rvest':
##   method            from
##   read_xml.response xml2

## ── Attaching packages ────────────────────────────────────────────────────────── tidyverse 1.2.1 ──

## ✔ ggplot2 3.1.1       ✔ purrr   0.3.2  
## ✔ tibble  2.1.1       ✔ dplyr   0.8.0.1
## ✔ tidyr   0.8.3       ✔ stringr 1.4.0  
## ✔ readr   1.3.1       ✔ forcats 0.4.0

## ── Conflicts ───────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

library(dplyr)
library(sp) #For importing maps
library(readr) #For importing .csv file. There are other packages for importing data, but this one is more commonly used because it makes fewer assumptions (doesn't set the rules for the imported data unless so indicated). 
library(skimr) #To easily overview data

## Registered S3 method overwritten by 'skimr':
##   method      from  
##   print.spark pillar

## 
## Attaching package: 'skimr'

## The following object is masked from 'package:stats':
## 
##     filter

# Folder that holds the project data. Full paths are built from it instead of
# calling setwd(), so the working directory of the session is left untouched.
# If you replicate this code, change this path to your own folder location.
data_dir <- "/Users/Posh/Documents/GIK - R"

# Now to read the data:
#   skip = 2  - ignore the first two lines of the file
#   na        - treat empty cells, "..." and "NA" as missing values
school_pupil <- read_csv(
  file.path(data_dir, "pupils.and.schools.csv"),
  skip = 2,
  na = c("", "...", "NA"),
  locale = default_locale()
)

## Parsed with column specification:
## cols(
##   მუნიციპალიტეტი = col_character(),
##   `სკოლების რაოდენობა 2018/2019` = col_double(),
##   `მოსწავლეთა რიცხოვნობა 2018/2019` = col_double()
## )

#Note: I struggled to import the data because R couldn't handle Georgian characters, even when I specified the locale as UTF-8. Saving the Excel .csv file in UTF-8 resolved the issue. So when a file contains Georgian characters, always make sure that the file is saved in UTF-8.

After importing the data, it’s time to clean it and prepare it for visualisation.

# Drop the first data row: it holds the overall (country-wide) number of
# schools and pupils, which should not show up in any graph.
school_pupil <- school_pupil[-1, ]

# Keep only the region-level rows (regions, Tbilisi, Abkhazia, Adjara);
# the municipality rows are filtered out.
school_pupil_reg <- filter(
  school_pupil,
  str_detect(მუნიციპალიტეტი, "რეგიონი|თბილისი|აფხაზეთი|აჭარის")
)

# Switch to short English column names: Georgian names complicate things
# because the columns have to be referred to in code later on.
colnames(school_pupil_reg)[1:3] <- c("region", "n_school", "n_pupil")

# Store the region as a factor rather than a character vector; this is handy
# in ggplot, e.g. when mapping it to colour on a scatterplot.
school_pupil_reg[["region"]] <- as.factor(school_pupil_reg[["region"]])

Now it’s time for some visual play.

# First, a simple scatterplot with three variables: number of schools (x),
# number of pupils (y) and region (colour).
ggplot(
  data = school_pupil_reg,
  mapping = aes(x = n_school, y = n_pupil, colour = region)
) +
  geom_point() +
  labs(
    title = "სკოლებისა და მოსწავლეების რაოდენობა რეგიონების მიხედვით",
    subtitle = "მარტივი წერტილოვანი განაწილება",
    x = "სკოლების რაოდენობა",
    y = "მოსწავლეების რაოდენობა",
    caption = "წყარო: საქსტატი",
    colour = "რეგიონი"
  ) +
  theme_minimal(base_family = "Sylfaen")

# 2. Diverging bars
# Data prep: standardise the number of schools (z-score), flag every region as
# above/below the mean, and sort the regions by their z-score.
# na.rm = TRUE: the CSV is read with "..." treated as NA, so a single missing
# count would otherwise turn every z-score into NA.
school_pupil_reg <- school_pupil_reg %>%
  mutate(
    school_z = (n_school - mean(n_school, na.rm = TRUE)) /
      sd(n_school, na.rm = TRUE),
    school_type = ifelse(school_z < 0, "below", "above")
  ) %>%
  arrange(school_z)

# Fix the factor levels in the sorted row order so the bars are drawn by
# z-score rather than alphabetically.
school_pupil_reg$region <- factor(
  school_pupil_reg$region,
  levels = unique(as.character(school_pupil_reg$region))
)

school_pupil_reg %>%
  ggplot(aes(x = region, y = school_z)) +
  geom_col(aes(fill = school_type), width = .5) +
  # `breaks` pins which label belongs to which category instead of relying on
  # the implicit alphabetical order of the fill values.
  scale_fill_manual(name = "რაოდენობა",
                    breaks = c("above", "below"),
                    labels = c("საშუალოს ზემოთ", "საშუალოს ქვემოთ"),
                    values = c("above" = "#00ba38", "below" = "#f8766d")) +
  labs(title = "Divergent Bars",
       subtitle = "Normalised number of schools",
       y = "სკოლების ნორმალიზებული რაოდენობა",
       x = "რეგიონი") +
  coord_flip() +
  theme_minimal(base_family = "Sylfaen")

Loading and viewing maps

library(sf)

## Linking to GEOS 3.6.1, GDAL 2.1.3, PROJ 4.9.3

# Read the level-1 (region) administrative boundaries of Georgia from the
# GADM shapefile into an sf object.
my_map <- st_read(
  dsn = file.path("/Users/Posh/Documents/GIK - R", "gadm36_GEO_shp", "gadm36_GEO_1.shp")
)

## Reading layer `gadm36_GEO_1' from data source `/Users/Posh/Documents/GIK - R/gadm36_GEO_shp/gadm36_GEO_1.shp' using driver `ESRI Shapefile'
## Simple feature collection with 12 features and 10 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 40.01111 ymin: 41.03851 xmax: 46.72136 ymax: 43.58454
## epsg (SRID):    4326
## proj4string:    +proj=longlat +datum=WGS84 +no_defs

# Inspect the structure of the region-name column of the map data
str(my_map[["NAME_1"]])

##  Factor w/ 12 levels "Abkhazia","Ajaria",..: 1 2 3 4 5 6 7 8 9 10 ...

# Prepare the map data for joining with the school data on a shared
# `region` variable.
library(dplyr)

# Step 1: rename NAME_1 to `region` so the key column has the same name in
# both datasets.
my_map <- rename(my_map, region = NAME_1)

# Step 2: replace the region names with their Georgian equivalents.
# The names are assigned by position, i.e. in the row order of the map data.
my_map$region <- c(
  "აფხაზეთის არ",
  "აჭარის არ",
  "გურიის რეგიონი",
  "იმერეთის რეგიონი",
  "კახეთის რეგიონი",
  "ქვემო ქართლის რეგიონი",
  "მცხეთა-მთიანეთის რეგიონი",
  "რაჭა-ლეჩხუმის და ქვემო სვანეთის რეგიონი",
  "სამეგრელო-ზემო სვანეთის რეგიონი",
  "სამცხე-ჯავახეთის რეგიონი",
  "შიდა ქართლის რეგიონი",
  "თბილისი"
)

# Now to join the two tables into a single one for mapping.
# `region` is a factor in school_pupil_reg but a character vector in my_map,
# so it is converted to character first (no implicit coercion by the join),
# and the join key is named explicitly instead of being guessed.
map_and_data <- inner_join(
  my_map,
  mutate(school_pupil_reg, region = as.character(region)),
  by = "region"
)

# inner_join() drops map regions that have no matching row in the school data;
# report them so a name mismatch cannot silently remove a region from the map.
missing_regions <- base::setdiff(my_map$region, map_and_data$region)
if (length(missing_regions) > 0) {
  warning(
    "Map regions without matching school data: ",
    paste(missing_regions, collapse = ", "),
    call. = FALSE
  )
}

## Joining, by = "region"

## Warning: Column `region` joining character vector and factor, coercing into
## character vector

# Now the magic: draw the regions and fill each one by its number of schools
ggplot(data = map_and_data, mapping = aes(fill = n_school)) +
  geom_sf()

library(ggthemes)
# Same map with a custom blue gradient (more schools = darker), the ggthemes
# map theme, and the legend placed below the plot.
ggplot(data = map_and_data, mapping = aes(fill = n_school)) +
  geom_sf() +
  scale_fill_gradient(high = "#132B43", low = "#56B1F7") +
  theme_map() +
  theme(legend.position = "bottom")