R Markdown

Центральный федеральный округ, любой показатель из раздела «Наука и инновации».

# Загрузка shp-файла для Центрального федерального округа

# List the files extracted from the shapefile archive.
list.files("./data")
##  [1] "_readme.txt"        "admin_level_10.dbf" "admin_level_10.prj"
##  [4] "admin_level_10.qpj" "admin_level_10.shp" "admin_level_10.shx"
##  [7] "admin_level_2.dbf"  "admin_level_2.prj"  "admin_level_2.qpj" 
## [10] "admin_level_2.shp"  "admin_level_2.shx"  "admin_level_3.dbf" 
## [13] "admin_level_3.prj"  "admin_level_3.qpj"  "admin_level_3.shp" 
## [16] "admin_level_3.shx"  "admin_level_4.dbf"  "admin_level_4.prj" 
## [19] "admin_level_4.qpj"  "admin_level_4.shp"  "admin_level_4.shx" 
## [22] "admin_level_5.dbf"  "admin_level_5.prj"  "admin_level_5.qpj" 
## [25] "admin_level_5.shp"  "admin_level_5.shx"  "admin_level_6.dbf" 
## [28] "admin_level_6.prj"  "admin_level_6.qpj"  "admin_level_6.shp" 
## [31] "admin_level_6.shx"  "admin_level_7.dbf"  "admin_level_7.prj" 
## [34] "admin_level_7.qpj"  "admin_level_7.shp"  "admin_level_7.shx" 
## [37] "admin_level_8.dbf"  "admin_level_8.prj"  "admin_level_8.qpj" 
## [40] "admin_level_8.shp"  "admin_level_8.shx"  "admin_level_9.dbf" 
## [43] "admin_level_9.prj"  "admin_level_9.qpj"  "admin_level_9.shp" 
## [46] "admin_level_9.shx"
# Switch all locale categories to Russian.
# NOTE(review): "Russian" resolves to Russian_Russia.1251 in the echoed output
# below, i.e. a Windows-style locale name -- confirm before running elsewhere.
Sys.setlocale(category = "LC_ALL", locale = "Russian")
## [1] "LC_COLLATE=Russian_Russia.1251;LC_CTYPE=Russian_Russia.1251;LC_MONETARY=Russian_Russia.1251;LC_NUMERIC=C;LC_TIME=Russian_Russia.1251"
# Read the administrative level 4 boundaries from the shapefile.
Regions <- readOGR(dsn = "./data/admin_level_4.shp")
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\Mikha\Desktop\GUU\4course2\R\Lab4\data\admin_level_4.shp", layer: "admin_level_4"
## with 88 features
## It has 193 fields
# Restrict the layer to the regions of the Central Federal District (CFO).

# Store the English region names as a factor.
Regions@data$name_en <- as.factor(Regions@data$name_en)

# English names of the regions treated as belonging to the CFO.
# NOTE(review): the list has 17 entries and does not contain
# "Smolensk Oblast" -- confirm whether that omission is intentional.
CFOarea <- c(
  "Belgorod Oblast", "Bryansk Oblast", "Vladimir Oblast",
  "Voronezh Oblast", "Ivanovo Oblast", "Kaluga Oblast",
  "Kostroma Oblast", "Kursk Oblast", "Lipetsk Oblast",
  "Moscow Oblast", "Oryol Oblast", "Ryazan Oblast",
  "Tambov Oblast", "Tver Oblast", "Tula Oblast",
  "Yaroslavl Oblast", "Moscow"
)

# Keep only the polygons whose English name is in the list above.
in_cfo <- Regions@data$name_en %in% CFOarea
Regions <- Regions[in_cfo, ]

Импорт данных Росстата: сборник «Регионы России», раздел «Наука и инновации».

# Load the helper script for working with Rosstat data
# (presumably the one that defines loadGKSData(), called right after this).
# NOTE(review): this evaluates code downloaded from the network on every run
# and tracks the moving `master` branch; consider pinning a commit or keeping
# a reviewed local copy.
get_GKSSS <- source(
  file = "https://raw.githubusercontent.com/nillsondg/r_gks_stat_data/master/gks.R"
)
## 
## Attaching package: 'tools'
## The following object is masked from 'package:XML':
## 
##     toHTML
# Download the Rosstat table (per the original note: admission, graduation
# and thesis defence in doctoral studies).
dataGKSS <- loadGKSData("/bgd/regl/B14_14p/IssWWW.exe/Stg/d03/21-11.htm")

# Keep rows 4 to 20 (expected to be the CFO regions) and drop the first column.
# NOTE(review): 4:20 selects 17 rows -- verify against the downloaded table
# that these are exactly the intended regions.
dataGKSS <- dataGKSS[4:20, -1]

# Column names: the region name followed by three groups of the same five years.
gks_years <- c("2005", "2010", "2011", "2012", "2013")
colnames(dataGKSS) <- c("name_en", rep(gks_years, times = 3))

# Drop the row names left over from the original table.
rownames(dataGKSS) <- NULL

# Build the "admission to doctoral studies" tables for 2013 and 2010.
# Each table has two columns: `name` (English region name) and `admission`
# (numeric count).

# English region names assigned to the rows of dataGKSS, in row order.
# NOTE(review): this relies on the rows of dataGKSS being in exactly this
# order, and the list does not contain "Smolensk Oblast" -- confirm against
# the downloaded Rosstat table.
cfo_names_en <- c(
  "Belgorod Oblast", "Bryansk Oblast", "Vladimir Oblast",
  "Voronezh Oblast", "Ivanovo Oblast", "Kaluga Oblast",
  "Kostroma Oblast", "Kursk Oblast", "Lipetsk Oblast",
  "Moscow Oblast", "Oryol Oblast", "Ryazan Oblast",
  "Tambov Oblast", "Tver Oblast", "Tula Oblast",
  "Yaroslavl Oblast", "Moscow"
)

# Extract one year of the indicator from the Rosstat table.
#
# gks_table    data frame whose first column identifies the region
# year_col     index of the column holding the wanted year
# region_names English names written into the `name` column, one per row
#
# Returns a data frame with columns `name` and `admission`. Cells containing
# "-" are set to 0 and `admission` is converted to numeric, so a continuous
# colour scale can be applied to it downstream.
make_admission_table <- function(gks_table, year_col,
                                 region_names = cfo_names_en) {
  out <- gks_table[, c(1, year_col)]
  colnames(out) <- c("name", "admission")

  # Go through character first so a factor column is not turned into codes.
  values <- as.character(out$admission)
  values[grepl("-", values, fixed = TRUE)] <- "0"
  out$admission <- as.numeric(values)

  out$name <- region_names
  out
}

# Column 6 is labelled "2013" and column 3 is labelled "2010" in dataGKSS
# (first of the three year groups).
CFOdata2013 <- make_admission_table(dataGKSS, 6)
CFOdata2010 <- make_admission_table(dataGKSS, 3)

# Choropleth for the latest available year (2013) built with spplot.

# Attach the indicator by matching on the region name. Replacing
# Regions@data with the result of merge() is unsafe: merge() does not
# guarantee the original row order (and an inner join can drop rows), while
# the rows of @data must stay aligned one-to-one with the polygons.
# Regions without a matching record get NA.
row_idx <- match(
  as.character(Regions@data$name_en),
  as.character(CFOdata2013$name)
)
# Convert through character so that neither strings nor factors break the
# continuous colour scale.
Regions@data$admission <- as.numeric(
  as.character(CFOdata2013$admission[row_idx])
)

# Colour ramp for the fill.
mypalette <- colorRampPalette(c('yellow', 'blue'))

# Region labels placed at the polygon label points, with shortened names.
region_labels <- as.character(Regions@data$name_en)
region_labels <- gsub("Moscow Oblast", "M Obl", region_labels)
region_labels <- gsub("Oblast", "", region_labels)
region_labels <- gsub("Moscow", "M", region_labels)

# Layout item for spplot: function name, label coordinates, label text.
# (A stray `sort = F` list element from the earlier version was dropped; it
# was not a label argument.)
ls.lay1 <- list("sp.text", coordinates(Regions), region_labels)

# Draw the choropleth of the admission indicator.
output <- spplot(
  Regions, 'admission',
  col.regions = mypalette(30),                      # colour scale
  col = 'coral4',                                   # polygon border colour
  par.settings = list(axis.line = list(col = NA)),  # no axes
  sp.layout = ls.lay1
)

output

# The layer is re-read from disk for the ggplot2 version below.
rm(Regions)

# Choropleth for 2010 built with ggplot2.

# Example 3 #####################################################################
# Rebuild the last plot of example 3 with ggplot2.

# Read the administrative level 4 boundaries again.
Regions <- readOGR(dsn = "./data/admin_level_4.shp")
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\Mikha\Desktop\GUU\4course2\R\Lab4\data\admin_level_4.shp", layer: "admin_level_4"
## with 88 features
## It has 193 fields
# English names of the regions treated as belonging to the CFO.
CFOarea <- c(
  "Belgorod Oblast", "Bryansk Oblast", "Vladimir Oblast",
  "Voronezh Oblast", "Ivanovo Oblast", "Kaluga Oblast",
  "Kostroma Oblast", "Kursk Oblast", "Lipetsk Oblast",
  "Moscow Oblast", "Oryol Oblast", "Ryazan Oblast",
  "Tambov Oblast", "Tver Oblast", "Tula Oblast",
  "Yaroslavl Oblast", "Moscow"
)

# Keep only the polygons whose English name is in the list.
Regions <- Regions[is.element(Regions@data$name_en, CFOarea), ]

# Key column `id` (a copy of name_en) used to join the other tables.
Regions@data[["id"]] <- Regions@data[["name_en"]]

# Convert the SpatialPolygonsDataFrame into a data frame of polygon vertices.
Regions.points <- fortify(Regions, region = "id")
## Warning in proj4string(SpP): CRS object has comment, which is lost in output
# Attach the region attributes to the vertex coordinates.
Regions.df <- merge(x = Regions.points, y = Regions@data, by = "id")

# Attach the 2010 indicator values (used for the fill) to the vertex table.
CFOdata2010$id <- CFOdata2010$name
Regions.df <- merge(
  Regions.df,
  CFOdata2010[, c("id", "admission")],
  by = "id"
)

# Convert through character so a factor column is not turned into level codes.
Regions.df$admission <- as.numeric(as.character(Regions.df$admission))

# merge() does not promise to keep the vertex sequence; restore the drawing
# order within each polygon piece so outlines are not scrambled.
Regions.df <- Regions.df[order(Regions.df$group, Regions.df$order), ]
rownames(Regions.df) <- NULL

names(Regions.df)
##   [1] "id"         "long"       "lat"        "order"      "hole"      
##   [6] "piece"      "group"      "name"       "name_ru"    "boundary"  
##  [11] "admin_leve" "ref"        "int_ref"    "name_ca"    "name_de"   
##  [16] "name_en"    "name_es"    "name_fr"    "name_hu"    "name_ja"   
##  [21] "name_lt"    "name_nl"    "name_pl"    "name_pt"    "name_sv"   
##  [26] "name_uk"    "name_vi"    "name_zh"    "alt_name"   "int_name"  
##  [31] "timezone"   "ISO3166.2"  "wikipedia"  "population" "addr_count"
##  [36] "is_in_coun" "gost_7.67." "is_in_cont" "official_s" "populati_1"
##  [41] "is_in_co_1" "ref_en"     "name_fi"    "name_hr"    "name_no"   
##  [46] "name_sk"    "name_sr"    "oktmo_user" "border_typ" "name_af"   
##  [51] "name_az"    "name_be"    "name_cy"    "name_el"    "name_eo"   
##  [56] "name_hy"    "name_ka"    "name_kv"    "name_oc"    "name_os"   
##  [61] "name_sh"    "name_sw"    "name_tr"    "name_tt"    "website"   
##  [66] "name_bxr"   "name_kbd"   "name_mhr"   "name_myv"   "name_sah"  
##  [71] "name_be.ta" "name_ce"    "name_cs"    "name_cv"    "name_et"   
##  [76] "name_ga"    "name_he"    "name_it"    "name_ko"    "name_ku"   
##  [81] "name_lv"    "name_mn"    "name_ms"    "name_ro"    "name_sl"   
##  [86] "name_uz"    "name_yi"    "name_dsb"   "name_hsb"   "name_szl"  
##  [91] "name_xal"   "alt_name_d" "name_ar"    "name_ba"    "name_bg"   
##  [96] "name_bs"    "name_da"    "name_eu"    "name_fa"    "name_hi"   
## [101] "name_id"    "name_kk"    "name_mk"    "name_mr"    "name_nn"   
## [106] "name_se"    "name_ta"    "name_tg"    "name_tl"    "name_ur"   
## [111] "name_ace"   "name_hak"   "name_kaa"   "name_lrc"   "name_pam"  
## [116] "name_pnb"   "name_sco"   "name_udm"   "name_vep"   "name_war"  
## [121] "name_zh.mi" "note"       "name_crh"   "wikidata"   "wikipedia_"
## [126] "wikipedi_1" "koatuu"     "ref_uk"     "name_la"    "flag"      
## [131] "is_in"      "ref_ru"     "name_cu"    "alt_name_v" "name_av"   
## [136] "name_inh"   "name_krc"   "official_n" "official_1" "cladr_code"
## [141] "short_name" "official_2" "name_int"   "populati_2" "name_krl"  
## [146] "source"     "name_mrj"   "name_is"    "source_url" "name_be.x."
## [151] "gis.lab_st" "official_3" "official_4" "official_5" "official_6"
## [156] "official_7" "place"      "name_ab"    "name_fy"    "name_lb"   
## [161] "name_nb"    "name_ast"   "name_tzl"   "old_name"   "old_name_e"
## [166] "old_name_1" "old_name_f" "old_name_o" "old_name_v" "addr_postc"
## [171] "old_alt_na" "name_koi"   "name_lbe"   "name_lez"   "name_az.cy"
## [176] "official_8" "official_9" "official10" "official11" "official12"
## [181] "official13" "name_ky"    "ssrf_code"  "cadaster_c" "omkte_code"
## [186] "addr_regio" "alt_name_r" "name_atv"   "alt_name_c" "alt_name_e"
## [191] "alt_name_f" "alt_name_n" "name_sq"    "name_su"    "name_ug"   
## [196] "name_tyv"   "official14" "name_aba"   "name_nog"   "alt_name_s"
## [201] "admission"
# Label points of the polygons (used to place the region names).
centroids.df <- as.data.frame(coordinates(Regions))
centroids.df$id <- Regions@data$id
colnames(centroids.df) <- c('long', 'lat', 'id')

# Shortened region names for the map labels. The earlier version applied
# these substitutions to ls.lay1 (the spplot layout list), which this plot
# never reads, so the full names were drawn instead.
centroids.df$label <- as.character(centroids.df$id)
centroids.df$label <- gsub("Moscow Oblast", "M Obl", centroids.df$label)
centroids.df$label <- gsub("Oblast", "", centroids.df$label)
centroids.df$label <- gsub("Moscow", "M", centroids.df$label)

# Build the plot: filled polygons, region borders, map projection,
# fill scale, axis labels and title, and the region labels.
gp <- ggplot() +
    geom_polygon(data = Regions.df, aes(long, lat, group = group,
                                        fill = admission)) +
    geom_path(data = Regions.df, aes(long, lat, group = group),
              color = 'coral4') +
    coord_map(projection = 'gilbert') +
    scale_fill_distiller(palette = 'OrRd',
                         direction = 1,
                         breaks = pretty_breaks(n = 5)) +
    labs(x = 'Долгота', y = 'Широта',
         title = "Приём в докторантуру, человек") +
    geom_text(data = centroids.df,
              aes(long, lat, label = label))
# Print the plot.
gp