#Downloading needed libraries
library(tidyverse)
library(broom)
library(haven)
library(AER)
library(sandwich)
library(lmtest)
library(car)
library(stargazer)
library(dplyr)
library(ggplot2)
library(GGally)
library(psych)
library(corrplot)
library(readxl)
library(writexl)
#TS graphs
library(quantmod)
library(tseries)
library(forecast)
library(TSstudio)
library(reshape2)
library(plotly)
#Histograms
library(TSstudio)
library(reshape2)
library(tidyverse)
library(hrbrthemes)
library(viridis)
library(forcats)
#Spatial analysis
# geo
library(sp)
library(spdep)
library(rgdal)
library(maptools)
library(rgeos)
library(sf)
library(spatialreg)
# mass retype()
library(hablar)
# multicollinearity test omcdiag imcdiag
library(mctest)
library(GGally)
#tables
library(xtable)
Data Downloading
#Downloading needed data
dat_Educ = read_xlsx('C:/Users/Kolya/Documents/New_studies/Term_paper/Data/Data for use/Education.xlsx', sheet = '4.19.')
## New names:
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * `` -> ...6
## * ...
dat_Emp = read_xlsx('C:/Users/Kolya/Documents/New_studies/Term_paper/Data/Data for use/Employment.xlsx', sheet = '2.11.')
## New names:
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * `` -> ...6
## * ...
dat_GRP = read_xlsx('C:/Users/Kolya/Documents/New_studies/Term_paper/Data/Data for use/GRP.xlsx', sheet = '8.1.')
## New names:
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * `` -> ...6
## * ...
dat_LabPr_extra = read_xls('C:/Users/Kolya/Documents/New_studies/Term_paper/Data/Data for use/Labour_Productivity_2005_2007.xls',sheet = 1)
## New names:
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * `` -> ...6
## * ...
dat_LabPr = read_xlsx('C:/Users/Kolya/Documents/New_studies/Term_paper/Data/Data for use/Labour_Productivity_2008_2019.xlsx',sheet = 1)
## New names:
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * `` -> ...6
## * ...
dat_Unemp = read_xlsx('C:/Users/Kolya/Documents/New_studies/Term_paper/Data/Data for use/Unemployment.xlsx',
sheet = '2.10.1.')
## New names:
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * `` -> ...6
## * ...
dat_Wage = read_xlsx('C:/Users/Kolya/Documents/New_studies/Term_paper/Data/Data for use/Wages.xlsx', sheet = '3.1.1.')
## New names:
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * `` -> ...6
## * ...
dat_Inv = read_xlsx('C:/Users/Kolya/Documents/New_studies/Term_paper/Data/Data for use/Investment.xlsx', sheet = '10.2.')
## New names:
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * `` -> ...6
## * ...
dat_WorkF = read_xlsx('C:/Users/Kolya/Documents/New_studies/Term_paper/Data/Data for use/Work_force.xlsx', sheet = '2.1.')
## New names:
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * `` -> ...6
## * ...
#Indexies
dat_V <- read_xlsx('C:/Users/Kolya/Documents/New_studies/Term_paper/Data/Data for use/V_index.xlsx')
dat_RV <- read_xlsx('C:/Users/Kolya/Documents/New_studies/Term_paper/Data/Data for use/RV_index.xlsx')
dat_UV <- read_xlsx('C:/Users/Kolya/Documents/New_studies/Term_paper/Data/Data for use//UV_index.xlsx')
#Data Transformation
Bad_regions <- c('Ненецкий автономный округ', 'Ямало-Ненецкий автономный округ',
'Архангельская область без автономного округа', 'Тюменская область без автономных округов', 'Республика Крым', 'г. Севастополь',
'Чеченская Республика', 'Центральный федеральный округ',
'Северо-Западный федеральный округ', 'Южный федеральный округ', 'Северо-Кавказский федеральный округ',
'Приволжский федеральный округ', 'Уральский федеральный округ', 'Дальневосточный федеральный округ',
'в том числе:', 'Сибирский федеральный округ', 'Ханты-Мансийский автономный округ - Югра', 'Ханты-Мансийский автономный округ – Югра',
'Архангельская область(кроме Ненецкого АО)', 'Hенецкий авт.округ', 'г.Севастополь', 'Ханты-Мансийский авт.округ-Югра',
'Ямало-Hенецкий авт.округ', 'Тюменская область (кроме Ханты-Мансийского авт.округа-Югра и Ямало-Hенецкого авт.округа)',
'Российская Федерация', 'Ненецкий автономный округ (Архангельская область)', 'Ямало-Ненецкий автономный округ (Тюменская область)',
'Архангельская область (без АО)', 'Город федерального значения Севастополь', 'Ханты-Мансийский автономный округ - Югра (Тюменская область)',
"Архангельская область (кроме Ненецкого автономного округа)", "Тюменская область (кроме Ханты-Мансийского автономного округа-Югры и Ямало-Ненецкого автономного округа)",
"Тюменская область (без АО)")
dat_Educ <- dat_Educ %>%
mutate(across(where(is.character), str_squish))
names(dat_Educ) <- c(c('region'), paste("educ", c(2000:2020), sep="_"))
dat_Educ <- dat_Educ[c(6:length(dat_Educ$region)), ]
dat_Educ <- subset(dat_Educ,!(dat_Educ$region %in% Bad_regions))
dat_Educ
dat_Emp <- dat_Emp %>%
mutate(across(where(is.character), str_squish))
names(dat_Emp) <- c(c('region'), paste("emp", c(2000:2020), sep="_"))
dat_Emp <- dat_Emp[c(7:(length(dat_Emp$region)-2)), ]
dat_Emp <- subset(dat_Emp,!(dat_Emp$region %in% Bad_regions))
dat_Emp
dat_GRP <- dat_GRP %>%
mutate(across(where(is.character), str_squish))
names(dat_GRP) <- c(c('region'), paste("GRP", c(2000:2019), sep="_"))
dat_GRP <- dat_GRP[c(6:length(dat_GRP$region)), ]
dat_GRP <- subset(dat_GRP,!(dat_GRP$region %in% Bad_regions))
dat_GRP
dat_LabPr <- dat_LabPr %>%
mutate(across(where(is.character), str_squish))
names(dat_LabPr) <- c(c('region'), paste("LabPr", c(2008:2019), sep="_"))
## Warning: The `value` argument of `names<-` must have the same length as `x` as of tibble 3.0.0.
## `names` must have length 17, not 13.
## Warning: The `value` argument of `names<-` can't be empty as of tibble 3.0.0.
## Columns 14, 15, 16, and 17 must be named.
dat_LabPr <- dat_LabPr[c(3:(length(dat_LabPr$region)-3)), c(-(14:17))]
dat_LabPr <- subset(dat_LabPr,!(dat_LabPr$region %in% Bad_regions))
# dat_LabPr$region[!(dat_Emp$region == dat_GRP$region)]
dat_LabPr_extra <- dat_LabPr_extra %>%
mutate(across(where(is.character), str_squish))
dat_LabPr_extra <- dat_LabPr_extra[, c(2, 6:7)]
names(dat_LabPr_extra) <- c(c('region'), paste("LabPr", c(2006:2007), sep="_"))
dat_LabPr_extra <- dat_LabPr_extra[c(4:(length(dat_LabPr_extra$region))),]
dat_LabPr_extra <- subset(dat_LabPr_extra,!(dat_LabPr_extra$region %in% Bad_regions))
#Replacing names with shorter ones
dat_LabPr_extra$region <-
replace(dat_LabPr_extra$region,
dat_LabPr_extra$region == "Город Москва столица Российской Федерации город федерального значения",
'г. Москва')
dat_LabPr_extra$region <-
replace(dat_LabPr_extra$region, dat_LabPr_extra$region == "Город Санкт-Петербург город федерального значения",
"г. Санкт-Петербург")
dat_Unemp <- dat_Unemp %>%
mutate(across(where(is.character), str_squish))
names(dat_Unemp) <- c(c('region'), paste("Unemp", c(2000:2020), sep="_"))
## Warning: The `value` argument of `names<-` must have the same length as `x` as of tibble 3.0.0.
## `names` must have length 23, not 22.
## Warning: The `value` argument of `names<-` can't be empty as of tibble 3.0.0.
## Column 23 must be named.
dat_Unemp <- dat_Unemp[c(8:(length(dat_Unemp$region)-2)), ]
dat_Unemp <- subset(dat_Unemp,!(dat_Unemp$region %in% Bad_regions))
dat_Unemp <- dat_Unemp[, c(-23)]
dat_Unemp
dat_Wage <- dat_Wage %>%
mutate(across(where(is.character), str_squish))
names(dat_Wage) <- c(c('region'), paste("Wage", c(2000:2020), sep="_"))
dat_Wage <- dat_Wage[c(8:(length(dat_Wage$region)-5)), ]
dat_Wage <- subset(dat_Wage,!(dat_Wage$region %in% Bad_regions))
dat_Wage <- dat_Wage[-c(29, 60, 71), ]
dat_Wage
dat_Inv <- dat_Inv %>%
mutate(across(where(is.character), str_squish))
names(dat_Inv) <- c(c('region'), paste("Inv", c(2000:2020), sep="_"))
dat_Inv <- dat_Inv[c(7:(length(dat_Inv$region)-2)), ]
dat_Inv <- subset(dat_Inv,!(dat_Inv$region %in% Bad_regions))
dat_Inv <- dat_Inv[-c(59, 70), ]
dat_Inv
dat_WorkF <- dat_WorkF %>%
mutate(across(where(is.character), str_squish))
names(dat_WorkF) <- c(c('region'), paste("WorkF", c(2000:2020), sep="_"))
dat_WorkF <- dat_WorkF[c(7:(length(dat_WorkF$region)-2)), ]
dat_WorkF <- subset(dat_WorkF,!(dat_WorkF$region %in% Bad_regions))
dat_WorkF
# for (i in 2:length(names(dat_Emp))){
# EmpPerWF[, c(i)] <- dat_Emp[, c(i)]/dat_WorkF[, c(i)]
# }
dat_WorkF[, c(-1)] <- dat_WorkF[, c(-1)] %>% mutate_if(is.character, as.numeric)
dat_Emp[, c(-1)] <- dat_Emp[, c(-1)] %>% mutate_if(is.character, as.numeric)
dat_EmpPerWF <- cbind(dat_Emp[1], dat_Emp[-1]/(1000*dat_WorkF[-1]))
names(dat_EmpPerWF) <- c(c('region'), paste("EmpPerWF", c(2000:2020), sep="_"))
#dat_Emp[-1] without first column
#Creating common DataSet
Common_dataset <- Reduce(merge, list(dat_Educ, dat_GRP, dat_LabPr_extra, dat_LabPr, dat_Unemp, dat_Wage, dat_EmpPerWF,dat_Inv, dat_V, dat_RV, dat_UV))
sum(is.na(Common_dataset))
## [1] 0
Common_dataset_new <- Common_dataset
# Common_dataset_new <- subset(Common_dataset_new, select=-c(z,u))
Common_dataset_new[, c(-1)] <- Common_dataset[, c(-1)] %>% mutate_if(is.character, as.numeric)
## Warning in mask$eval_all_mutate(quo): в результате преобразования созданы NA
## Warning in mask$eval_all_mutate(quo): в результате преобразования созданы NA
## Warning in mask$eval_all_mutate(quo): в результате преобразования созданы NA
## Warning in mask$eval_all_mutate(quo): в результате преобразования созданы NA
## Warning in mask$eval_all_mutate(quo): в результате преобразования созданы NA
## Warning in mask$eval_all_mutate(quo): в результате преобразования созданы NA
## Warning in mask$eval_all_mutate(quo): в результате преобразования созданы NA
## Warning in mask$eval_all_mutate(quo): в результате преобразования созданы NA
## Warning in mask$eval_all_mutate(quo): в результате преобразования созданы NA
## Warning in mask$eval_all_mutate(quo): в результате преобразования созданы NA
## Warning in mask$eval_all_mutate(quo): в результате преобразования созданы NA
sum(is.na(Common_dataset_new))
## [1] 11
# Common_dataset_new %>% glimpse() #now everything is okay
sum(is.na(Common_dataset_new)) #Just for no data
## [1] 11
#For spatal models, whic cannot work with unbalanced data
educ_problem = as.numeric(Common_dataset[78, c(13:22)])
mean(educ_problem)
## [1] 50.9
setdiff(dat_Educ$region, Common_dataset$region)#no abcent regions
## character(0)
get_year <- function(df, year){
df %>%
select(contains(c('region', year)))
}
Data_2017 <- get_year(Common_dataset_new, '2017')
Data_2017
Data_2006 <- get_year(Common_dataset_new, '2006')
Data_2006
Common_dataset_new <- get_year(Common_dataset_new, c(2006:2017))
#Datasets for robust check
Common_dataset_new_2006_2008 <- get_year(Common_dataset_new, c(2006:2008))
Common_dataset_new_2009_2014 <- get_year(Common_dataset_new, c(2009:2014))
Common_dataset_new_2015_2017 <- get_year(Common_dataset_new, c(2015:2017))
#Specifications for 2017
grp_2017_RV_UV <- log(GRP_2017) ~ Rv_2017 + Uv_2017 + log(Inv_2017) + educ_2017 + Wage_2017
labpr_2017_RV_UV <- LabPr_2017 ~ Rv_2017 + Uv_2017 + log(Inv_2017) + educ_2017 + Wage_2017
emp_2017_RV_UV <- EmpPerWF_2017 ~ Rv_2017 + Uv_2017 + log(Inv_2017) + educ_2017 + Wage_2017
unemp_2017_RV_UV <- Unemp_2017 ~ Rv_2017 + Uv_2017 + log(Inv_2017) + educ_2017 + Wage_2017
grp_2017_V <- log(GRP_2017) ~ V_2017 + log(Inv_2017) + educ_2017 + Wage_2017
labpr_2017_V <- LabPr_2017 ~ V_2017 + log(Inv_2017) + educ_2017 + Wage_2017
emp_2017_V <- EmpPerWF_2017 ~ V_2017 + log(Inv_2017) + educ_2017 + Wage_2017
unemp_2017_V <- Unemp_2017 ~ V_2017 + log(Inv_2017) + educ_2017 + Wage_2017
#Cross-section models
#GRP
reg_2017_GRP_RV_UV <- lm(grp_2017_RV_UV, data = Data_2017)
vcov2_GRP_RV_UV <- vcovHC(reg_2017_GRP_RV_UV, type = "HC0")
se2imp_GRP_RV_UV <- sqrt(diag(vcov2_GRP_RV_UV))
#LabPr
reg_2017_LabPr_RV_UV <- lm(labpr_2017_RV_UV, data = Data_2017)
vcov2_LabPr_RV_UV <- vcovHC(reg_2017_LabPr_RV_UV, type = "HC0")
se2imp_LabPr_RV_UV <- sqrt(diag(vcov2_LabPr_RV_UV))
#emp
reg_2017_emp_RV_UV <- lm(emp_2017_RV_UV, data = Data_2017)
vcov2_emp_RV_UV <- vcovHC(reg_2017_emp_RV_UV, type = "HC0")
se2imp_emp_RV_UV <- sqrt(diag(vcov2_emp_RV_UV))
#Unemp
reg_2017_Unemp_RV_UV <- lm(unemp_2017_RV_UV, data = Data_2017)
vcov2_Unemp_RV_UV <- vcovHC(reg_2017_Unemp_RV_UV, type = "HC0")
se2imp_Unemp_RV_UV <- sqrt(diag(vcov2_Unemp_RV_UV))
stargazer(reg_2017_GRP_RV_UV, reg_2017_LabPr_RV_UV, reg_2017_emp_RV_UV, reg_2017_Unemp_RV_UV, se = list(se2imp_GRP_RV_UV, se2imp_LabPr_RV_UV, se2imp_emp_RV_UV,se2imp_Unemp_RV_UV), type = 'text', omit = c('Constant'), keep.stat = c("n"), font.size = "tiny")
##
## ===============================================================
## Dependent variable:
## -------------------------------------------------
## log(GRP_2017) LabPr_2017 EmpPerWF_2017 Unemp_2017
## (1) (2) (3) (4)
## ---------------------------------------------------------------
## Rv_2017 0.182 -0.119 -0.0001 0.516
## (0.124) (0.499) (0.001) (0.451)
##
## Uv_2017 0.782*** 0.517 0.006** -1.102
## (0.173) (0.640) (0.003) (0.857)
##
## log(Inv_2017) 0.693*** -0.244 0.007*** -1.929***
## (0.159) (0.603) (0.002) (0.591)
##
## educ_2017 0.002 -0.004 -0.00003* -0.007*
## (0.002) (0.004) (0.00002) (0.004)
##
## Wage_2017 -0.007 -0.298 0.00002 -0.086
## (0.053) (0.194) (0.001) (0.150)
##
## ---------------------------------------------------------------
## Observations 79 79 79 79
## ===============================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
#GRP
reg_2017_GRP_V <- lm(grp_2017_V, data = Data_2017)
vcov2_GRP_V <- vcovHC(reg_2017_GRP_V, type = "HC0")
se2imp_GRP_V <- sqrt(diag(vcov2_GRP_V))
#LabPr
reg_2017_LabPr_V <- lm(labpr_2017_V, data = Data_2017)
vcov2_LabPr_V <- vcovHC(reg_2017_LabPr_V, type = "HC0")
se2imp_LabPr_V <- sqrt(diag(vcov2_LabPr_V))
#emp
reg_2017_emp_V <- lm(emp_2017_V, data = Data_2017)
vcov2_emp_V <- vcovHC(reg_2017_emp_V, type = "HC0")
se2imp_emp_V <- sqrt(diag(vcov2_emp_V))
#Unemp
reg_2017_Unemp_V <- lm(unemp_2017_V, data = Data_2017)
vcov2_Unemp_V <- vcovHC(reg_2017_Unemp_V, type = "HC0")
se2imp_Unemp_V <- sqrt(diag(vcov2_Unemp_V))
stargazer(reg_2017_GRP_V, reg_2017_LabPr_V, reg_2017_emp_V, reg_2017_Unemp_V, se = list(se2imp_GRP_V, se2imp_LabPr_V, se2imp_emp_V,se2imp_Unemp_V), type = "text", omit = c('Constant'), keep.stat = c("n"), font.size = "tiny") #, font.size = "tiny"
##
## ===============================================================
## Dependent variable:
## -------------------------------------------------
## log(GRP_2017) LabPr_2017 EmpPerWF_2017 Unemp_2017
## (1) (2) (3) (4)
## ---------------------------------------------------------------
## V_2017 0.452*** 0.168 0.003** -0.212
## (0.093) (0.299) (0.001) (0.489)
##
## log(Inv_2017) 0.715*** -0.221 0.008*** -1.988***
## (0.156) (0.591) (0.002) (0.595)
##
## educ_2017 0.003** -0.003 -0.00002 -0.009***
## (0.001) (0.003) (0.00001) (0.003)
##
## Wage_2017 -0.020 -0.312 -0.0001 -0.050
## (0.052) (0.195) (0.001) (0.144)
##
## ---------------------------------------------------------------
## Observations 79 79 79 79
## ===============================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
stargazer(reg_2017_GRP_V, reg_2017_LabPr_V, reg_2017_emp_V, reg_2017_Unemp_V,
reg_2017_GRP_RV_UV, reg_2017_LabPr_RV_UV, reg_2017_emp_RV_UV, reg_2017_Unemp_RV_UV,
se = list(se2imp_GRP_V, se2imp_LabPr_V, se2imp_emp_V, se2imp_Unemp_V,
se2imp_GRP_RV_UV, se2imp_LabPr_RV_UV, se2imp_emp_RV_UV, se2imp_Unemp_RV_UV),
type = "text", omit = c('Constant'), keep.stat = c("n"), font.size = "tiny", title = 'OLS crossection models', add.lines = list(c('model', 'OLS', 'OLS', 'OLS', 'OLS', 'OLS', 'OLS', 'OLS', 'OLS')), header = FALSE)
##
## OLS crossection models
## =================================================================================================================
## Dependent variable:
## ---------------------------------------------------------------------------------------------------
## log(GRP_2017) LabPr_2017 EmpPerWF_2017 Unemp_2017 log(GRP_2017) LabPr_2017 EmpPerWF_2017 Unemp_2017
## (1) (2) (3) (4) (5) (6) (7) (8)
## -----------------------------------------------------------------------------------------------------------------
## V_2017 0.452*** 0.168 0.003** -0.212
## (0.093) (0.299) (0.001) (0.489)
##
## Rv_2017 0.182 -0.119 -0.0001 0.516
## (0.124) (0.499) (0.001) (0.451)
##
## Uv_2017 0.782*** 0.517 0.006** -1.102
## (0.173) (0.640) (0.003) (0.857)
##
## log(Inv_2017) 0.715*** -0.221 0.008*** -1.988*** 0.693*** -0.244 0.007*** -1.929***
## (0.156) (0.591) (0.002) (0.595) (0.159) (0.603) (0.002) (0.591)
##
## educ_2017 0.003** -0.003 -0.00002 -0.009*** 0.002 -0.004 -0.00003* -0.007*
## (0.001) (0.003) (0.00001) (0.003) (0.002) (0.004) (0.00002) (0.004)
##
## Wage_2017 -0.020 -0.312 -0.0001 -0.050 -0.007 -0.298 0.00002 -0.086
## (0.052) (0.195) (0.001) (0.144) (0.053) (0.194) (0.001) (0.150)
##
## -----------------------------------------------------------------------------------------------------------------
## model OLS OLS OLS OLS OLS OLS OLS OLS
## Observations 79 79 79 79 79 79 79 79
## =================================================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
write_xlsx(Data_2017,
'C:/Users/Kolya/Documents/New_studies/Term_paper/Data/Raw data/Data_2017.xlsx')
write_xlsx(Data_2006,
'C:/Users/Kolya/Documents/New_studies/Term_paper/Data/Raw data/Data_2006.xlsx')
write_xlsx(Common_dataset_new,
'C:/Users/Kolya/Documents/New_studies/Term_paper/Data/Raw data/Common_dataset_new.xlsx')
write_xlsx(Common_dataset_new_2006_2008,
'C:/Users/Kolya/Documents/New_studies/Term_paper/Data/Raw data/Common_dataset_new_2006_2008.xlsx')
write_xlsx(Common_dataset_new_2009_2014,
'C:/Users/Kolya/Documents/New_studies/Term_paper/Data/Raw data/Common_dataset_new_2009_2014.xlsx')
write_xlsx(Common_dataset_new_2015_2017,
'C:/Users/Kolya/Documents/New_studies/Term_paper/Data/Raw data/Common_dataset_new_2015_2017.xlsx')