Hereafter I use this tutorial http://www.sthda.com/english/articles/31-principal-component-methods-in-r-practical-guide/112-pca-principal-component-analysis-essentials/
##They account for 58% of the whole vaariace of all 37 variables. Not so bad!
pca<-prcomp(df_all[,-c( 33,39:42)], scale = FALSE, center = TRUE) #remove codigos e municipios UF and IGM
summary(pca)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.4282 1.62233 1.49264 1.36560 1.26994 1.17130 1.11757
## Proportion of Variance 0.1815 0.08103 0.06859 0.05741 0.04965 0.04224 0.03845
## Cumulative Proportion 0.1815 0.26254 0.33113 0.38854 0.43819 0.48043 0.51888
## PC8 PC9 PC10 PC11 PC12 PC13 PC14
## Standard deviation 1.04358 1.03013 0.95416 0.93584 0.92555 0.89666 0.88580
## Proportion of Variance 0.03353 0.03267 0.02803 0.02696 0.02637 0.02475 0.02416
## Cumulative Proportion 0.55240 0.58507 0.61310 0.64006 0.66644 0.69119 0.71534
## PC15 PC16 PC17 PC18 PC19 PC20 PC21
## Standard deviation 0.8737 0.84613 0.81913 0.78447 0.7667 0.74767 0.74244
## Proportion of Variance 0.0235 0.02204 0.02066 0.01895 0.0181 0.01721 0.01697
## Cumulative Proportion 0.7388 0.76089 0.78154 0.80049 0.8186 0.83579 0.85276
## PC22 PC23 PC24 PC25 PC26 PC27 PC28
## Standard deviation 0.71047 0.69726 0.65871 0.65248 0.64119 0.61177 0.59460
## Proportion of Variance 0.01554 0.01497 0.01336 0.01311 0.01266 0.01152 0.01088
## Cumulative Proportion 0.86830 0.88327 0.89663 0.90974 0.92239 0.93391 0.94480
## PC29 PC30 PC31 PC32 PC33 PC34 PC35
## Standard deviation 0.56632 0.52499 0.50921 0.47775 0.46617 0.42087 0.38352
## Proportion of Variance 0.00987 0.00849 0.00798 0.00703 0.00669 0.00545 0.00453
## Cumulative Proportion 0.95467 0.96316 0.97114 0.97817 0.98486 0.99031 0.99484
## PC36 PC37
## Standard deviation 0.32435 0.24997
## Proportion of Variance 0.00324 0.00192
## Cumulative Proportion 0.99808 1.00000
names(pca)
## [1] "sdev" "rotation" "center" "scale" "x"
ncomp<-9 # lets keep 9 components for this test because they are greater than 1 in eigenvalue
rawLoadings<- pca$rotation[,1:ncomp] %*% diag(pca$sdev, ncomp, ncomp)
rotatedLoadings <- varimax(rawLoadings)$loadings# loadings Varima rotated
rotatedLoadings # We mjust use these to interpret dimensions
##
## Loadings:
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
## u5_mor 0.542 0.192 0.145 0.174 0.116
## wat_dis -0.178 -0.245 -0.180
## sev_dis -0.102 0.110 0.107 0.403
## big_hh 0.197 0.122 -0.101 0.701 0.222
## fem_hh 0.185 0.257 -0.164 0.238 0.465 0.224 0.164
## ill 0.571 0.204 0.374 -0.288
## edu_bas 0.825 0.198 -0.122 -0.112
## edu_hig 0.645 0.301 -0.105 -0.222 0.111
## no_wor 0.608 0.213 -0.167 0.406 -0.219
## chi_wor 0.632 0.187 0.172 -0.254 0.427
## inf_wor 0.725 0.344 -0.140 0.112 -0.213
## dep 0.266 0.722 0.167 0.154
## hh_pen -0.140 0.580 -0.240 -0.343
## two_job 0.263 0.287 0.117 -0.514 0.116
## mul_pla -0.176 -0.311 0.220
## hal_min_wag 0.429 0.721 -0.137 0.102
## emp_men -0.236 0.345 -0.620
## gin -0.164 -0.206 0.574 0.283 -0.191
## spr 0.117 0.710 -0.107
## wat_bod 0.159 -0.124
## wel -0.576 0.167
## cis -0.352 -0.767 0.191
## no_sew 0.243 0.468 0.231 0.226 0.326 -0.145
## riv_dis -0.236 0.271 0.125 -0.134
## abs_dom -0.228 -0.116 0.125 -0.712 0.218
## abs_ind -0.138 -0.786
## abs_agri -0.122 0.787
## drought -0.115 -0.481 0.518 -0.161 -0.135
## q95 -0.135 -0.328
## blu_wat -0.552 -0.295 0.132 0.105
## ret_q 0.147 0.222 -0.333
## aridity -0.100 0.796 -0.113 0.132 0.187
## FINANÇAS 0.106 -0.803 -0.147
## GESTÃO -0.159 0.116 -0.780
## DESEMPENHO -0.254 -0.267 -0.544 -0.127 0.219
## def_total 0.237 0.716 0.133 -0.187 0.104
## taxa_desmat_mun -0.258 0.542 -0.125 -0.179
##
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
## SS loadings 3.861 2.509 1.823 1.399 1.644 3.311 1.510 1.452 1.494
## Proportion Var 0.104 0.068 0.049 0.038 0.044 0.089 0.041 0.039 0.040
## Cumulative Var 0.104 0.172 0.221 0.259 0.304 0.393 0.434 0.473 0.514
invLoadings <- t(pracma::pinv(rotatedLoadings))
scores <- scale(df_all[,-c(33,39:42)]) %*% invLoadings # Note the df_all is the same of that used for PCA, excluding the same columns
#scores # Scores computed via rotated loadings and that must be used for generatiing maps
mun_scores<-as_tibble(data.frame(df_all[,39:42], scores)) # creates the table with all municipalities with scores
#colnames(mun_scores)
mun_scores # This is the tibble to be used for generation maps Where X are PCs
## # A tibble: 1,153 x 13
## code_short CODIBGE_full Município UF X1 X2 X3 X4 X5
## <int> <int> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 221100 2211001 TERESINA PI -1.86 -0.274 -0.0449 -1.83 0.108
## 2 240800 2408003 MOSSORO RN -2.36 0.0675 1.53 -1.30 0.922
## 3 240200 2402006 CAICO RN -2.77 0.178 1.26 -1.56 0.730
## 4 250400 2504009 CAMPINA … PB -2.11 0.665 1.11 0.380 0.552
## 5 220390 2203909 FLORIANO PI -2.73 -0.714 -0.833 -1.28 0.884
## 6 241200 2412005 SAO GONC… RN -1.95 1.75 -2.12 -0.469 -0.380
## 7 260120 2601201 ARCOVERDE PE -2.78 0.381 1.10 -0.910 -0.411
## 8 241240 2412401 SAO JOSE… RN -2.24 0.877 0.827 1.23 -1.28
## 9 291170 2911709 GUANAMBI BA -1.43 0.798 0.692 -1.08 -1.26
## 10 313520 3135209 JANUARIA MG -2.75 0.0412 0.757 -1.10 0.306
## # … with 1,143 more rows, and 4 more variables: X6 <dbl>, X7 <dbl>, X8 <dbl>,
## # X9 <dbl>
#write.csv(mun_scores, "mun_scores.csv") # This will be used further for the maps
# Dimesnion 1 (Human capital)
mun_scores_sub %>%
mutate(X1= scale(X1)) %>%
select(X1, geom) %>%
as.data.frame() %>%
mutate(category=cut(X1, breaks=c(-Inf, -1.5, -0.5, 0.5, 1.5, Inf),
labels=c("very low","low","middle","high", "very high")))->dim1
ggplot()+ geom_sf(data=dim1$geom, aes(fill = fct_rev(dim1$category)), lwd = 0)+
scale_fill_brewer(palette="RdYlBu")+
geom_sf(data=uf_caat, fill="transparent")+
coord_sf(expand = FALSE)+
labs(subtitle="Human capital", size=8, fill = "Vulnerability") +
theme_map()+
theme(legend.position = "none")->dim1.graf
dim1.graf
mun_scores_sub %>%
mutate(X2= scale(X2)) %>% #negative cardinality
dplyr::select(X2, geom) %>%
as.data.frame() %>%
mutate(category=cut(X2, breaks=c(-Inf, -1.5, -0.5, 0.5, 1.5, Inf),
labels=c("very low","low","middle","high", "very high")))->dim2
ggplot()+ geom_sf(data=dim2$geom, aes(fill = fct_rev(dim2$category)), lwd = 0)+
scale_fill_brewer(palette="RdYlBu")+
geom_sf(data=uf_caat, fill="transparent")+
coord_sf(expand = FALSE)+
labs(subtitle="Environmental conditions", size=8, fill = "Vulnerability") +
theme_map()+
theme(legend.position = "none")->dim2.graf
dim2.graf
mun_scores_sub %>%
mutate(X3= scale(X3)) %>%
select(X3, geom) %>%
as.data.frame() %>%
mutate(category=cut(X3, breaks=c(-Inf, -1.5, -0.5, 0.5, 1.5, Inf),
labels=c("very low","low","middle","high", "very high")))->dim3
ggplot()+ geom_sf(data=dim3$geom, aes(fill = fct_rev(dim3$category)), lwd = 0)+
scale_fill_brewer(palette="RdYlBu")+
geom_sf(data=uf_caat, fill="transparent")+
coord_sf(expand = FALSE)+
labs(subtitle="Drought exposure", size=8, fill = "Vulnerability") +
theme_map()+
theme(legend.position = "none")->dim3.graf
dim3.graf
mun_scores_sub %>%
mutate(X4= scale(X4)) %>% # already inverted as variable
select(X4, geom) %>%
as.data.frame() %>%
mutate(category=cut(X4, breaks=c(-Inf, -1.5, -0.5, 0.5, 1.5, Inf),
labels=c("very low","low","medium","high", "very high")))->dim4
ggplot()+ geom_sf(data=dim4$geom, aes(fill = fct_rev(dim4$category)), lwd = 0)+
scale_fill_brewer(palette="RdYlBu")+
geom_sf(data=uf_caat, fill="transparent")+
coord_sf(expand = FALSE)+
labs(subtitle="Spring protection", size=8, fill = "Vulnerability") +
theme_map()+
theme(legend.position = "none")->dim4.graf
dim4.graf
mun_scores_sub %>%
mutate(X5= scale(X5)) %>% # Cardinality is negative and variableas are not inverted, so bad governane must have positive value and very high category
select(X5, geom) %>%
as.data.frame() %>%
mutate(category=cut(X5, breaks=c(-Inf, -1.5, -0.5, 0.5, 1.5, Inf), # bad values are positive,
labels=c("very low","low","medium","high", "very high")))->dim5
ggplot()+ geom_sf(data=dim5$geom, aes(fill = fct_rev(dim5$category)), lwd = 0)+
scale_fill_brewer(palette="RdYlBu")+
geom_sf(data=uf_caat, fill="transparent")+
coord_sf(expand = FALSE)+
labs(subtitle="Municipal Governance", size=8, fill = "Vulnerability") +
theme_map()+
theme(legend.position = "none")->dim5.graf
dim5.graf
mun_scores_sub %>%
mutate(X6= scale(X6)) %>%
select(X6, geom) %>%
as.data.frame() %>%
mutate(category=cut(X6, breaks=c(-Inf, -1.5, -0.5, 0.5, 1.5, Inf),
labels=c("very low","low","middle","high", "very high")))->dim6
ggplot()+ geom_sf(data=dim6$geom, aes(fill = fct_rev(dim6$category)), lwd = 0)+
scale_fill_brewer(palette="RdYlBu")+
geom_sf(data=uf_caat, fill="transparent")+
coord_sf(expand = FALSE)+
labs(subtitle="Deprivation", size=8, fill = "Vulnerability") +
theme_map()+
theme(legend.position = "none")->dim6.graf
dim6.graf
mun_scores_sub %>%
mutate(X7= scale(X7*-1)) %>% # Cardinality negative, so very small negative values denotes high vulnerability
select(X7, geom) %>%
as.data.frame() %>%
mutate(category=cut(X7, breaks=c(-Inf, -1.5, -0.5, 0.5, 1.5, Inf),
labels=c("very low","low","middle","high", "very high")))->dim7
ggplot()+ geom_sf(data=dim7$geom, aes(fill = fct_rev(dim7$category)), lwd = 0)+
scale_fill_brewer(palette="RdYlBu")+
geom_sf(data=uf_caat, fill="transparent")+
coord_sf(expand = FALSE)+
labs(subtitle="Labour inequity", size=8, fill = "Vulnerability") +
theme_map()+
theme(legend.position = "none")->dim7.graf
dim7.graf
mun_scores_sub %>%
mutate(X8= scale(X8*-1)) %>% # Cardinality negetive
select(X8, geom) %>%
as.data.frame() %>%
mutate(category=cut(X8, breaks=c(-Inf, -1.5, -0.5, 0.5, 1.5, Inf),
labels=c("very low","low","middle","high", "very high")))->dim8
ggplot()+ geom_sf(data=dim8$geom, aes(fill = fct_rev(dim8$category)), lwd = 0)+
scale_fill_brewer(palette="RdYlBu")+
geom_sf(data=uf_caat, fill="transparent")+
coord_sf(expand = FALSE)+
labs(subtitle="Domestic and industrial water-use", size=8, fill = "Vulnerability") +
theme_map()+
theme(legend.position = "none")->dim8.graf
dim8.graf
mun_scores_sub %>%
mutate(X9= scale(X9)) %>% # Cardinality is positive, so, greater numbers more abstraction and high vunlerability???
select(X9, geom) %>%
as.data.frame() %>%
mutate(category=cut(X9, breaks=c(-Inf, -1.5, -0.5, 0.5, 1.5, Inf),
labels=c("very low","low","middle","high", "very high")))->dim9
ggplot()+ geom_sf(data=dim9$geom, aes(fill = fct_rev(dim9$category)), lwd = 0)+
scale_fill_brewer(palette="RdYlBu")+
geom_sf(data=uf_caat, fill="transparent")+
coord_sf(expand = FALSE)+
labs(subtitle="Agricultural water-use", size=8, fill = "Vulnerability") +
theme_map()+
theme(legend.position = "none")->dim9.graf
dim9.graf
data.frame(dim1$X1,dim2$X2,dim3$X3,dim4$X4,dim5$X5,dim6$X6,dim7$X7,dim8$X8,dim9$X9) -> wivi_score
names(wivi_score)<-c("PC1","PC2","PC3","PC4","PC5","PC6","PC7","PC8","PC9")
wivi_score$wivi_score<-scale(rowSums(wivi_score))
summary(wivi_score) # All dimensions and final score are scaled to mean = 0 and sd =1 as "z scores"
## PC1 PC2 PC3 PC4
## Min. :-3.07390 Min. :-2.79065 Min. :-3.39416 Min. :-2.56057
## 1st Qu.:-0.71193 1st Qu.:-0.66157 1st Qu.:-0.59772 1st Qu.:-0.72556
## Median :-0.01468 Median : 0.05567 Median : 0.08418 Median :-0.05734
## Mean : 0.00000 Mean : 0.00000 Mean : 0.00000 Mean : 0.00000
## 3rd Qu.: 0.69500 3rd Qu.: 0.75733 3rd Qu.: 0.67517 3rd Qu.: 0.69117
## Max. : 3.80400 Max. : 2.43197 Max. : 2.87160 Max. : 3.80393
## PC5 PC6 PC7 PC8
## Min. :-2.4645 Min. :-2.61033 Min. :-2.968531 Min. :-1.56770
## 1st Qu.:-0.7155 1st Qu.:-0.69993 1st Qu.:-0.656200 1st Qu.:-0.41286
## Median :-0.1142 Median :-0.01592 Median :-0.002864 Median :-0.07122
## Mean : 0.0000 Mean : 0.00000 Mean : 0.000000 Mean : 0.00000
## 3rd Qu.: 0.6231 3rd Qu.: 0.70802 3rd Qu.: 0.636380 3rd Qu.: 0.23857
## Max. : 4.5709 Max. : 2.94660 Max. : 3.185575 Max. :17.80678
## PC9 wivi_score.V1
## Min. :-1.6643 Min. :-2.934013
## 1st Qu.:-0.5135 1st Qu.:-0.689215
## Median :-0.1636 Median :-0.019484
## Mean : 0.0000 Mean : 0.000000
## 3rd Qu.: 0.2400 3rd Qu.: 0.631153
## Max. :12.6792 Max. : 6.291282
wivi_score %>%
mutate(category=cut(wivi_score, breaks=c(-Inf, -1.5, -0.5, 0.5, 1.5, Inf),
labels=c("very low","low","medium","high", "very high")))->wivi_score_cat # create categories of vunlerability as Hummell's work
ggplot()+ geom_sf(data=mun_scores_sub, aes(fill = fct_rev(wivi_score_cat$category)),lwd=0)+
scale_fill_brewer(palette="RdYlBu")+
geom_sf(data=uf_caat, fill="transparent")+
labs(subtitle="Water Vulnerability Index", size=8, fill = "Vulnerability") +
theme_minimal()->dimall.graf
dimall.graf
library(cowplot)
##
## ********************************************************
## Note: As of version 1.0.0, cowplot does not change the
## default ggplot2 theme anymore. To recover the previous
## behavior, execute:
## theme_set(theme_cowplot())
## ********************************************************
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggthemes':
##
## theme_map
a<-plot_grid(dim1.graf, dim2.graf, dim3.graf, dim4.graf, dim5.graf, dim6.graf, dim7.graf, dim8.graf, dim9.graf, ncol=3, nrow=3)
b<-plot_grid(a,dimall.graf, ncol=2)
b
mun_score_all %>%
mutate(category2=cut(wivi_score, breaks=c(-Inf, -1.5, -0.5, 0.5, Inf),
labels=c("very low","low","meidum","vulnerable"))) %>%
group_by(UF, category2) %>%
summarise(population=sum(pop), n_mun=n(), perc_mun_hh=n()/298*100, perc_pop_hh=sum(pop)%in%(category2=="vulnerable")/sum(pop)*100) %>%
filter(category2=="vulnerable") -> table_pop
kable(table_pop)
| UF | category2 | population | n_mun | perc_mun_hh | perc_pop_hh |
|---|---|---|---|---|---|
| AL | vulnerable | 536499 | 29 | 9.731544 | 0 |
| BA | vulnerable | 1512236 | 68 | 22.818792 | 0 |
| CE | vulnerable | 817328 | 25 | 8.389262 | 0 |
| MG | vulnerable | 102993 | 5 | 1.677852 | 0 |
| PB | vulnerable | 876760 | 64 | 21.476510 | 0 |
| PE | vulnerable | 1841477 | 46 | 15.436242 | 0 |
| PI | vulnerable | 1031231 | 30 | 10.067114 | 0 |
| RN | vulnerable | 887557 | 54 | 18.120805 | 0 |
| SE | vulnerable | 158659 | 6 | 2.013423 | 0 |
df_graf.bar%>%
mutate(pol=if_else(mean_wivi<0, "low vulnerability", "high vulnerability")) %>%
ggplot(aes(dim, mean_wivi, color= pol))+
#scale_fill_brewer(palette = "RdYlBu")+
geom_point(size=3)+geom_hline(aes(yintercept = 0), linetype="dashed")+ coord_flip()+
geom_errorbar(aes(ymin=mean_wivi-se_wivi, ymax=mean_wivi+se_wivi), size=0.3) +
labs(x="Dimension", y="Vulnerability (mean error)", color= NULL)+
theme_bw ()+
theme(legend.position = "top")+
facet_wrap(.~ UF)->per_uf_wivi
per_uf_wivi