Scritp for PCA WIVI

Creating data and checking inconsistencies

We retain 9 variables because they present Eigenvalues greater than “1”.

##They account for 58% of the whole vaariace of all 37 variables. Not so bad!

pca<-prcomp(df_all[,-c( 33,39:42)], scale = FALSE, center = TRUE) #remove codigos e municipios UF and IGM
summary(pca)
## Importance of components:
##                           PC1     PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.4282 1.62233 1.49264 1.36560 1.26994 1.17130 1.11757
## Proportion of Variance 0.1815 0.08103 0.06859 0.05741 0.04965 0.04224 0.03845
## Cumulative Proportion  0.1815 0.26254 0.33113 0.38854 0.43819 0.48043 0.51888
##                            PC8     PC9    PC10    PC11    PC12    PC13    PC14
## Standard deviation     1.04358 1.03013 0.95416 0.93584 0.92555 0.89666 0.88580
## Proportion of Variance 0.03353 0.03267 0.02803 0.02696 0.02637 0.02475 0.02416
## Cumulative Proportion  0.55240 0.58507 0.61310 0.64006 0.66644 0.69119 0.71534
##                          PC15    PC16    PC17    PC18   PC19    PC20    PC21
## Standard deviation     0.8737 0.84613 0.81913 0.78447 0.7667 0.74767 0.74244
## Proportion of Variance 0.0235 0.02204 0.02066 0.01895 0.0181 0.01721 0.01697
## Cumulative Proportion  0.7388 0.76089 0.78154 0.80049 0.8186 0.83579 0.85276
##                           PC22    PC23    PC24    PC25    PC26    PC27    PC28
## Standard deviation     0.71047 0.69726 0.65871 0.65248 0.64119 0.61177 0.59460
## Proportion of Variance 0.01554 0.01497 0.01336 0.01311 0.01266 0.01152 0.01088
## Cumulative Proportion  0.86830 0.88327 0.89663 0.90974 0.92239 0.93391 0.94480
##                           PC29    PC30    PC31    PC32    PC33    PC34    PC35
## Standard deviation     0.56632 0.52499 0.50921 0.47775 0.46617 0.42087 0.38352
## Proportion of Variance 0.00987 0.00849 0.00798 0.00703 0.00669 0.00545 0.00453
## Cumulative Proportion  0.95467 0.96316 0.97114 0.97817 0.98486 0.99031 0.99484
##                           PC36    PC37
## Standard deviation     0.32435 0.24997
## Proportion of Variance 0.00324 0.00192
## Cumulative Proportion  0.99808 1.00000
names(pca)
## [1] "sdev"     "rotation" "center"   "scale"    "x"
ncomp<-9 # lets keep 9 components for this test because they are greater than 1 in eigenvalue

Now we must rotate loadings to improve interpretation of the dimensions

rawLoadings<- pca$rotation[,1:ncomp] %*% diag(pca$sdev, ncomp, ncomp)
rotatedLoadings <- varimax(rawLoadings)$loadings# loadings Varima rotated
rotatedLoadings # We mjust use these to interpret dimensions
## 
## Loadings:
##                 [,1]   [,2]   [,3]   [,4]   [,5]   [,6]   [,7]   [,8]   [,9]  
## u5_mor           0.542                0.192         0.145  0.174  0.116       
## wat_dis                -0.178        -0.245        -0.180                     
## sev_dis                       -0.102  0.110         0.107  0.403              
## big_hh           0.197  0.122 -0.101                0.701                0.222
## fem_hh           0.185  0.257 -0.164  0.238         0.465  0.224         0.164
## ill              0.571                0.204         0.374               -0.288
## edu_bas          0.825                              0.198 -0.122        -0.112
## edu_hig          0.645  0.301 -0.105                      -0.222  0.111       
## no_wor           0.608         0.213 -0.167         0.406               -0.219
## chi_wor          0.632  0.187  0.172 -0.254                0.427              
## inf_wor          0.725                              0.344 -0.140  0.112 -0.213
## dep              0.266                              0.722  0.167  0.154       
## hh_pen                               -0.140         0.580 -0.240        -0.343
## two_job          0.263  0.287                       0.117 -0.514         0.116
## mul_pla         -0.176                             -0.311  0.220              
## hal_min_wag      0.429                              0.721 -0.137  0.102       
## emp_men                -0.236         0.345               -0.620              
## gin                    -0.164        -0.206         0.574  0.283 -0.191       
## spr                            0.117  0.710        -0.107                     
## wat_bod          0.159                                                  -0.124
## wel                    -0.576         0.167                                   
## cis                    -0.352 -0.767                                     0.191
## no_sew           0.243  0.468  0.231  0.226         0.326               -0.145
## riv_dis                       -0.236  0.271  0.125                      -0.134
## abs_dom         -0.228               -0.116                0.125 -0.712  0.218
## abs_ind                       -0.138                             -0.786       
## abs_agri                                                  -0.122         0.787
## drought         -0.115 -0.481  0.518               -0.161 -0.135              
## q95                                                -0.135               -0.328
## blu_wat                -0.552        -0.295                0.132  0.105       
## ret_q            0.147                                            0.222 -0.333
## aridity                -0.100  0.796               -0.113         0.132  0.187
## FINANÇAS                              0.106 -0.803               -0.147       
## GESTÃO          -0.159         0.116        -0.780                            
## DESEMPENHO      -0.254               -0.267 -0.544 -0.127         0.219       
## def_total        0.237  0.716         0.133        -0.187  0.104              
## taxa_desmat_mun -0.258  0.542                             -0.125        -0.179
## 
##                 [,1]  [,2]  [,3]  [,4]  [,5]  [,6]  [,7]  [,8]  [,9]
## SS loadings    3.861 2.509 1.823 1.399 1.644 3.311 1.510 1.452 1.494
## Proportion Var 0.104 0.068 0.049 0.038 0.044 0.089 0.041 0.039 0.040
## Cumulative Var 0.104 0.172 0.221 0.259 0.304 0.393 0.434 0.473 0.514

Now, we need to generate the scores of each municipality based on “rotated dimanions”

invLoadings     <- t(pracma::pinv(rotatedLoadings))
scores          <- scale(df_all[,-c(33,39:42)]) %*% invLoadings # Note the df_all is the same of that used for PCA, excluding the same columns
#scores                   # Scores computed via rotated loadings and that must be used for generatiing maps
mun_scores<-as_tibble(data.frame(df_all[,39:42], scores)) # creates the table with all municipalities with scores
#colnames(mun_scores)
mun_scores # This is the tibble to be used for generation maps Where X are PCs
## # A tibble: 1,153 x 13
##    code_short CODIBGE_full Município UF       X1      X2      X3     X4     X5
##         <int>        <int> <fct>     <fct> <dbl>   <dbl>   <dbl>  <dbl>  <dbl>
##  1     221100      2211001 TERESINA  PI    -1.86 -0.274  -0.0449 -1.83   0.108
##  2     240800      2408003 MOSSORO   RN    -2.36  0.0675  1.53   -1.30   0.922
##  3     240200      2402006 CAICO     RN    -2.77  0.178   1.26   -1.56   0.730
##  4     250400      2504009 CAMPINA … PB    -2.11  0.665   1.11    0.380  0.552
##  5     220390      2203909 FLORIANO  PI    -2.73 -0.714  -0.833  -1.28   0.884
##  6     241200      2412005 SAO GONC… RN    -1.95  1.75   -2.12   -0.469 -0.380
##  7     260120      2601201 ARCOVERDE PE    -2.78  0.381   1.10   -0.910 -0.411
##  8     241240      2412401 SAO JOSE… RN    -2.24  0.877   0.827   1.23  -1.28 
##  9     291170      2911709 GUANAMBI  BA    -1.43  0.798   0.692  -1.08  -1.26 
## 10     313520      3135209 JANUARIA  MG    -2.75  0.0412  0.757  -1.10   0.306
## # … with 1,143 more rows, and 4 more variables: X6 <dbl>, X7 <dbl>, X8 <dbl>,
## #   X9 <dbl>
#write.csv(mun_scores, "mun_scores.csv") # This will be used further for the maps

Plotting PCA

We do note really need to go deeper into PCA as we already have the interpretation of each dimension after rotation as above.

Better to explore maps and figures

Dimesnion 1 (Human Capital)

# Dimesnion 1 (Human capital)
mun_scores_sub %>%
  mutate(X1= scale(X1)) %>% 
  select(X1, geom) %>% 
  as.data.frame() %>% 
  mutate(category=cut(X1, breaks=c(-Inf, -1.5, -0.5, 0.5, 1.5, Inf), 
                    labels=c("very low","low","middle","high", "very high")))->dim1

ggplot()+  geom_sf(data=dim1$geom, aes(fill = fct_rev(dim1$category)), lwd = 0)+
  scale_fill_brewer(palette="RdYlBu")+
  geom_sf(data=uf_caat, fill="transparent")+
  coord_sf(expand = FALSE)+
  labs(subtitle="Human capital", size=8, fill = "Vulnerability") +
  theme_map()+
  theme(legend.position = "none")->dim1.graf
dim1.graf

Dimesnion 2 (Environemntal)

mun_scores_sub %>%
  mutate(X2= scale(X2)) %>% #negative cardinality
  dplyr::select(X2, geom) %>% 
  as.data.frame() %>% 
  mutate(category=cut(X2, breaks=c(-Inf, -1.5, -0.5, 0.5, 1.5, Inf), 
                      labels=c("very low","low","middle","high", "very high")))->dim2

ggplot()+  geom_sf(data=dim2$geom, aes(fill = fct_rev(dim2$category)), lwd = 0)+
  scale_fill_brewer(palette="RdYlBu")+
  geom_sf(data=uf_caat, fill="transparent")+
  coord_sf(expand = FALSE)+
  labs(subtitle="Environmental conditions", size=8, fill = "Vulnerability") +
  theme_map()+
  theme(legend.position = "none")->dim2.graf
dim2.graf

Dimesnion 3 (Drought)

mun_scores_sub %>%
  mutate(X3= scale(X3)) %>% 
  select(X3, geom) %>% 
  as.data.frame() %>% 
  mutate(category=cut(X3, breaks=c(-Inf, -1.5, -0.5, 0.5, 1.5, Inf), 
                      labels=c("very low","low","middle","high", "very high")))->dim3
ggplot()+  geom_sf(data=dim3$geom, aes(fill = fct_rev(dim3$category)), lwd = 0)+
  scale_fill_brewer(palette="RdYlBu")+
  geom_sf(data=uf_caat, fill="transparent")+
  coord_sf(expand = FALSE)+
  labs(subtitle="Drought exposure", size=8, fill = "Vulnerability") +
  theme_map()+
  theme(legend.position = "none")->dim3.graf
dim3.graf

Dimesnion 4 (springs - Environmental protection)

mun_scores_sub %>%
  mutate(X4= scale(X4)) %>% # already inverted as variable
  select(X4, geom) %>% 
  as.data.frame() %>% 
  mutate(category=cut(X4, breaks=c(-Inf, -1.5, -0.5, 0.5, 1.5, Inf), 
                      labels=c("very low","low","medium","high", "very high")))->dim4
ggplot()+  geom_sf(data=dim4$geom, aes(fill = fct_rev(dim4$category)), lwd = 0)+
  scale_fill_brewer(palette="RdYlBu")+
  geom_sf(data=uf_caat, fill="transparent")+
  coord_sf(expand = FALSE)+
  labs(subtitle="Spring protection", size=8, fill = "Vulnerability") +
  theme_map()+
  theme(legend.position = "none")->dim4.graf
dim4.graf

Dimesnion 5 (Governance )

mun_scores_sub %>%
  mutate(X5= scale(X5)) %>%  # Cardinality is negative and variableas are not inverted, so bad governane must have positive value and very high category
  select(X5, geom) %>% 
  as.data.frame() %>% 
  mutate(category=cut(X5, breaks=c(-Inf, -1.5, -0.5, 0.5, 1.5, Inf), # bad values are positive,
                      labels=c("very low","low","medium","high", "very high")))->dim5

ggplot()+  geom_sf(data=dim5$geom, aes(fill = fct_rev(dim5$category)), lwd = 0)+
  scale_fill_brewer(palette="RdYlBu")+
  geom_sf(data=uf_caat, fill="transparent")+
  coord_sf(expand = FALSE)+
  labs(subtitle="Municipal Governance", size=8, fill = "Vulnerability") +
  theme_map()+
  theme(legend.position = "none")->dim5.graf
dim5.graf

Dimesnion 6 - Porverty

 mun_scores_sub %>%
  mutate(X6= scale(X6)) %>% 
  select(X6, geom) %>% 
  as.data.frame() %>% 
  mutate(category=cut(X6, breaks=c(-Inf, -1.5, -0.5, 0.5, 1.5, Inf), 
                      labels=c("very low","low","middle","high", "very high")))->dim6

ggplot()+  geom_sf(data=dim6$geom, aes(fill = fct_rev(dim6$category)), lwd = 0)+
  scale_fill_brewer(palette="RdYlBu")+
  geom_sf(data=uf_caat, fill="transparent")+
  coord_sf(expand = FALSE)+
  labs(subtitle="Deprivation", size=8, fill = "Vulnerability") +
  theme_map()+
  theme(legend.position = "none")->dim6.graf
dim6.graf

Dimesnion 7 # Labour inequality

mun_scores_sub %>%
  mutate(X7= scale(X7*-1)) %>% # Cardinality negative, so very small negative values denotes high vulnerability
  select(X7, geom) %>% 
  as.data.frame() %>% 
  mutate(category=cut(X7, breaks=c(-Inf, -1.5, -0.5, 0.5, 1.5, Inf), 
                      labels=c("very low","low","middle","high", "very high")))->dim7

ggplot()+  geom_sf(data=dim7$geom, aes(fill = fct_rev(dim7$category)), lwd = 0)+
  scale_fill_brewer(palette="RdYlBu")+
  geom_sf(data=uf_caat, fill="transparent")+
  coord_sf(expand = FALSE)+
  labs(subtitle="Labour inequity", size=8, fill = "Vulnerability") +
  theme_map()+
  theme(legend.position = "none")->dim7.graf
dim7.graf

Dimesnion 8 (Water Use - domestic and industrial) #high values cause vulenrability

mun_scores_sub %>%
  mutate(X8= scale(X8*-1)) %>% # Cardinality negetive
  select(X8, geom) %>% 
  as.data.frame() %>% 
  mutate(category=cut(X8, breaks=c(-Inf, -1.5, -0.5, 0.5, 1.5, Inf), 
                      labels=c("very low","low","middle","high", "very high")))->dim8

ggplot()+  geom_sf(data=dim8$geom, aes(fill = fct_rev(dim8$category)), lwd = 0)+
  scale_fill_brewer(palette="RdYlBu")+
  geom_sf(data=uf_caat, fill="transparent")+
  coord_sf(expand = FALSE)+
  labs(subtitle="Domestic and industrial water-use", size=8, fill = "Vulnerability") +
  theme_map()+
  theme(legend.position = "none")->dim8.graf
dim8.graf

Dimesnion 9 # Agricultural water-use

mun_scores_sub %>%
  mutate(X9= scale(X9)) %>% # Cardinality is positive, so, greater numbers more abstraction and high vunlerability???
  select(X9, geom) %>% 
  as.data.frame() %>% 
  mutate(category=cut(X9, breaks=c(-Inf, -1.5, -0.5, 0.5, 1.5, Inf), 
                      labels=c("very low","low","middle","high", "very high")))->dim9

ggplot()+  geom_sf(data=dim9$geom, aes(fill = fct_rev(dim9$category)), lwd = 0)+
  scale_fill_brewer(palette="RdYlBu")+
  geom_sf(data=uf_caat, fill="transparent")+
  coord_sf(expand = FALSE)+
  labs(subtitle="Agricultural water-use", size=8, fill = "Vulnerability") +
  theme_map()+
  theme(legend.position = "none")->dim9.graf
dim9.graf

Combined WIVI

data.frame(dim1$X1,dim2$X2,dim3$X3,dim4$X4,dim5$X5,dim6$X6,dim7$X7,dim8$X8,dim9$X9) -> wivi_score
names(wivi_score)<-c("PC1","PC2","PC3","PC4","PC5","PC6","PC7","PC8","PC9")
wivi_score$wivi_score<-scale(rowSums(wivi_score))
summary(wivi_score) # All dimensions and final score are scaled to mean = 0 and sd =1 as "z scores"
##       PC1                PC2                PC3                PC4          
##  Min.   :-3.07390   Min.   :-2.79065   Min.   :-3.39416   Min.   :-2.56057  
##  1st Qu.:-0.71193   1st Qu.:-0.66157   1st Qu.:-0.59772   1st Qu.:-0.72556  
##  Median :-0.01468   Median : 0.05567   Median : 0.08418   Median :-0.05734  
##  Mean   : 0.00000   Mean   : 0.00000   Mean   : 0.00000   Mean   : 0.00000  
##  3rd Qu.: 0.69500   3rd Qu.: 0.75733   3rd Qu.: 0.67517   3rd Qu.: 0.69117  
##  Max.   : 3.80400   Max.   : 2.43197   Max.   : 2.87160   Max.   : 3.80393  
##       PC5               PC6                PC7                 PC8          
##  Min.   :-2.4645   Min.   :-2.61033   Min.   :-2.968531   Min.   :-1.56770  
##  1st Qu.:-0.7155   1st Qu.:-0.69993   1st Qu.:-0.656200   1st Qu.:-0.41286  
##  Median :-0.1142   Median :-0.01592   Median :-0.002864   Median :-0.07122  
##  Mean   : 0.0000   Mean   : 0.00000   Mean   : 0.000000   Mean   : 0.00000  
##  3rd Qu.: 0.6231   3rd Qu.: 0.70802   3rd Qu.: 0.636380   3rd Qu.: 0.23857  
##  Max.   : 4.5709   Max.   : 2.94660   Max.   : 3.185575   Max.   :17.80678  
##       PC9             wivi_score.V1   
##  Min.   :-1.6643   Min.   :-2.934013  
##  1st Qu.:-0.5135   1st Qu.:-0.689215  
##  Median :-0.1636   Median :-0.019484  
##  Mean   : 0.0000   Mean   : 0.000000  
##  3rd Qu.: 0.2400   3rd Qu.: 0.631153  
##  Max.   :12.6792   Max.   : 6.291282

Create category of vulnerability

wivi_score %>% 
  mutate(category=cut(wivi_score, breaks=c(-Inf, -1.5, -0.5, 0.5, 1.5, Inf), 
                      labels=c("very low","low","medium","high", "very high")))->wivi_score_cat # create categories of vunlerability as Hummell's work

Map of combined WIVI score

ggplot()+  geom_sf(data=mun_scores_sub, aes(fill = fct_rev(wivi_score_cat$category)),lwd=0)+
  scale_fill_brewer(palette="RdYlBu")+
  geom_sf(data=uf_caat, fill="transparent")+
  labs(subtitle="Water Vulnerability Index", size=8, fill = "Vulnerability") +
  theme_minimal()->dimall.graf
dimall.graf

Unite all graphs

library(cowplot)
## 
## ********************************************************
## Note: As of version 1.0.0, cowplot does not change the
##   default ggplot2 theme anymore. To recover the previous
##   behavior, execute:
##   theme_set(theme_cowplot())
## ********************************************************
## 
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggthemes':
## 
##     theme_map
a<-plot_grid(dim1.graf, dim2.graf, dim3.graf, dim4.graf, dim5.graf, dim6.graf, dim7.graf, dim8.graf, dim9.graf, ncol=3, nrow=3)
b<-plot_grid(a,dimall.graf, ncol=2)
b

Descriptive table for “Vulnerable Municpilaities” = High and Very High Risk collapsed

still working on it

mun_score_all %>% 
  mutate(category2=cut(wivi_score, breaks=c(-Inf, -1.5, -0.5, 0.5, Inf), 
                      labels=c("very low","low","meidum","vulnerable"))) %>% 
  group_by(UF, category2) %>% 
  summarise(population=sum(pop), n_mun=n(), perc_mun_hh=n()/298*100, perc_pop_hh=sum(pop)%in%(category2=="vulnerable")/sum(pop)*100) %>% 
  filter(category2=="vulnerable") -> table_pop
kable(table_pop)
UF category2 population n_mun perc_mun_hh perc_pop_hh
AL vulnerable 536499 29 9.731544 0
BA vulnerable 1512236 68 22.818792 0
CE vulnerable 817328 25 8.389262 0
MG vulnerable 102993 5 1.677852 0
PB vulnerable 876760 64 21.476510 0
PE vulnerable 1841477 46 15.436242 0
PI vulnerable 1031231 30 10.067114 0
RN vulnerable 887557 54 18.120805 0
SE vulnerable 158659 6 2.013423 0

Create tables for descriptive grpahs per UF

Figure all municipalities

df_graf.bar%>% 
  mutate(pol=if_else(mean_wivi<0, "low vulnerability", "high vulnerability")) %>% 
  ggplot(aes(dim, mean_wivi, color= pol))+
  #scale_fill_brewer(palette = "RdYlBu")+
  geom_point(size=3)+geom_hline(aes(yintercept = 0),  linetype="dashed")+ coord_flip()+
  geom_errorbar(aes(ymin=mean_wivi-se_wivi, ymax=mean_wivi+se_wivi), size=0.3) +
  labs(x="Dimension", y="Vulnerability (mean error)", color= NULL)+
  theme_bw ()+
  theme(legend.position = "top")+
  facet_wrap(.~ UF)->per_uf_wivi
per_uf_wivi