Install packages and import data

Packages

Loading shapefiles

municipalities.poly <- read_sf(dsn = "d:/Users/thomas.daher/Desktop/PISF/Municipios_beneficiados", layer = "Atlas_%C3%81gua_2021_-_PISF_-_Munic%C3%ADpios_Beneficiados")

municipalities.poly$ben_pisf <- gsub("Municípios não Beneficiados pelo PISF", "Municípios não beneficiados pelo PISF", municipalities.poly$ben_pisf)

channel.poly <- read_sf(dsn = "d:/Users/thomas.daher/Desktop/PISF/Canais_PISF", layer = "Atlas_%C3%81gua_2021_-_PISF_-_Canais")

adutoras.poly <- read_sf(dsn = "d:/Users/thomas.daher/Desktop/PISF/Adutoras_PISF", layer = "Atlas_%C3%81gua_2021_-_PISF_-_Adutoras")

bacia.poly <-  read_sf(dsn = "d:/Users/thomas.daher/Desktop/PISF/Bacias_semi arido_nordeste1", layer = "GEOFT_BHO_ANO_TDR")

ISH.poly <-  read_sf(dsn = "d:/Users/thomas.daher/Desktop/PISF/ISH/ISH_2017", layer = "ISH_2017")

Municipalities and channel with ANA definition

# Change specific variable names in the 'your_column_name' column
municipalities.poly <- municipalities.poly %>%
  mutate(ben_pisf = recode(ben_pisf, 
                                   "Municípios não beneficiados pelo PISF" = "Não beneficiados",
                                   "Municípios Beneficiados em 2035" = "Beneficiados em 2035",
                                   "Municípios Beneficiados em 2025" = "Beneficiados em 2025",
                                   "Municípios Beneficiados em 2020" = "Beneficiados em 2020"))

### UF sem adutoras

ggplot() + 
  geom_sf(data = municipalities.poly, aes(fill = mun_ufe_cd), show.legend = TRUE, size = 0.1)  +
  geom_sf(data = channel.poly, colour = "blue", size = 2) +
  labs(title = "Região do PISF por UF com canais", fill = "UF")

### sem audtoras

ggplot() + 
  geom_sf(data = municipalities.poly, aes(fill = ben_pisf), show.legend = TRUE, size = 0.1)  +
  geom_sf(data = channel.poly, colour = "blue", size = 2) +
  labs(title = "Municípios beneficiados, definição ANA com canais", fill = "Grupos")

### com adutoras
ggplot() +
  geom_sf(data = municipalities.poly, aes(fill = ben_pisf), show.legend = TRUE, size = 0.1) +
  geom_sf(data = channel.poly, colour = "blue", size = 2) +
  geom_sf(data = adutoras.poly, colour = "green", size = 0.5) +
  labs(title = "Municípios beneficiados, definição ANA com adutoras e canais", fill = "Grupos")

Creating the unified table

Intersect channel and main pipelines

### grupo definição ANA

grupo <- fread("d:/Users/thomas.daher/Desktop/PISF/Grupos - Grupos2.csv")
grupo$cod_ibge <- as.character(grupo$cod_ibge)

### Data cleaning

municipalities.poly$cod_ibge <- as.character(municipalities.poly$cod_ibge)
grupo_geom <- merge(grupo, municipalities.poly, by.x = "cod_ibge", by.y = "cod_ibge",
              all.x = TRUE)

### intersect mun and channel
sf_use_s2(FALSE)
## Spherical geometry (s2) switched off
intersect_mun_channel <- st_intersection(municipalities.poly, channel.poly)
## although coordinates are longitude/latitude, st_intersection assumes that they
## are planar
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
intersect_mun_channel <- as.data.frame(unique(intersect_mun_channel$cod_ibge))
colnames(intersect_mun_channel) <- "cod_ibge"

intersect_mun_channel <- merge(intersect_mun_channel, municipalities.poly, by = "cod_ibge", all.x = TRUE)

### Channel

ggplot() + 
  geom_sf(data = intersect_mun_channel, aes(fill = mun_ufe_cd, geometry = geometry), show.legend = TRUE, size = 0.1)  +
  geom_sf(data = channel.poly, colour = "blue", size = 2) +
  labs(title = "Municipios onde há canais", fill = "UF")

### watermains
intersect_mun_watermains <- st_intersection(municipalities.poly, adutoras.poly)
## although coordinates are longitude/latitude, st_intersection assumes that they
## are planar
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
intersect_mun_watermains <- as.data.frame(unique(intersect_mun_watermains$cod_ibge))
colnames(intersect_mun_watermains) <- "cod_ibge"
intersect_mun_watermains <- merge(intersect_mun_watermains, municipalities.poly, by = "cod_ibge", all.x = TRUE)

### watermains_plot
ggplot() + 
  geom_sf(data = intersect_mun_watermains, aes(fill = mun_ufe_cd, geometry = geometry), show.legend = TRUE, size = 0.1)  +
  geom_sf(data = adutoras.poly, colour = "green", size = 0.5) +
  labs(title = "Municipios onde há adutoras", fill = "UF")

### Channel construction group

### Creating municipalities with construction group (obras_group)


obras_goup <- inner_join(grupo %>% filter(grupo == "tratamento"), intersect_mun_channel, by = "cod_ibge")

### Plot
ggplot() + 
  geom_sf(data = obras_goup, aes(fill = eixo, geometry = geometry), show.legend = TRUE, size = 0.1)  +
  labs(title = "municipios onde houveram obras dos canais", fill = "Obra")

### condicional se houve obras
obras_condicional <- data.frame(cod_ibge = municipalities.poly$cod_ibge, obras =    
                                  ifelse(municipalities.poly$cod_ibge %in% obras_goup$cod_ibge, 1, 0))

Main pipelines group

adutoras_condicional <- data.frame(cod_ibge = municipalities.poly$cod_ibge, adutora =    
                                  ifelse(municipalities.poly$cod_ibge %in% intersect_mun_watermains$cod_ibge, 1, 0))

ggplot() + 
  geom_sf(data = intersect_mun_watermains, aes(fill = ben_pisf, geometry = geometry), show.legend = TRUE, size = 0.1)  +
  labs(title = "Municipios com adutoras e definição ANA", fill = "UF") +
geom_sf(data = adutoras.poly, colour = "green", size = 0.5)

Index

Ecossistemica

Econômica

Humana

Resiliência

Unified data

unified_data <- merge(obras_condicional, adutoras_condicional, by = "cod_ibge", all.x = TRUE)
unified_data <- merge(municipalities.poly,unified_data , by = "cod_ibge", all.x = TRUE) %>% select(-FID)

### ISH join_mun

ISH_mun <- merge(merge(merge(merge(ISH_brasil, ISH_economic, by = "cod_ibge", all = TRUE), ISH_human, by = "cod_ibge", all = TRUE), ISH_resilience, by = "cod_ibge", all = TRUE), ISH_ecosistem, by = "cod_ibge", all = TRUE)

rm(ISH_brasil,ISH_economic,ISH_human,ISH_resilience,ISH_ecosistem)
gc()
##             used   (Mb) gc trigger   (Mb)  max used   (Mb)
## Ncells   7767151  414.9   15176197  810.5  15176197  810.5
## Vcells 161434384 1231.7  386241407 2946.8 367089189 2800.7
unified_data <- merge(unified_data, ISH_mun, by = "cod_ibge", all.x = TRUE)

Plots by ottobacias

### Plot geral
ggplot() +
  geom_sf(data = overlap, aes(fill = brasil), size = 0.1, color = NA) +
  scale_fill_manual(values = c("Alto" = "red", "Médio" = "yellow", "Baixo" = "green", "Mínimo" = "blue", "NA" = "gray"), 
                    guide = guide_legend(title = "Risco", 
                                         override.aes = list(size = 5))) +
  labs(title = "Risco hídrico ottobacias geral")

### Plot humano
ggplot() +
  geom_sf(data = overlap, aes(fill = humana), size = 0.1, color = NA) +
  scale_fill_manual(values = c("Alto" = "red", "Médio" = "yellow", "Baixo" = "green", "Mínimo" = "blue", "NA" = "gray"), 
                    guide = guide_legend(title = "Risco", 
                                         override.aes = list(size = 5))) +
  labs(title = "Risco hídrico ottobacias humano")

### Plot economico
ggplot() +
  geom_sf(data = overlap, aes(fill = economica), size = 0.1, color = NA) +
  scale_fill_manual(values = c("Alto" = "red", "Médio" = "yellow", "Baixo" = "green", "Mínimo" = "blue", "NA" = "gray"), 
                    guide = guide_legend(title = "Risco", 
                                         override.aes = list(size = 5))) +
  labs(title = "Risco hídrico ottobacias economica")

### Plot ecossistemica
ggplot() +
  geom_sf(data = overlap, aes(fill = ecossistem), size = 0.1, color = NA) +
  scale_fill_manual(values = c("Alto" = "red", "Médio" = "yellow", "Baixo" = "green", "Mínimo" = "blue", "NA" = "gray"), 
                    guide = guide_legend(title = "Risco", 
                                         override.aes = list(size = 5))) +
  labs(title = "Risco hídrico ottobacias ecossistemica")

### Plot resiliencia
ggplot() +
  geom_sf(data = overlap, aes(fill = resilienc), size = 0.1, color = NA) +
  scale_fill_manual(values = c("Alto" = "red", "Médio" = "yellow", "Baixo" = "green", "Mínimo" = "blue", "NA" = "gray"), 
                    guide = guide_legend(title = "Risco", 
                                         override.aes = list(size = 5))) +
  labs(title = "Risco hídrico ottobacias resiliencia")

Plot Brasil (general index)

ISH.poly_mun <- merge(municipalities.poly, ISH_mun, by = "cod_ibge", all.x = TRUE)
ggplot() +
  geom_sf(data = ISH.poly_mun, aes(fill = factor(brasil, levels = c("Alto","Médio","Baixo","Mínimo","NA"))), size = 0.1) +
  scale_fill_manual(values = c("Alto" = "red", "Médio" = "yellow", "Baixo" = "green", "Mínimo" = "blue", "NA" = "gray"), 
                    guide = guide_legend(title = "Risco", 
                                         override.aes = list(size = 5))) +
  labs(title = "Risco hídrico municipal geral") + 
  geom_sf(data = channel.poly, colour = "black", size = 2)

Plot ecossistem

ggplot() +
  geom_sf(data = ISH.poly_mun, aes(fill = factor(ecossistem, levels = c("Alto","Médio","Baixo","Mínimo","NA"))), size = 0.1) +
  scale_fill_manual(values = c("Alto" = "red", "Médio" = "yellow", "Baixo" = "green", "Mínimo" = "blue", "NA" = "gray"), 
                    guide = guide_legend(title = "Risco", 
                                         override.aes = list(size = 5))) +
  labs(title = "Risco hídrico municipal ecossitemico")+ 
  geom_sf(data = channel.poly, colour = "black", size = 2)

Plot Human

ggplot() +
  geom_sf(data = ISH.poly_mun, aes(fill = factor(humana, levels = c("Alto","Médio","Baixo","Mínimo","NA"))), size = 0.1) +
  scale_fill_manual(values = c("Alto" = "red", "Médio" = "yellow", "Baixo" = "green", "Mínimo" = "blue", "NA" = "gray"), 
                    guide = guide_legend(title = "Risco", 
                                         override.aes = list(size = 5))) +
  labs(title = "Risco hídrico municipal humano")+ 
  geom_sf(data = channel.poly, colour = "black", size = 2)

Plot economic

ggplot() +
  geom_sf(data = ISH.poly_mun, aes(fill = factor(economica, levels = c("Alto","Médio","Baixo","Mínimo","NA"))), size = 0.1) +
  scale_fill_manual(values = c("Alto" = "red", "Médio" = "yellow", "Baixo" = "green", "Mínimo" = "blue", "NA" = "gray"), 
                    guide = guide_legend(title = "Risco", 
                                         override.aes = list(size = 5))) +
  labs(title = "Risco hídrico municipal econômico")+ 
  geom_sf(data = channel.poly, colour = "black", size = 2)

Plot Resilience

ggplot() +
  geom_sf(data = ISH.poly_mun, aes(fill = factor(resilienc, levels = c("Alto","Médio","Baixo","Mínimo","NA"))), size = 0.1) +
  scale_fill_manual(values = c("Alto" = "red", "Médio" = "yellow", "Baixo" = "green", "Mínimo" = "blue", "NA" = "gray"), 
                    guide = guide_legend(title = "Risco", 
                                         override.aes = list(size = 5))) +
  labs(title = "Risco hídrico municipal resiliência") + 
  geom_sf(data = channel.poly, colour = "black", size = 2)

Construction group analysis

Loading data

### pib_municipal

pib <- fread("d:/Users/thomas.daher/Desktop/PISF/pesquisas_anuais/pib_municipal_serie_historica.csv")
colnames(pib)[4]  <- "pib_municipal"
colnames(pib)[1]  <- "cod_ibge"
pib <- na.omit(pib)

### merging pib_municipal

pib_grupo <- merge(pib, grupo, by.x = "cod_ibge",  by.y = 'cod_ibge',
              all.x = TRUE)
pib_grupo <- pib_grupo %>% filter(!is.na(grupo)) %>% select(-Município)

### PAM

PAM <- fread("d:/Users/thomas.daher/Desktop/PISF/pesquisas_anuais/PAM com culturas.csv")
colnames(PAM) <- c("cod_ibge", "Ano", "cultura","area_plantada")
PAM <- PAM %>% filter(cod_ibge != 1)
PAM <- PAM %>% filter(cultura != "Total")
PAM$area_plantada <- as.numeric(PAM$area_plantada)
## Warning: NAs introduzidos por coerção
PAM$area_plantada[is.na(PAM$area_plantada)] <- 0

### Merging PAM

PAM$cod_ibge <- as.character(PAM$cod_ibge)
PAM_grupo <- PAM %>%
  left_join(grupo_geom, by = "cod_ibge")
## Warning in left_join(., grupo_geom, by = "cod_ibge"): Detected an unexpected many-to-many relationship between `x` and `y`.
## i Row 484501 of `x` matches multiple rows in `y`.
## i Row 1 of `y` matches multiple rows in `x`.
## i If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.
PAM_grupo <- PAM_grupo %>% filter(!is.na(nm_municipio))
gc()
##             used   (Mb) gc trigger   (Mb)  max used   (Mb)
## Ncells   8808443  470.5   20756531 1108.6  20756531 1108.6
## Vcells 184611654 1408.5  534173080 4075.5 576529176 4398.6

Plot municipalities and channel with research definition

ggplot() + 
  geom_sf(data = grupo_geom, aes(fill = grupo, geometry = geometry), show.legend = TRUE, size = 0.1) + 
  geom_sf(data = channel.poly, colour = "blue", size = 2)

Descriptive statistics

Municipality GDP

pib_grupo$pib_municipal <- as.numeric(pib_grupo$pib_municipal)
mean_pib <- pib_grupo %>% group_by(grupo, Ano) %>% summarise(pib_mean = mean(pib_municipal))
## `summarise()` has grouped output by 'grupo'. You can override using the
## `.groups` argument.
ggplot(mean_pib, mapping = aes(x = Ano, y = pib_mean, group = grupo)) + geom_line(aes(colour = grupo)) + ggtitle("média do pib municipal") + geom_vline(xintercept = 2008, linetype="dotted", 
                color = "blue", size=1) +
  geom_vline(xintercept = 2012, linetype="dotted", 
                color = "red", size=1) +
  geom_vline(xintercept = 2014, linetype="dotted", 
                color = "blue", size=1)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## i Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

### scatter plot of cities mean

mean_pib <- pib_grupo %>% group_by(grupo, nm_municipio) %>% summarise(pib_mean = mean(pib_municipal))
## `summarise()` has grouped output by 'grupo'. You can override using the
## `.groups` argument.
ggplot(mean_pib, mapping = aes(x = grupo, y = pib_mean, group = grupo)) + geom_point(aes(colour = grupo)) + ggtitle("média do pib municipal, ponto = município")

### pib sum by group

pib_grupo$pib_municipal <- as.numeric(pib_grupo$pib_municipal)
sum_pib <- pib_grupo %>% group_by(grupo, Ano) %>% summarise(pib_sum = log(sum(pib_municipal)))
## `summarise()` has grouped output by 'grupo'. You can override using the
## `.groups` argument.
ggplot(sum_pib, mapping = aes(x = Ano, y = pib_sum, gorup = grupo)) + geom_line(aes(colour = grupo)) + ggtitle("log da soma do pib municipal") + geom_vline(xintercept = 2008, linetype="dotted", 
                color = "blue", size=1) +
  geom_vline(xintercept = 2012, linetype="dotted", 
                color = "red", size=1) +
  geom_vline(xintercept = 2014, linetype="dotted", 
                color = "blue", size=1)

PAM

### mean per group

mean_area <- PAM_grupo %>% group_by(grupo, Ano) %>% summarise(pam_mean = mean(area_plantada))
## `summarise()` has grouped output by 'grupo'. You can override using the
## `.groups` argument.
ggplot(mean_area, mapping = aes(x = Ano, y = pam_mean, gorup = grupo)) + geom_line(aes(colour = grupo)) + ggtitle("média da área plantada") + geom_vline(xintercept = 2008, linetype="dotted", 
                color = "blue", size=1) +
  geom_vline(xintercept = 2012, linetype="dotted", 
                color = "red", size=1) +
  geom_vline(xintercept = 2014, linetype="dotted", 
                color = "blue", size=1)

### scatter plot of cities mean

mean_area <- PAM_grupo %>% group_by(grupo, nm_municipio) %>% summarise(pam_mean = mean(area_plantada))
## `summarise()` has grouped output by 'grupo'. You can override using the
## `.groups` argument.
ggplot(mean_area, mapping = aes(x = grupo, y = pam_mean, group = grupo)) + geom_point(aes(colour = grupo)) + ggtitle("média da área plantada")

### PAM log sum by group

sum_area <- PAM_grupo %>% group_by(grupo, Ano) %>% summarise(pam_sum = log(sum(area_plantada)))
## `summarise()` has grouped output by 'grupo'. You can override using the
## `.groups` argument.
ggplot(sum_area, mapping = aes(x = Ano, y = pam_sum)) + geom_line(aes(colour = grupo)) + ggtitle("log da soma da área plantada") + geom_vline(xintercept = 2008, linetype="dotted", 
                color = "blue", size=1) +
  geom_vline(xintercept = 2012, linetype="dotted", 
                color = "red", size=1) +
  geom_vline(xintercept = 2014, linetype="dotted", 
                color = "blue", size=1)

### production by crops

sum_area <- PAM_grupo %>% 
  group_by(cultura, Ano) %>% 
  summarise(pam_sum = sum(area_plantada, na.rm = TRUE))
## `summarise()` has grouped output by 'cultura'. You can override using the
## `.groups` argument.
ggplot(sum_area, mapping = aes(x = Ano, y = pam_sum)) + geom_line(aes(colour = cultura)) + ggtitle("produção total por tipo de cultura") + geom_vline(xintercept = 2008, linetype="dotted", 
                color = "blue", size=1) +
  geom_vline(xintercept = 2012, linetype="dotted", 
                color = "red", size=1) +
  geom_vline(xintercept = 2014, linetype="dotted", 
                color = "blue", size=1)

### Major crops

sum_area <- PAM_grupo %>% 
  group_by(cultura, Ano) %>% 
  summarise(pam_sum = sum(area_plantada, na.rm = TRUE)) %>% 
  filter((cultura %in% c('Milho (em grão)', 'Feijão (em grão)','Cana-de-açúcar', 'Mandioca')))
## `summarise()` has grouped output by 'cultura'. You can override using the
## `.groups` argument.
ggplot(sum_area, mapping = aes(x = Ano, y = pam_sum)) + geom_line(aes(colour = cultura)) + ggtitle("Produção das principais culturas") + geom_vline(xintercept = 2008, linetype="dotted", 
                color = "blue", size=1) +
  geom_vline(xintercept = 2012, linetype="dotted", 
                color = "red", size=1) +
  geom_vline(xintercept = 2014, linetype="dotted", 
                color = "blue", size=1)

PPM

PPM <- fread("d:/Users/thomas.daher/Desktop/PISF/pesquisas_anuais/PPM.csv")

colnames(PPM)[1:5] <- c('cod_ibge', 'mun_nm','ano','tipo', 'numero_rebanho')
PPM$numero_rebanho <- as.numeric(PPM$numero_rebanho)
## Warning: NAs introduzidos por coerção
### Merging PPM

PPM$cod_ibge <- as.character(PPM$cod_ibge)
PPM_grupo <- PPM %>%
  left_join(grupo_geom, by = "cod_ibge")
## Warning in left_join(., grupo_geom, by = "cod_ibge"): Detected an unexpected many-to-many relationship between `x` and `y`.
## i Row 228201 of `x` matches multiple rows in `y`.
## i Row 1 of `y` matches multiple rows in `x`.
## i If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.
PPM_grupo <- PPM_grupo %>% filter(!is.na(nm_municipio))
gc()
##             used   (Mb) gc trigger   (Mb)  max used   (Mb)
## Ncells   8834555  471.9   20756531 1108.6  20756531 1108.6
## Vcells 191533363 1461.3  534173080 4075.5 576529176 4398.6
### Media rebanho por grupo

a <- PPM_grupo %>% 
  group_by(grupo, ano) %>% 
  summarise(rebanho = sum(numero_rebanho, na.rm = TRUE))
## `summarise()` has grouped output by 'grupo'. You can override using the
## `.groups` argument.
ggplot(a, aes(x = ano, y = rebanho, color = grupo)) + 
  geom_line() + 
  ggtitle("quantidade de rebanho por grupo") + 
  geom_vline(xintercept = 2008, linetype = "dotted", color = "blue", size = 1) +
  geom_vline(xintercept = 2012, linetype = "dotted", color = "red", size = 1) +
  geom_vline(xintercept = 2014, linetype = "dotted", color = "blue", size = 1)

### Rebanho por tipo

a <- PPM_grupo %>% 
  group_by(tipo, ano) %>% 
  summarise(rebanho = sum(numero_rebanho, na.rm = TRUE))
## `summarise()` has grouped output by 'tipo'. You can override using the
## `.groups` argument.
ggplot(a, aes(x = ano, y = rebanho, color = tipo)) + 
  geom_line() + 
  ggtitle("quantidade de rebanho por tipo") + 
  geom_vline(xintercept = 2008, linetype = "dotted", color = "blue", size = 1) +
  geom_vline(xintercept = 2012, linetype = "dotted", color = "red", size = 1) +
  geom_vline(xintercept = 2014, linetype = "dotted", color = "blue", size = 1)

### Rebanho por tipo sem galinácios

a <- PPM_grupo %>% 
  group_by(tipo, ano) %>% 
  summarise(rebanho = sum(numero_rebanho, na.rm = TRUE)) %>%
  filter(!(tipo %in% c('Galináceos - galinhas', 'Galináceos - total')))
## `summarise()` has grouped output by 'tipo'. You can override using the
## `.groups` argument.
ggplot(a, aes(x = ano, y = rebanho, color = tipo)) + 
  geom_line() + 
  ggtitle("média de rebanho por grupo") + 
  geom_vline(xintercept = 2008, linetype = "dotted", color = "blue", size = 1) +
  geom_vline(xintercept = 2012, linetype = "dotted", color = "red", size = 1) +
  geom_vline(xintercept = 2014, linetype = "dotted", color = "blue", size = 1)

Datasus data

devtools::install_github("rfsaldanha/microdatasus")
## Skipping install of 'microdatasus' from a github remote, the SHA1 (09676794) has not changed since last install.
##   Use `force = TRUE` to force installation
library(microdatasus)

dados <- fetch_datasus(year_start = 2013, year_end = 2013, uf = "CE", information_system = "SIM-DO")
## Your local Internet connection seems to be ok.
## DataSUS FTP server seems to be up. Starting download...
dados <- process_sim(dados)