Install packages

Loading shapefiles

municipalities.poly <- read_sf(dsn = "d:/Users/thomas.daher/Desktop/PISF/Municipios_beneficiados", layer = "Atlas_%C3%81gua_2021_-_PISF_-_Munic%C3%ADpios_Beneficiados")

municipalities.poly$ben_pisf <- gsub("MunicĂ­pios nĂ£o Beneficiados pelo PISF", "MunicĂ­pios nĂ£o beneficiados pelo PISF", municipalities.poly$ben_pisf)

channel.poly <- read_sf(dsn = "d:/Users/thomas.daher/Desktop/PISF/Canais_PISF", layer = "Atlas_%C3%81gua_2021_-_PISF_-_Canais")

adutoras.poly <- read_sf(dsn = "d:/Users/thomas.daher/Desktop/PISF/Adutoras_PISF", layer = "Atlas_%C3%81gua_2021_-_PISF_-_Adutoras")

bacia.poly <-  read_sf(dsn = "d:/Users/thomas.daher/Desktop/PISF/Bacias_semi arido_nordeste1", layer = "GEOFT_BHO_ANO_TDR")

ISH.poly <-  read_sf(dsn = "d:/Users/thomas.daher/Desktop/PISF/ISH/ISH_2017", layer = "ISH_2017")

extract data

### write.csv(municipalities.df, file = "d:/Users/thomas.daher/Desktop/PISF/Municipios_beneficiados/municipios_df.csv")

Plot municipalities and channel with ANA definition

### sem audtoras

ggplot() + 
  geom_sf(data = municipalities.poly, aes(fill = mun_ufe_cd), show.legend = TRUE, size = 0.1)  +
  geom_sf(data = channel.poly, colour = "blue", size = 2)

### com adutoras
ggplot() + 
  geom_sf(data = municipalities.poly, aes(fill = ben_pisf), show.legend = TRUE, size = 0.1)  +
  geom_sf(data = channel.poly, colour = "blue", size = 2) + 
  geom_sf(data = adutoras.poly, colour = "green", size = 0.5)

## Grupos de controle e tratamento

Queen weight matriz

list.queen<-poly2nb(municipalities.poly, queen=TRUE)

W<-nb2listw(list.queen, style="W", zero.policy=TRUE)

Loading data

### grupo

grupo <- fread("d:/Users/thomas.daher/Desktop/PISF/Grupos - Grupos2.csv")
grupo$cod_ibge <- as.character(grupo$cod_ibge)


### Data cleaning


municipalities.poly$cod_ibge <- as.character(municipalities.poly$cod_ibge)
grupo_geom <- merge(grupo, municipalities.poly, by.x = "cod_ibge", by.y = "cod_ibge",
              all.x = TRUE)


### pib_municipal

pib <- fread("d:/Users/thomas.daher/Desktop/PISF/pesquisas_anuais/pib_municipal_serie_historica.csv")
colnames(pib)[4]  <- "pib_municipal"
colnames(pib)[1]  <- "cod_ibge"
pib <- na.omit(pib)

### merging pib_municipal

pib_grupo <- merge(pib, grupo, by.x = "cod_ibge",  by.y = 'cod_ibge',
              all.x = TRUE)
pib_grupo <- pib_grupo %>% filter(!is.na(grupo)) %>% select(-MunicĂ­pio)

### PAM

PAM <- fread("d:/Users/thomas.daher/Desktop/PISF/pesquisas_anuais/PAM com culturas.csv")
colnames(PAM) <- c("cod_ibge", "Ano", "cultura","area_plantada")
PAM <- PAM %>% filter(cod_ibge != 1)
PAM <- PAM %>% filter(cultura != "Total")
PAM$area_plantada <- as.numeric(PAM$area_plantada)
## Warning: NAs introduzidos por coerĂ§Ă£o
PAM$area_plantada[is.na(PAM$area_plantada)] <- 0

### Merging PAM

PAM$cod_ibge <- as.character(PAM$cod_ibge)
PAM_grupo <- PAM %>%
  left_join(grupo_geom, by = "cod_ibge")
## Warning in left_join(., grupo_geom, by = "cod_ibge"): Each row in `x` is expected to match at most 1 row in `y`.
## i Row 484501 of `x` matches multiple rows.
## i If multiple matches are expected, set `multiple = "all"` to silence this
##   warning.
PAM_grupo <- PAM_grupo %>% filter(!is.na(nm_municipio))
gc()
##             used   (Mb) gc trigger   (Mb)  max used   (Mb)
## Ncells   7711638  411.9   13154401  702.6  13154401  702.6
## Vcells 165951765 1266.2  300288099 2291.1 300251920 2290.8

Plot municipalities and channel with research definition

ggplot() + 
  geom_sf(data = grupo_geom, aes(fill = grupo, geometry = geometry), show.legend = TRUE, size = 0.1) + 
  geom_sf(data = channel.poly, colour = "blue", size = 2)

Descriptive statistics

Municipality GDP

pib_grupo$pib_municipal <- as.numeric(pib_grupo$pib_municipal)
mean_pib <- pib_grupo %>% group_by(grupo, Ano) %>% summarise(pib_mean = mean(pib_municipal))
## `summarise()` has grouped output by 'grupo'. You can override using the
## `.groups` argument.
ggplot(mean_pib, mapping = aes(x = Ano, y = pib_mean, group = grupo)) + geom_line(aes(colour = grupo)) + ggtitle("média do pib municipal") + geom_vline(xintercept = 2008, linetype="dotted", 
                color = "blue", size=1) +
  geom_vline(xintercept = 2012, linetype="dotted", 
                color = "red", size=1) +
  geom_vline(xintercept = 2014, linetype="dotted", 
                color = "blue", size=1)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## i Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

### scatter plot of cities mean

mean_pib <- pib_grupo %>% group_by(grupo, nm_municipio) %>% summarise(pib_mean = mean(pib_municipal))
## `summarise()` has grouped output by 'grupo'. You can override using the
## `.groups` argument.
ggplot(mean_pib, mapping = aes(x = grupo, y = pib_mean, group = grupo)) + geom_point(aes(colour = grupo)) + ggtitle("média do pib municipal, ponto = município")

### pib sum by group

pib_grupo$pib_municipal <- as.numeric(pib_grupo$pib_municipal)
sum_pib <- pib_grupo %>% group_by(grupo, Ano) %>% summarise(pib_sum = log(sum(pib_municipal)))
## `summarise()` has grouped output by 'grupo'. You can override using the
## `.groups` argument.
ggplot(sum_pib, mapping = aes(x = Ano, y = pib_sum, gorup = grupo)) + geom_line(aes(colour = grupo)) + ggtitle("log da soma do pib municipal") + geom_vline(xintercept = 2008, linetype="dotted", 
                color = "blue", size=1) +
  geom_vline(xintercept = 2012, linetype="dotted", 
                color = "red", size=1) +
  geom_vline(xintercept = 2014, linetype="dotted", 
                color = "blue", size=1)

PAM

### mean per group

mean_area <- PAM_grupo %>% group_by(grupo, Ano) %>% summarise(pam_mean = mean(area_plantada))
## `summarise()` has grouped output by 'grupo'. You can override using the
## `.groups` argument.
ggplot(mean_area, mapping = aes(x = Ano, y = pam_mean, gorup = grupo)) + geom_line(aes(colour = grupo)) + ggtitle("mĂ©dia da Ă¡rea plantada") + geom_vline(xintercept = 2008, linetype="dotted", 
                color = "blue", size=1) +
  geom_vline(xintercept = 2012, linetype="dotted", 
                color = "red", size=1) +
  geom_vline(xintercept = 2014, linetype="dotted", 
                color = "blue", size=1)

### scatter plot of cities mean

mean_area <- PAM_grupo %>% group_by(grupo, nm_municipio) %>% summarise(pam_mean = mean(area_plantada))
## `summarise()` has grouped output by 'grupo'. You can override using the
## `.groups` argument.
ggplot(mean_area, mapping = aes(x = grupo, y = pam_mean, group = grupo)) + geom_point(aes(colour = grupo)) + ggtitle("mĂ©dia da Ă¡rea plantada")

### PAM log sum by group

sum_area <- PAM_grupo %>% group_by(grupo, Ano) %>% summarise(pam_sum = log(sum(area_plantada)))
## `summarise()` has grouped output by 'grupo'. You can override using the
## `.groups` argument.
ggplot(sum_area, mapping = aes(x = Ano, y = pam_sum)) + geom_line(aes(colour = grupo)) + ggtitle("log da soma da Ă¡rea plantada") + geom_vline(xintercept = 2008, linetype="dotted", 
                color = "blue", size=1) +
  geom_vline(xintercept = 2012, linetype="dotted", 
                color = "red", size=1) +
  geom_vline(xintercept = 2014, linetype="dotted", 
                color = "blue", size=1)

### production by crops

sum_area <- PAM_grupo %>% 
  group_by(cultura, Ano) %>% 
  summarise(pam_sum = sum(area_plantada, na.rm = TRUE))
## `summarise()` has grouped output by 'cultura'. You can override using the
## `.groups` argument.
ggplot(sum_area, mapping = aes(x = Ano, y = pam_sum)) + geom_line(aes(colour = cultura)) + ggtitle("produĂ§Ă£o total por tipo de cultura") + geom_vline(xintercept = 2008, linetype="dotted", 
                color = "blue", size=1) +
  geom_vline(xintercept = 2012, linetype="dotted", 
                color = "red", size=1) +
  geom_vline(xintercept = 2014, linetype="dotted", 
                color = "blue", size=1)

### Major crops

sum_area <- PAM_grupo %>% 
  group_by(cultura, Ano) %>% 
  summarise(pam_sum = sum(area_plantada, na.rm = TRUE)) %>% 
  filter((cultura %in% c('Milho (em grĂ£o)', 'FeijĂ£o (em grĂ£o)','Cana-de-aĂ§Ăºcar', 'Mandioca')))
## `summarise()` has grouped output by 'cultura'. You can override using the
## `.groups` argument.
ggplot(sum_area, mapping = aes(x = Ano, y = pam_sum)) + geom_line(aes(colour = cultura)) + ggtitle("ProduĂ§Ă£o das principais culturas") + geom_vline(xintercept = 2008, linetype="dotted", 
                color = "blue", size=1) +
  geom_vline(xintercept = 2012, linetype="dotted", 
                color = "red", size=1) +
  geom_vline(xintercept = 2014, linetype="dotted", 
                color = "blue", size=1)

PPM

PPM <- fread("d:/Users/thomas.daher/Desktop/PISF/pesquisas_anuais/PPM.csv")

colnames(PPM)[1:5] <- c('cod_ibge', 'mun_nm','ano','tipo', 'numero_rebanho')
PPM$numero_rebanho <- as.numeric(PPM$numero_rebanho)
## Warning: NAs introduzidos por coerĂ§Ă£o
### Merging PPM

PPM$cod_ibge <- as.character(PPM$cod_ibge)
PPM_grupo <- PPM %>%
  left_join(grupo_geom, by = "cod_ibge")
## Warning in left_join(., grupo_geom, by = "cod_ibge"): Each row in `x` is expected to match at most 1 row in `y`.
## i Row 228201 of `x` matches multiple rows.
## i If multiple matches are expected, set `multiple = "all"` to silence this
##   warning.
PPM_grupo <- PPM_grupo %>% filter(!is.na(nm_municipio))
gc()
##             used   (Mb) gc trigger   (Mb)  max used   (Mb)
## Ncells   7748852  413.9   13154401  702.6  13154401  702.6
## Vcells 172893059 1319.1  300288099 2291.1 300251920 2290.8
### Media rebanho por grupo

a <- PPM_grupo %>% 
  group_by(grupo, ano) %>% 
  summarise(rebanho = sum(numero_rebanho, na.rm = TRUE))
## `summarise()` has grouped output by 'grupo'. You can override using the
## `.groups` argument.
ggplot(a, aes(x = ano, y = rebanho, color = grupo)) + 
  geom_line() + 
  ggtitle("quantidade de rebanho por grupo") + 
  geom_vline(xintercept = 2008, linetype = "dotted", color = "blue", size = 1) +
  geom_vline(xintercept = 2012, linetype = "dotted", color = "red", size = 1) +
  geom_vline(xintercept = 2014, linetype = "dotted", color = "blue", size = 1)

### Rebanho por tipo

a <- PPM_grupo %>% 
  group_by(tipo, ano) %>% 
  summarise(rebanho = sum(numero_rebanho, na.rm = TRUE))
## `summarise()` has grouped output by 'tipo'. You can override using the
## `.groups` argument.
ggplot(a, aes(x = ano, y = rebanho, color = tipo)) + 
  geom_line() + 
  ggtitle("quantidade de rebanho por tipo") + 
  geom_vline(xintercept = 2008, linetype = "dotted", color = "blue", size = 1) +
  geom_vline(xintercept = 2012, linetype = "dotted", color = "red", size = 1) +
  geom_vline(xintercept = 2014, linetype = "dotted", color = "blue", size = 1)

### Rebanho por tipo sem galinĂ¡cios

a <- PPM_grupo %>% 
  group_by(tipo, ano) %>% 
  summarise(rebanho = sum(numero_rebanho, na.rm = TRUE)) %>%
  filter(!(tipo %in% c('GalinĂ¡ceos - galinhas', 'GalinĂ¡ceos - total')))
## `summarise()` has grouped output by 'tipo'. You can override using the
## `.groups` argument.
ggplot(a, aes(x = ano, y = rebanho, color = tipo)) + 
  geom_line() + 
  ggtitle("média de rebanho por grupo") + 
  geom_vline(xintercept = 2008, linetype = "dotted", color = "blue", size = 1) +
  geom_vline(xintercept = 2012, linetype = "dotted", color = "red", size = 1) +
  geom_vline(xintercept = 2014, linetype = "dotted", color = "blue", size = 1)

## ISH

# Filter each poly.df

# municipalities

municipalities.poly <- municipalities.poly %>% select(cod_ibge,geometry)

# ISH.poly

ISH.poly <- ISH.poly %>% select(brasil,geometry)

colSums(is.na(ISH.poly))
##   brasil geometry 
##      333        0
sf_use_s2(FALSE)

# Intersect the two shapefiles to get the overlapping area
overlap <- st_intersection(municipalities.poly, ISH.poly)

# Calculate the area of each polygon in the overlapping area
overlap$area <- st_area(overlap)

# Calculate the area of each state polygon
municipalities.poly$area <- st_area(municipalities.poly)

# Filter the county polygons that exceed the area of the corresponding state polygon
exceeds <- overlap[overlap$area > municipalities.poly$area, ]

### filling the municipality
                  
overlap <- overlap %>% group_by(cod_ibge,brasil) %>% summarise(area = sum(area))


largest_polygons <- overlap %>%
  group_by(cod_ibge) %>%
  slice(which.max(area)) %>%
  select(cod_ibge, brasil, geometry) %>% st_drop_geometry()

ISH.poly_mun <- merge(municipalities.poly, largest_polygons, by = "cod_ibge", all.x = TRUE)

ggplot() +
  geom_sf(data = ISH.poly_mun, aes(fill = brasil), size = 0.1) +
  scale_fill_manual(values = c("Alto" = "red", "Médio" = "yellow", "Baixo" = "green", "Mínimo" = "blue", "NA" = "gray"), 
                    guide = guide_legend(title = "Risco", 
                                         override.aes = list(size = 5))) +
  labs(title = "Risco hĂ­drico municipal")