library(haven)
## Warning: package 'haven' was built under R version 4.0.5
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.5
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.5     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.0.2     v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.0.5
## Warning: package 'tibble' was built under R version 4.0.5
## Warning: package 'tidyr' was built under R version 4.0.5
## Warning: package 'readr' was built under R version 4.0.5
## Warning: package 'dplyr' was built under R version 4.0.5
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(sf)
## Warning: package 'sf' was built under R version 4.0.5
## Linking to GEOS 3.9.1, GDAL 3.2.1, PROJ 7.2.1
library(tmap)
## Warning: package 'tmap' was built under R version 4.0.5
## Registered S3 methods overwritten by 'stars':
##   method             from
##   st_bbox.SpatRaster sf  
##   st_crs.SpatRaster  sf
## Warning: multiple methods tables found for 'direction'
## Warning: multiple methods tables found for 'gridDistance'
library(raster)
## Warning: package 'raster' was built under R version 4.0.5
## Loading required package: sp
## 
## Attaching package: 'raster'
## The following object is masked from 'package:dplyr':
## 
##     select
setwd("C:/Users/chiga/OneDrive/Documents/UCSD/SPRING2022/Sustainable Development with Spatial Analysis/project")

dhs_2014 <- read_dta("./data/raw_data/cambodia_2014_hh/KHHR73FL.DTA")
dhs_shp_2014 <- st_read("./data/raw_data/cambodia_2014_shp/KHGE71FL.shp")
## Reading layer `KHGE71FL' from data source 
##   `C:\Users\chiga\OneDrive\Documents\UCSD\SPRING2022\Sustainable Development with Spatial Analysis\project\data\raw_data\cambodia_2014_shp\KHGE71FL.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 611 features and 20 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 102.3609 ymin: 10.45548 xmax: 107.4111 ymax: 14.40743
## Geodetic CRS:  WGS 84
cam_adm1_shp <- 
  st_read("C:/Users/chiga/Projects/geo_data_master/gadm/cambodia/gadm40_KHM_1.shp")
## Reading layer `gadm40_KHM_1' from data source 
##   `C:\Users\chiga\Projects\geo_data_master\gadm\cambodia\gadm40_KHM_1.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 25 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 102.3355 ymin: 9.91361 xmax: 107.6303 ymax: 14.68817
## Geodetic CRS:  WGS 84
pov_ras <- raster("./data/geo_data/pov_ras.tif")
## Warning in showSRID(SRS_string, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown_based_on_WGS84_ellipsoid in Proj4
## definition
tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(dhs_shp_2014) + 
  tm_dots() + 
  tm_shape(cam_adm1_shp) + 
  tm_borders()
# Time it takes to get to water sources in dry season 
dhs_2014_sh104_mod <- 
  dhs_2014 %>% 
  mutate(sh104_mod = as.numeric(sh104)) %>% 
  mutate(sh104_mod = if_else(sh104_mod == 996, 0, sh104_mod)) %>% 
  filter(sh104_mod != 998) 
  
dhs_2014_sh104_clust <- 
  dhs_2014_sh104_mod %>% 
  group_by(hv001) %>% 
  summarize(sh104_clust_avg = mean(sh104_mod, rm.na=T)) %>% 
  mutate(sh104_clust_avg = as.numeric(sh104_clust_avg))
  
  
hist(dhs_2014_sh104_clust$sh104_clust_avg)

dhs_shp_2014 <- 
  left_join(dhs_shp_2014, dhs_2014_sh104_clust, by = c("DHSCLUST"="hv001"))

tm_shape(dhs_shp_2014) + 
  tm_dots(col = "sh104_clust_avg") + 
  tm_shape(cam_adm1_shp) + 
  tm_borders()
# Time it takes to get to water source during the wet season
dhs_2014_sh104d_mod <- 
  dhs_2014 %>% 
  mutate(sh104d_mod = as.numeric(sh104d)) %>% 
  mutate(sh104d_mod = if_else(sh104d_mod == 996, 0, sh104d_mod)) %>% 
  filter(sh104d_mod != 998) 
  
dhs_2014_sh104d_clust <- 
  dhs_2014_sh104d_mod %>% 
  group_by(hv001) %>% 
  summarize(sh104d_clust_avg = mean(sh104d_mod, rm.na=T)) %>% 
  mutate(sh104d_clust_avg = as.numeric(sh104d_clust_avg))
  
  
hist(dhs_2014_sh104d_clust$sh104d_clust_avg)

dhs_shp_2014 <- 
  left_join(dhs_shp_2014, dhs_2014_sh104d_clust, by = c("DHSCLUST"="hv001"))

tm_shape(dhs_shp_2014) + 
  tm_dots(col = "sh104d_clust_avg") + 
  tm_shape(cam_adm1_shp) + 
  tm_borders()
# time spent during wet season - time spent during wet season
dhs_shp_2014 <- 
  dhs_shp_2014 %>% mutate(wet_dry_diff = sh104d_clust_avg - 
                            sh104_clust_avg)

red means more divergence between wet and dry season. Specifically, it takes longer in dry season than wet season

tm_shape(dhs_shp_2014) + 
  tm_dots(col = "wet_dry_diff") + 
  tm_shape(cam_adm1_shp) + 
  tm_borders() + 
  tm_shape(pov_ras) + 
  tm_raster()  
## Variable(s) "wet_dry_diff" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "NA" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

This is to look at the average time to get to a water source in the dry season

tm_shape(dhs_shp_2014) + 
  tm_dots(col = "sh104_clust_avg") + 
  tm_shape(cam_adm1_shp) + 
  tm_borders() + 
  tm_shape(pov_ras) + 
  tm_raster()  
## Variable(s) "NA" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.