Story: Only roughly half of water facilities installed from 1900 to 1919 have been improved since their installation date.

# Load the water access point data published for TidyTuesday 2021-05-04.
# The URL is assembled from two pieces only to keep lines short.
water_url <- paste0(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/",
  "master/data/2021/2021-05-04/water.csv"
)
water <- readr::read_csv(file = water_url)
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   row_id = col_double(),
##   lat_deg = col_double(),
##   lon_deg = col_double(),
##   report_date = col_character(),
##   status_id = col_character(),
##   water_source = col_character(),
##   water_tech = col_character(),
##   facility_type = col_character(),
##   country_name = col_character(),
##   install_year = col_double(),
##   installer = col_character(),
##   pay = col_character(),
##   status = col_character()
## )
# Facilities with an install year before 1920. filter() also drops rows whose
# install_year is missing, because the comparison yields NA for them.
water1919 <- filter(water, install_year < 1920)

# Restrict to Zimbabwe.
waterzimbabwe <- filter(water1919, country_name == "Zimbabwe")

# Plot facility type against the date each facility was reported.
# report_date is read as character (see the column specification above), so
# mapping it directly gives a categorical axis sorted as text, not by time.
# Parsing it inside aes() gives a chronological axis and leaves the
# report_date column in `waterzimbabwe` as text for any later string matching.
# NOTE(review): the format is assumed to be month/day/year - confirm against
# the data; values that fail to parse become NA and are dropped by ggplot.
ggplot(
  waterzimbabwe,
  aes(x = as.Date(report_date, format = "%m/%d/%Y"), y = facility_type)
) +
  geom_point() +
  theme_pubr() +
  # A continuous date axis no longer needs the tiny 4.5 pt font that was
  # required to squeeze in one tick label per distinct date string.
  theme(text = element_text(family = "serif", size = 12)) +
  labs(
    x = "Date Inspected",
    y = "Facility Type",
    title = "Water Facilities in Zimbabwe from 1900-1919"
  )

Story: In Ghana, there appears to be almost no correlation between a facility's installation year and its longitude; the relationship looks essentially random.

# Reuse `water` when it is already loaded in this session; only download the
# CSV when it is missing, so the identical file is not fetched again for
# every story.
if (!exists("water") || !is.data.frame(water)) {
  water <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-05-04/water.csv")
}
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   row_id = col_double(),
##   lat_deg = col_double(),
##   lon_deg = col_double(),
##   report_date = col_character(),
##   status_id = col_character(),
##   water_source = col_character(),
##   water_tech = col_character(),
##   facility_type = col_character(),
##   country_name = col_character(),
##   install_year = col_double(),
##   installer = col_character(),
##   pay = col_character(),
##   status = col_character()
## )
# Facilities whose report date is exactly 11/06/2014. report_date is stored as
# text, so this is an exact string comparison.
newyear <- filter(water, report_date == "11/06/2014")

# Restrict to Ghana.
newYearGhana <- filter(newyear, country_name == "Ghana")

# Longitude against install year, drawn as a scatterplot. Each row is a
# separate facility, so joining them with geom_line() (which connects
# observations in x order) draws a zig-zag that suggests a trend the data
# does not contain. Points show the (lack of) correlation directly.
# Rows with a missing install_year or lon_deg are dropped by ggplot with a
# warning.
ggplot(newYearGhana, aes(x = install_year, y = lon_deg)) +
  geom_point() +
  theme_pubr() +
  theme(text = element_text(family = "serif", size = 12)) +
  labs(
    x = "Install Year",
    y = "Longitude",
    title = "Inspection Longitudes on 11/06/2014 in Ghana"
  )
## Warning: Removed 1 row(s) containing missing values (geom_path).

*Just for fun since I don’t think my stories above are particularly interesting.

Story: Zimbabwe had installations in more distinct years between 1900 and 1920 than any other country, although Swaziland had a very large number installed at the turn of the century. Every other country on this list lagged behind during this period.

# Reuse `water` when it is already loaded in this session; only download the
# CSV when it is missing, so the identical file is not fetched again for
# every story.
if (!exists("water") || !is.data.frame(water)) {
  water <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-05-04/water.csv")
}
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   row_id = col_double(),
##   lat_deg = col_double(),
##   lon_deg = col_double(),
##   report_date = col_character(),
##   status_id = col_character(),
##   water_source = col_character(),
##   water_tech = col_character(),
##   facility_type = col_character(),
##   country_name = col_character(),
##   install_year = col_double(),
##   installer = col_character(),
##   pay = col_character(),
##   status = col_character()
## )
# Facilities with an install year before 1921 (rows with a missing
# install_year are dropped by filter()).
water1921 <- filter(water, install_year < 1921)

# Binned counts of installations by year and country. The binwidth of 0.25
# is passed to geom_bin2d() unchanged; the legend is placed inside the panel
# at relative coordinates (0.85, 0.575).
ggplot(
  data = water1921,
  mapping = aes(x = install_year, y = country_name)
) +
  labs(
    title = "Water Supply Installation (1900-1920)",
    x = "Year Installed",
    y = "Country"
  ) +
  geom_bin2d(binwidth = 0.25) +
  theme_pubr() +
  theme(legend.position = c(0.85, 0.575))