Import the Data

# Import the data
# Literal "NA", single-space and empty cells are all read as missing values.
# FALSE is spelled out because `F` is an ordinary variable that can be reassigned.
army_dependents <- read.csv(
  "Army_Dependents.csv",
  stringsAsFactors = FALSE,
  na.strings = c("NA", " ", "")
)

army_census <- read.csv(
  "Irish Army Census.csv",
  stringsAsFactors = FALSE,
  na.strings = c("NA", " ", "")
)

# County boundary polygons, later merged with the per-county tallies for mapping
cty <- sf::st_read(dsn = "counties/counties.shp")
## Reading layer `counties' from data source 
##   `/Users/jack/Dropbox/R_Book_Chapter/counties/counties.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 32 features and 15 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -10.66262 ymin: 51.38887 xmax: -5.426816 ymax: 55.4353
## Geodetic CRS:  WGS 84
# Switch sf away from s2 spherical geometry for the rest of the session
sf::sf_use_s2(FALSE)
## Spherical geometry (s2) switched off
# Work with tibbles rather than base data frames
army_census <- as_tibble(army_census)
army_dependents <- as_tibble(army_dependents)

# Turn the date columns into Date objects
army_census[["Date.of.Attestation"]] <-
  as.Date(army_census[["Date.of.Attestation"]])
army_dependents[["Date.of.Death.Full"]] <-
  as.Date(army_dependents[["Date.of.Death.Full"]])

# Confirm that both columns now carry the Date class
class(army_census$Date.of.Attestation)
## [1] "Date"
class(army_dependents$Date.of.Death.Full)
## [1] "Date"
# Names of the 32 counties; used below to keep only these counties
# in the home-address and place-of-death tallies.
all_ire <- c(
  "Antrim", "Armagh", "Carlow", "Cavan",
  "Clare", "Cork", "Londonderry", "Donegal",
  "Down", "Dublin", "Fermanagh", "Galway",
  "Kerry", "Kildare", "Kilkenny", "Laois",
  "Leitrim", "Limerick", "Longford", "Louth",
  "Mayo", "Meath", "Monaghan", "Offaly",
  "Roscommon", "Sligo", "Tipperary", "Tyrone",
  "Waterford", "Westmeath", "Wexford", "Wicklow"
)
# Dates of Death - Full & Partial
# Number of deaths recorded for each fully specified date of death
army_dependents_dates <- army_dependents %>%
  count(Dates = Date.of.Death.Full, name = "No")

# Collapse the daily tallies to the first day of each month, then drop
# any row that still contains a missing value (e.g. the NA-date group)
army_dependents_dates_monthly <- army_dependents_dates %>%
  group_by(Date = floor_date(Dates, "month")) %>%
  summarize(No = sum(No)) %>%
  na.omit()

# Month labels, in calendar order, used to sort the partial-date tallies
dates_list <- c(
  "July 1922", "August 1922",
  "September 1922", "October 1922",
  "November 1922"
)

# Tally deaths known only to the month, ordered by the labels above
army_dependents_dates_partial <- army_dependents %>%
  count(Date = Date.of.Death.Partial, name = "No") %>%
  arrange(factor(Date, levels = dates_list))

# Parse the "Month Year" labels into Date values
army_dependents_dates_partial$Date <- my(army_dependents_dates_partial$Date)

# Drop rows with missing values, then append June and December 1922,
# which get an NA count from add_row()
army_dependents_dates_partial <- army_dependents_dates_partial %>%
  na.omit() %>%
  add_row(Date = as.Date(c("1922-06-01", "1922-12-01")))

# The appended months have no partial-date deaths: replace their NA with 0
army_dependents_dates_partial[is.na(army_dependents_dates_partial)] <- 0

# Combine the monthly tallies from full dates (No.x) and partial dates (No.y)
# into a single monthly total. Only months present in both tables survive the
# inner join.
#
# The total is computed with a vectorised rowSums() rather than
# rowwise() + c_across(): the result is the same two-column table (Date, No),
# but it is a plain tibble instead of one that silently stays row-wise grouped.
army_dependents_dates_monthly_unif <- inner_join(
  army_dependents_dates_monthly,
  army_dependents_dates_partial,
  by = "Date"
) %>%
  mutate(No = rowSums(cbind(No.x, No.y), na.rm = TRUE))

# Keep only the month and the combined total
army_dependents_dates_monthly_unif$No.x <- NULL
army_dependents_dates_monthly_unif$No.y <- NULL
# Bar chart of the combined monthly death totals
ggplot(army_dependents_dates_monthly_unif, aes(x = Date, y = No)) +
  geom_bar(stat = "identity", width = 8) +
  scale_y_continuous(
    limits = c(0, 120),
    breaks = seq(0, 120, by = 10)
  ) +
  scale_x_date(date_breaks = "1 months", date_labels = "%b") +
  scale_fill_colorblind() +
  labs(x = "Month", y = "No. of Deaths") +
  theme_stata() +
  theme(
    axis.text.x = element_text(colour = "darkslategrey", size = 16),
    axis.text.y = element_text(colour = "darkslategrey", size = 16, angle = 0),
    text = element_text(family = "Times"),
    plot.title = element_text(size = 12, margin = margin(b = 10)),
    plot.subtitle = element_text(
      size = 12, color = "darkslategrey", margin = margin(b = 25)
    ),
    plot.caption = element_text(
      size = 8, margin = margin(t = 10), color = "grey70", hjust = 0
    )
  )

# Home Address & Places of Death of National Army dependents
# Per-county tallies of place of death and of home address
army_dependents_place_of_death <- army_dependents %>%
  count(County = Place.of.Death.County, name = "Death")

army_dependents_home_address <- army_dependents %>%
  count(County = Home.Address.County, name = "Home")

# Restrict both tallies to the counties listed in all_ire
army_dependents_home_address_ire <- filter(
  army_dependents_home_address,
  County %in% all_ire
)

# For place of death, also append six counties with an NA count so they
# can be drawn with the "No Deaths" colour on the maps
army_dependents_place_of_death_ire <- army_dependents_place_of_death %>%
  filter(County %in% all_ire) %>%
  add_row(County = c(
    "Londonderry", "Armagh",
    "Tyrone", "Fermanagh",
    "Antrim", "Down"
  ))

# One row per county that appears in both tallies
army_dependents_home_deaths <- inner_join(
  army_dependents_home_address_ire,
  army_dependents_place_of_death_ire,
  by = "County"
)

# Attach the tallies to the county polygons.
# NOTE(review): merge() keeps only matching rows by default, so a county
# missing from a tally is absent from that map layer - confirm this is intended.
cty_home_deaths <- merge(
  cty, army_dependents_home_deaths,
  by.x = "NAME_TAG", by.y = "County"
)
cty_deaths <- merge(
  cty, army_dependents_place_of_death_ire,
  by.x = "NAME_TAG", by.y = "County"
)
cty_home <- merge(
  cty, army_dependents_home_address_ire,
  by.x = "NAME_TAG", by.y = "County"
)
# Settings shared by both county choropleths. They were previously repeated
# verbatim for each map; defining them once keeps the two maps on identical
# class breaks and identical layout.
county_map_breaks <- c(0, 5, 10, 25, 50, 75, 100, 120)

# Style, layout/legend and scale bar, added to each map in this order.
# NOTE(review): legend.position is given as the strings "0" and "1", not as
# numbers or keywords such as "left"/"top"; kept unchanged - confirm intended.
county_map_layout <- tm_style("col_blind") +
  tm_layout(
    frame.lwd = 0.3,
    frame.double.line = TRUE,
    legend.title.size = 1,
    legend.text.size = 0.8,
    legend.position = c("0", "1"),
    legend.just = c("left", "top"),
    legend.width = 0.70,
    bg.color = "beige",
    asp = 1
  ) +
  tm_scale_bar(
    text.size = 0.45,
    position = c("RIGHT", "BOTTOM")
  )

# Two maps from the same layer: place of death/wounding and home address
tm_shape(cty_home_deaths) +
  tm_polygons(
    col = c("Death", "Home"),
    title = c("Place of Death/Wounding", "Home Address"),
    colorNA = "grey",
    textNA = "No Deaths",
    style = "fixed",
    breaks = county_map_breaks
  ) +
  county_map_layout

# Single map: place of death/wounding only
tm_shape(cty_deaths) +
  tm_polygons(
    col = "Death",
    title = "Place of Death/Wounding",
    colorNA = "grey",
    textNA = "No Deaths",
    style = "fixed",
    breaks = county_map_breaks
  ) +
  county_map_layout

# Create Age Groups for the census and for the casualties
#
# add_age_group(): append an `Age_Group` label column derived from `Age`.
#   df  A data frame with an `Age` column.
# Returns `df` with an extra character column `Age_Group`. Ages that fall in
# none of the bands (missing ages, ages above 75, or values between two bands)
# get NA.
# The same banding was previously written out twice, once per data set; a
# single helper guarantees census and casualties are always grouped alike.
# Note: the first band is labelled "1-14" but also matches Age 0. The label is
# kept unchanged because it is what the downstream tables and plots show.
add_age_group <- function(df) {
  df %>%
    mutate(Age_Group = case_when(
      Age >= 0 & Age <= 14 ~ "1-14",
      Age >= 15 & Age <= 19 ~ "15-19",
      Age >= 20 & Age <= 24 ~ "20-24",
      Age >= 25 & Age <= 34 ~ "25-34",
      Age >= 35 & Age <= 44 ~ "35-44",
      Age >= 45 & Age <= 54 ~ "45-54",
      Age >= 55 & Age <= 64 ~ "55-64",
      Age >= 65 & Age <= 75 ~ "65-75"
    ))
}

army_census <- add_age_group(army_census)
army_dependents <- add_age_group(army_dependents)

# Age and Rank totals, with columns renamed to Age / Rank / No.
# (the rank column is Rank.Type for casualties and Rank_Type for the census)
age_rank_names <- c("Age", "Rank", "No.")

army_dependents_age_rank <- army_dependents %>%
  count(Age_Group, Rank.Type) %>%
  setNames(age_rank_names)

army_census_age_rank <- army_census %>%
  count(Age_Group, Rank_Type) %>%
  setNames(age_rank_names)

# National Army casualties - Age and Rank
# Dodged bars per age band, coloured by rank; rows with a missing age band,
# rank or count are left out of the plot only.
ggplot(na.omit(army_dependents_age_rank), aes(x = Age, y = No.)) +
  geom_bar(
    aes(fill = Rank),
    stat = "identity",
    position = "dodge",
    width = 0.3
  ) +
  scale_fill_colorblind() +
  labs(x = "Age Ranges", y = "No.", fill = "Rank") +
  theme_stata() +
  theme(
    axis.text.x = element_text(colour = "darkslategrey", size = 16),
    axis.text.y = element_text(colour = "darkslategrey", size = 16, angle = 0),
    text = element_text(family = "Georgia"),
    plot.title = element_text(size = 18, margin = margin(b = 10)),
    plot.subtitle = element_text(
      size = 12, color = "darkslategrey", margin = margin(b = 25)
    ),
    plot.caption = element_text(
      size = 8, margin = margin(t = 10), color = "grey70", hjust = 0
    )
  )

# Census entries attested between 28 June and 31 December 1922 (inclusive);
# rows with a missing attestation date are dropped by the filter.
army_census_vol_levy <- army_census %>%
  filter(
    as.Date(Date.of.Attestation) >= as.Date("1922-06-28"),
    as.Date(Date.of.Attestation) <= as.Date("1922-12-31")
  )

# Number of recruits attested on each date
army_census_vol_levy_dates <- army_census_vol_levy %>%
  count(Dates = Date.of.Attestation, name = "Recruits")
army_dependents_dates
## # A tibble: 158 × 2
##    Dates         No
##    <date>     <int>
##  1 1922-06-28     2
##  2 1922-06-29     3
##  3 1922-06-30     5
##  4 1922-07-01     3
##  5 1922-07-02     4
##  6 1922-07-03     2
##  7 1922-07-04     2
##  8 1922-07-05     4
##  9 1922-07-06     3
## 10 1922-07-07     4
## # ℹ 148 more rows
# Rename the count column and drop the row for deaths with no full date
colnames(army_dependents_dates) <- c("Dates", "Deaths")
army_dependents_dates <- na.omit(army_dependents_dates)

# Every calendar day from 28 June to 31 December 1922 (187 days). The end date
# is written out instead of a hard-coded length so the range is self-evident.
all_dates_1922 <- data.frame(
  Dates = seq(as.Date("1922-06-28"), as.Date("1922-12-31"), by = "day")
)

# Days in that range with no recorded death / no recorded recruit
army_dependents_missing_dates <- anti_join(
  all_dates_1922, army_dependents_dates,
  by = "Dates"
)
army_census_vol_levy_missing_dates <- anti_join(
  all_dates_1922, army_census_vol_levy_dates,
  by = "Dates"
)

# Add the missing days back as extra rows (full outer merge); their counts are NA
army_dependents_dates_all <- merge(
  army_dependents_dates, army_dependents_missing_dates,
  by = "Dates", all = TRUE
)
army_census_vol_levy_dates_all <- merge(
  army_census_vol_levy_dates, army_census_vol_levy_missing_dates,
  by = "Dates", all = TRUE
)

# A day with no record counts as zero
army_dependents_dates_all[is.na(army_dependents_dates_all)] <- 0
army_census_vol_levy_dates_all[is.na(army_census_vol_levy_dates_all)] <- 0

# One row per day with both daily series side by side
daily_deaths_recruits <- inner_join(
  army_dependents_dates_all,
  army_census_vol_levy_dates_all,
  by = "Dates"
)

# Long format: one row per day and series, ready for faceting
daily_deaths_recruits_melt <- melt(daily_deaths_recruits, id.vars = "Dates")
# Daily deaths and daily recruits as two line panels, each with its own y scale.
# - Line thickness uses `linewidth`; the `size` aesthetic for lines is
#   deprecated since ggplot2 3.4.0 and produced a warning here.
# - The earlier theme_ipsum_es() call is dropped: theme_classic() is a complete
#   theme and replaced it entirely, so it had no effect on the figure.
daily_deaths_recruits_melt %>%
  ggplot(aes(x = Dates, y = value, colour = variable)) +
  geom_line(linewidth = 0.8) +
  facet_wrap(~variable, scales = "free_y", shrink = TRUE) +
  labs(
    x = "Month",
    y = "No.",
    color = ""
  ) +
  scale_color_colorblind() +
  theme_classic() +
  theme(
    axis.text.x = element_text(colour = "darkslategrey", size = 14),
    axis.text.y = element_text(colour = "darkslategrey", size = 14),
    legend.background = element_rect(fill = "white", linewidth = 4, colour = "white"),
    legend.justification = c(0, 1),
    legend.position = c(0.6, 1.3),
    text = element_text(family = "Baskerville"),
    strip.text = element_text(size = 16),
    plot.title = element_text(size = 14, margin = margin(b = 12)),
    plot.subtitle = element_text(size = 12, color = "darkslategrey", margin = margin(b = 25)),
    plot.caption = element_text(size = 12, margin = margin(t = 12), color = "grey70", hjust = 0)
  )

# Marital statuses kept for the comparison; any other value is excluded
marriages <- c("Single", "Married")

army_dependents_marriages <- filter(
  army_dependents,
  Marital.Status %in% marriages
)

# Casualties per age band, rank type and marital status,
# with columns renamed to Age / Rank / Marital / No.
army_dependents_age_rank_marital_count <- army_dependents_marriages %>%
  count(Age_Group, Rank.Type, Marital.Status) %>%
  setNames(c("Age", "Rank", "Marital", "No."))
# Dodged bars per age band, coloured by rank, one panel per marital status
ggplot(army_dependents_age_rank_marital_count, aes(x = Age, y = No.)) +
  geom_bar(
    aes(fill = Rank),
    stat = "identity",
    position = "dodge",
    width = 0.3
  ) +
  facet_grid(~Marital, scales = "free_y") +
  scale_y_continuous(
    limits = c(0, 120),
    breaks = seq(0, 120, by = 10)
  ) +
  scale_fill_colorblind() +
  labs(x = "Ages", y = "No. of Deaths", fill = "Rank") +
  theme_stata() +
  theme(
    axis.text.x = element_text(colour = "darkslategrey", size = 16),
    axis.text.y = element_text(colour = "darkslategrey", size = 16, angle = 0),
    text = element_text(family = "Georgia"),
    strip.text = element_text(size = 14),
    plot.title = element_text(size = 18, margin = margin(b = 10)),
    plot.subtitle = element_text(
      size = 12, color = "darkslategrey", margin = margin(b = 25)
    ),
    plot.caption = element_text(
      size = 8, margin = margin(t = 10), color = "grey70", hjust = 0
    )
  )