Book Chapter - Programming Notes

Introduction

This document provides links to the data utilised in the chapter analysis along with all the R code sequences that were utilised to create the tables, maps and figures.

The following link provides access to the anonymised Irish Army Census data, the National Army dependents data, an ESRI shapefile for the island of Ireland and a lookup table linking each dependent record to the relevant archival sources utilised.

Import the Data

This code sequence imports the CSV files and the shapefile. Be sure to turn off s2 spherical geometry (`sf::sf_use_s2(FALSE)`) once the shapefile has been read.

# Load the packages whose functions are called throughout these notes
library(tidyverse)  # dplyr, tidyr, tibble, ggplot2 and the %>% pipe
library(lubridate)  # floor_date(), my()
library(sf)         # st_read(), sf_use_s2()
library(reshape2)   # melt() -- assumed to be reshape2's; TODO confirm
library(ggthemes)   # theme_stata(), scale_fill_colorblind(), scale_color_colorblind()
library(hrbrthemes) # theme_ipsum_es()
library(tmap)       # tm_shape(), tm_polygons(), tm_layout(), tm_scale_bar()

# Import the data
# Blank and single-space cells are read as NA in both CSV files.
army_dependents <- read.csv("Army_Dependents.csv",
                            stringsAsFactors = FALSE,
                            na.strings = c("NA", " ", "")) %>%
  as_tibble()

army_census <- read.csv("Irish Army Census.csv",
                        stringsAsFactors = FALSE,
                        na.strings = c("NA", " ", "")) %>%
  as_tibble()

# County boundaries used for the maps
cty <- st_read("counties/counties.shp")

## Reading layer `counties' from data source 
##   `/Users/jackkavanagh/Dropbox/R_Repro_Chapter_Code/counties/counties.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 32 features and 15 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -10.66262 ymin: 51.38887 xmax: -5.426816 ymax: 55.4353
## Geodetic CRS:  WGS 84

# Switch off s2 spherical geometry for subsequent sf operations
sf::sf_use_s2(use_s2 = FALSE)

## Spherical geometry (s2) switched off

Tidy the Data

Tidy and convert the data. First, convert the date columns to the Date class.

# Parse the attestation and death date columns into the Date class
army_census[["Date.of.Attestation"]] <-
  as.Date(army_census[["Date.of.Attestation"]])
army_dependents[["Date.of.Death.Full"]] <-
  as.Date(army_dependents[["Date.of.Death.Full"]])

# Confirm that both columns now hold Date values
class(army_census$Date.of.Attestation)

## [1] "Date"

class(army_dependents$Date.of.Death.Full)

## [1] "Date"

# Names of the 32 counties on the island of Ireland, used later to filter
# county-level counts before mapping.
all_ire <- c(
  "Antrim", "Armagh", "Carlow", "Cavan",
  "Clare", "Cork", "Londonderry", "Donegal",
  "Down", "Dublin", "Fermanagh", "Galway",
  "Kerry", "Kildare", "Kilkenny", "Laois",
  "Leitrim", "Limerick", "Longford", "Louth",
  "Mayo", "Meath", "Monaghan", "Offaly",
  "Roscommon", "Sligo", "Tipperary", "Tyrone",
  "Waterford", "Westmeath", "Wexford", "Wicklow"
)

Since the dates of death for National Army casualties are split between full dates and month-only dates, these need to be counted separately and then combined into a single dataframe.

# Dates of Death - Full & Partial

# Daily counts of deaths that have a fully recorded date
army_dependents_dates <- army_dependents %>% count(Date.of.Death.Full)
colnames(army_dependents_dates) <- c("Dates", "No")

# Roll the daily counts up to the first day of each month and drop the
# row produced by records without a full date
army_dependents_dates_monthly <- army_dependents_dates %>%
  group_by(Date = floor_date(Dates, "month")) %>%
  summarize(No = sum(No)) %>%
  na.omit()

# Counts of deaths recorded only as "Month Year"
dates_list <- c("July 1922", "August 1922",
                "September 1922", "October 1922",
                "November 1922")

army_dependents_dates_partial <- army_dependents %>%
  count(Date.of.Death.Partial) %>%
  arrange(factor(Date.of.Death.Partial, levels = dates_list))

colnames(army_dependents_dates_partial) <- c("Date", "No")

# Parse "Month Year" strings into Dates with lubridate's my()
army_dependents_dates_partial$Date <- my(army_dependents_dates_partial$Date)

# Drop the NA row, then add June and December rows (No is NA at this point)
# so that those two months survive the join below
army_dependents_dates_partial <- army_dependents_dates_partial %>%
  na.omit() %>%
  add_row(Date = c(as.Date("1922-06-01"),
                   as.Date("1922-12-01")))

# The added months have no partial-date deaths
army_dependents_dates_partial[is.na(army_dependents_dates_partial)] <- 0

# Combine the full-date and partial-date monthly counts.
# NOTE(review): inner_join() keeps only months present in BOTH tables, so any
# month in the full-date table that is missing from the partial table is
# dropped -- confirm this is intended.
army_dependents_dates_monthly_unif <- inner_join(army_dependents_dates_monthly,
                                                 army_dependents_dates_partial,
                                                 by = "Date")

# Row-wise total of the two numeric count columns (No.x and No.y)
army_dependents_dates_monthly_unif <- army_dependents_dates_monthly_unif %>%
  rowwise() %>%
  mutate(sumnumeric = sum(c_across(where(is.numeric)), na.rm = TRUE))

# Keep only the date and the combined total
army_dependents_dates_monthly_unif$No.x <- NULL
army_dependents_dates_monthly_unif$No.y <- NULL

colnames(army_dependents_dates_monthly_unif) <- c("Date", "No")

Next arrange the Ages into separate categories for the army census and National Army deaths

# Add an Age_Group column that bins the Age column into the bands used in the
# figures. Ages that fall outside every band are left as NA by case_when().
.add_age_group <- function(records) {
  records %>%
    mutate(Age_Group = case_when(
      Age >= 0 & Age <= 14 ~ "1-14",
      Age >= 15 & Age <= 19 ~ "15-19",
      Age >= 20 & Age <= 24 ~ "20-24",
      Age >= 25 & Age <= 34 ~ "25-34",
      Age >= 35 & Age <= 44 ~ "35-44",
      Age >= 45 & Age <= 54 ~ "45-54",
      Age >= 55 & Age <= 64 ~ "55-64",
      Age >= 65 & Age <= 75 ~ "65-75"
    ))
}

# Create Age Groups for the census
army_census <- .add_age_group(army_census)

# Create Age Groups for the casualties
army_dependents <- .add_age_group(army_dependents)

Create a new character list for Marital Status and then filter the National Army casualties by marriages

# Marital statuses retained for the age/rank/marital breakdowns
marriages <- c("Single", "Married")

# Casualties whose marital status is one of the retained values
army_dependents_marriages <- army_dependents %>%
  filter(Marital.Status %in% marriages)

# Number of casualties per age group, rank type and marital status
army_dependents_age_rank_marital_count <- army_dependents_marriages %>%
  count(Age_Group, Rank.Type, Marital.Status)

names(army_dependents_age_rank_marital_count) <-
  c("Age", "Rank", "Marital", "No.")

Repeat this process for the army census

# Census records reduced to age group, marital status and rank type, grouped
# by marital status and restricted to the statuses listed in `marriages`
army_census_age_rank_marital <- army_census %>%
  select(Age_Group, Marital.Status, Rank_Type) %>%
  group_by(Marital.Status) %>%
  filter(Marital.Status %in% marriages)

# Because the data are grouped by Marital.Status, count() returns that
# grouping column first, followed by Age_Group, Rank_Type and the count
army_census_age_rank_marital_count <- army_census_age_rank_marital %>%
  count(Age_Group, Rank_Type)

names(army_census_age_rank_marital_count) <- c("Marital", "Age", "Rank", "No.")

Create a new army_dependents object showing the Pension received and the cause of death

# Count pension type and award status within each cause-of-death type,
# excluding awards made under the 1953 Act. Rows whose Pension.Awarded is NA
# are also removed by filter(), since the comparison evaluates to NA.
army_dependents_cod_pensions <- army_dependents %>%
  filter(Pension.Awarded != "Successful under 1953 Act.") %>%
  select(Cause.of.Death.Type, Pension.Type, Pension.Awarded) %>%
  group_by(Cause.of.Death.Type) %>%
  count(Pension.Type, Pension.Awarded)

names(army_dependents_cod_pensions) <-
  c("Cause_of_Death", "Type", "Status", "No.")

Follow the same logic when examining the types of pensions received by next-of-kin type

# Count pension type and award status within each next-of-kin type,
# excluding awards made under the 1953 Act. Rows whose Pension.Awarded is NA
# are also removed by filter(), since the comparison evaluates to NA.
army_dependents_nok_pension <- army_dependents %>%
  filter(Pension.Awarded != "Successful under 1953 Act.") %>%
  select(NOK_Type, Pension.Awarded, Pension.Type) %>%
  group_by(NOK_Type) %>%
  count(Pension.Type, Pension.Awarded)

names(army_dependents_nok_pension) <- c("NOK", "Type", "Status", "No.")

# Label missing next-of-kin and missing pension types explicitly
army_dependents_nok_pensions <- army_dependents_nok_pension %>%
  replace_na(list(NOK = "Unknown NOK", Type = "No Award"))

In order to create a comparative graph of National Army daily recruitment and deaths, it was necessary to account for days where no deaths or recruitment took place.

# Keep census records attested between 28 June and 31 December 1922
army_census_vol_levy <- subset(
  army_census,
  as.Date(Date.of.Attestation) >= '1922-06-28' &
    as.Date(Date.of.Attestation) <= '1922-12-31'
)

# Number of recruits attested on each day
army_census_vol_levy_dates <- army_census_vol_levy %>%
  count(Date.of.Attestation)
names(army_census_vol_levy_dates) <- c("Dates", "Recruits")

# Daily deaths were already counted earlier; print that table for reference
army_dependents_dates

## # A tibble: 158 × 2
##    Dates         No
##    <date>     <int>
##  1 1922-06-28     2
##  2 1922-06-29     3
##  3 1922-06-30     5
##  4 1922-07-01     3
##  5 1922-07-02     4
##  6 1922-07-03     2
##  7 1922-07-04     2
##  8 1922-07-05     4
##  9 1922-07-06     3
## 10 1922-07-07     4
## # ℹ 148 more rows

# Rename the daily death counts so the column is identifiable after joining
colnames(army_dependents_dates) <- c("Dates", "Deaths")

# Remove the NAs
army_dependents_dates <- army_dependents_dates %>% na.omit()

# Create a full dataframe of days from 28 June 1922 to 31 December 1922
# (187 days); the end date is stated explicitly rather than as a day count
all_dates_1922 <- data.frame(
  Dates = seq(as.Date("1922-06-28"), as.Date("1922-12-31"), by = "day")
)

# Use anti_join() to show the days of no deaths and no recruits
army_dependents_missing_dates <- anti_join(all_dates_1922,
                                           army_dependents_dates,
                                           by = "Dates")
army_census_vol_levy_missing_dates <- anti_join(all_dates_1922,
                                                army_census_vol_levy_dates,
                                                by = "Dates")

# Full outer merge: the observed days plus the days with nothing recorded
army_dependents_dates_all <- merge(army_dependents_dates,
                                   army_dependents_missing_dates,
                                   by = "Dates", all = TRUE)
army_census_vol_levy_dates_all <- merge(army_census_vol_levy_dates,
                                        army_census_vol_levy_missing_dates,
                                        by = "Dates", all = TRUE)

# Days introduced by the merge have NA counts; set them to zero
army_dependents_dates_all[is.na(army_dependents_dates_all)] <- 0
army_census_vol_levy_dates_all[is.na(army_census_vol_levy_dates_all)] <- 0

# One row per day with both the Deaths and Recruits columns
daily_deaths_recruits <- inner_join(army_dependents_dates_all,
                                    army_census_vol_levy_dates_all,
                                    by = "Dates")

# Long format (Dates, variable, value) for the faceted line plot
daily_deaths_recruits_melt <- melt(daily_deaths_recruits, id = "Dates")

For the two maps, the following code uses count() to tally the records by county and then filters the results using the character list of Irish counties.

# Home Address & Places of Death of National Army dependents

# Records per county of death
army_dependents_place_of_death <- army_dependents %>%
  count(Place.of.Death.County)
names(army_dependents_place_of_death) <- c("County", "Death")

# Records per home-address county
army_dependents_home_address <- army_dependents %>%
  count(Home.Address.County)
names(army_dependents_home_address) <- c("County", "Home")

# Keep only rows whose county is one of the 32 Irish counties
army_dependents_home_address_ire <- army_dependents_home_address %>%
  filter(County %in% all_ire)
army_dependents_place_of_death_ire <- army_dependents_place_of_death %>%
  filter(County %in% all_ire)

# Append these six counties with an NA death count so that they are retained
# when merged with the shapefile
army_dependents_place_of_death_ire <- army_dependents_place_of_death_ire %>%
  add_row(County = c("Londonderry", "Armagh",
                     "Tyrone", "Fermanagh",
                     "Antrim", "Down"))

# Home and death counts side by side for counties present in both tables
army_dependents_home_deaths <- inner_join(army_dependents_home_address_ire,
                                          army_dependents_place_of_death_ire,
                                          by = "County")

# Attach the counts to the county geometries via the NAME_TAG field
cty_home_deaths <- merge(cty, army_dependents_home_deaths,
                         by.x = "NAME_TAG", by.y = "County")
cty_deaths <- merge(cty, army_dependents_place_of_death_ire,
                    by.x = "NAME_TAG", by.y = "County")
cty_home <- merge(cty, army_dependents_home_address_ire,
                  by.x = "NAME_TAG", by.y = "County")

Monthly Deaths

The unified monthly dates show a complete breakdown of the National Army deaths per month.

# Bar chart of the unified monthly death counts
army_dependents_dates_monthly_unif %>%
  ggplot(aes(x = Date, y = No)) +
  geom_col(width = 8) +
  scale_y_continuous(
    breaks = seq(0, 120, by = 10),
    limits = c(0, 120)
  ) +
  scale_x_date(
    date_labels = "%b",
    date_breaks = "1 months"
  ) +
  scale_fill_colorblind() +
  labs(
    x = "Month",
    y = "No. of Deaths"
  ) +
  theme_stata() +
  theme(
    axis.text.x = element_text(colour = "darkslategrey", size = 16),
    axis.text.y = element_text(colour = "darkslategrey", size = 16, angle = 0),
    text = element_text(family = "Times"),
    plot.title = element_text(size = 12, margin = margin(b = 10)),
    plot.subtitle = element_text(size = 12, color = "darkslategrey",
                                 margin = margin(b = 25)),
    plot.caption = element_text(size = 8, margin = margin(t = 10),
                                color = "grey70", hjust = 0)
  )

Figures 1-7

Figure 1

# National Army Casualties - Age, Rank, Marital Status
# Dodged bars per age group, coloured by rank, one facet per marital status
army_dependents_age_rank_marital_count %>%
  ggplot(aes(x = Age, y = No.)) +
  geom_col(
    aes(fill = Rank),
    position = "dodge",
    width = 0.3
  ) +
  facet_grid(~Marital, scales = "free_y") +
  scale_y_continuous(
    breaks = seq(0, 120, by = 10),
    limits = c(0, 120)
  ) +
  scale_fill_colorblind() +
  labs(
    x = "Ages",
    y = "No. of Deaths",
    fill = "Rank"
  ) +
  theme_stata() +
  theme(
    axis.text.x = element_text(colour = "darkslategrey", size = 16),
    axis.text.y = element_text(colour = "darkslategrey", size = 16, angle = 0),
    text = element_text(family = "Georgia"),
    strip.text = element_text(size = 14),
    plot.title = element_text(size = 18, margin = margin(b = 10)),
    plot.subtitle = element_text(size = 12, color = "darkslategrey",
                                 margin = margin(b = 25)),
    plot.caption = element_text(size = 8, margin = margin(t = 10),
                                color = "grey70", hjust = 0)
  )

Figure 2

# Army Census - Age, Rank, Marital Status
# Dodged bars per age group, coloured by rank, one facet per marital status.
# The y axis counts census personnel, so it is labelled as soldiers rather
# than deaths (the earlier label was carried over from the casualty figure).
army_census_age_rank_marital_count %>%
  ggplot(aes(x = Age, y = No.)) +
  geom_bar(aes(fill = Rank),
           position = "dodge",
           stat = "identity",
           width = 0.3) +
  facet_grid(~Marital, scales = "free_y") +
  scale_y_continuous(limits = c(0, 9500),
                     breaks = seq(0, 9500, by = 500)) +
  scale_fill_colorblind() +
  labs(x = "Ages",
       y = "No. of Soldiers",
       fill = "Rank") +
  theme_stata() +
  theme(axis.text.x = element_text(colour = "darkslategrey", size = 16),
        axis.text.y = element_text(colour = "darkslategrey", size = 16, angle = 0),
        text = element_text(family = "Georgia"),
        strip.text = element_text(size = 14),
        plot.title = element_text(size = 18, margin = margin(b = 10)),
        plot.subtitle = element_text(size = 12, color = "darkslategrey", margin = margin(b = 25)),
        plot.caption = element_text(size = 8, margin = margin(t = 10), color = "grey70", hjust = 0))

Figure 3

# Comparative graph of military recruitment and deaths, 28 June-December 1922
# One facet per series (Deaths, Recruits), each with its own y scale.
# - Line thickness is set with `linewidth` (`size` for lines is deprecated
#   since ggplot2 3.4.0).
# - The legend is placed with `legend.position = "inside"` plus
#   `legend.position.inside` (a numeric `legend.position` is deprecated since
#   ggplot2 3.5.0).
# - Only theme_classic() is applied: a complete theme added later replaces
#   any theme added before it, so an earlier theme call had no effect.
daily_deaths_recruits_melt %>%
  ggplot(aes(x = Dates,
             y = value,
             colour = variable)) +
  geom_line(linewidth = 0.8) +
  facet_wrap(~variable, scales = "free_y", shrink = TRUE) +
  labs(x = "Month",
       y = "No.",
       color = "") +
  scale_color_colorblind() +
  theme_classic() +
  theme(axis.text.x = element_text(colour = "darkslategrey", size = 14),
        axis.text.y = element_text(colour = "darkslategrey", size = 14),
        legend.background = element_rect(fill = "white", linewidth = 4, colour = "white"),
        legend.justification = c(0, 1),
        legend.position = "inside",
        legend.position.inside = c(0.6, 1.3),
        text = element_text(family = "Baskerville"),
        strip.text = element_text(size = 16),
        plot.title = element_text(size = 14, margin = margin(b = 12)),
        plot.subtitle = element_text(size = 12, color = "darkslategrey", margin = margin(b = 25)),
        plot.caption = element_text(size = 12, margin = margin(t = 12), color = "grey70", hjust = 0))

Figure 4

# Choropleth of deaths by county of death/wounding (Jenks breaks).
# Counties with an NA count are drawn grey and labelled "No Deaths".
tm_shape(cty_deaths) +
  tm_polygons(
    col = "Death",
    style = "jenks",
    title = "Place of Death/Wounding",
    colorNA = "grey",
    textNA = "No Deaths"
  ) +
  tm_style("col_blind") +
  tm_layout(
    asp = 1,
    bg.color = "beige",
    frame.lwd = 0.3,
    frame.double.line = TRUE,
    legend.title.size = 1,
    legend.text.size = 0.8,
    legend.width = 0.70,
    legend.position = c("0","1"),
    legend.just = c("left", "top")
  ) +
  tm_scale_bar(
    position = c("RIGHT", "BOTTOM"),
    text.size = 0.45
  )

Figure 5

# Choropleth of casualties by home-address county (Jenks breaks).
# NOTE(review): the NA legend label reads "No Deaths" although this map shows
# home addresses -- confirm the wording is intended.
tm_shape(cty_home) +
  tm_polygons(
    col = "Home",
    style = "jenks",
    title = "Home Address of Casualty/Dependent",
    colorNA = "grey",
    textNA = "No Deaths"
  ) +
  tm_style("col_blind") +
  tm_layout(
    asp = 1,
    bg.color = "beige",
    frame.lwd = 0.3,
    frame.double.line = TRUE,
    legend.title.size = 1,
    legend.text.size = 0.8,
    legend.width = 0.70,
    legend.position = c("0","1"),
    legend.just = c("left", "top")
  ) +
  tm_scale_bar(
    position = c("RIGHT", "BOTTOM"),
    text.size = 0.45
  )

Figure 6

# Pension outcomes by cause of death: dodged bars coloured by award status,
# one facet per pension type
army_dependents_cod_pensions %>%
  ggplot(aes(x = Cause_of_Death, y = No.)) +
  geom_col(
    aes(fill = Status),
    position = "dodge",
    width = 0.1
  ) +
  facet_grid(~Type, scales = "free_y") +
  scale_y_continuous(
    breaks = seq(0, 220, by = 20),
    limits = c(0, 220)
  ) +
  scale_fill_colorblind() +
  labs(
    x = "Cause of Death",
    y = "No. of Cases",
    fill = "Status"
  ) +
  theme_stata() +
  theme(
    axis.text.x = element_text(colour = "darkslategrey", size = 16),
    axis.text.y = element_text(colour = "darkslategrey", size = 16, angle = 0),
    text = element_text(family = "Georgia"),
    strip.text = element_text(size = 14),
    plot.title = element_text(size = 18, margin = margin(b = 10)),
    plot.subtitle = element_text(size = 12, color = "darkslategrey",
                                 margin = margin(b = 25)),
    plot.caption = element_text(size = 8, margin = margin(t = 10),
                                color = "grey70", hjust = 0)
  )

Figure 7

# Pension types by next-of-kin: horizontal stacked bars, one bar per pension
# type, filled by next-of-kin category
army_dependents_nok_pensions %>%
  ggplot(aes(x = Type, y = No.)) +
  geom_col(
    aes(fill = NOK),
    width = 0.2
  ) +
  scale_fill_brewer(palette = "Set3") +
  labs(
    x = "Pension Type",
    y = "No. of Cases",
    fill = "NOK"
  ) +
  theme_stata() +
  coord_flip() +
  theme(
    axis.text.x = element_text(colour = "darkslategrey", size = 16),
    axis.text.y = element_text(colour = "darkslategrey", size = 16, angle = 0),
    text = element_text(family = "Georgia"),
    strip.text = element_text(size = 14),
    plot.title = element_text(size = 18, margin = margin(b = 10)),
    plot.subtitle = element_text(size = 12, color = "darkslategrey",
                                 margin = margin(b = 25)),
    plot.caption = element_text(size = 8, margin = margin(t = 10),
                                color = "grey70", hjust = 0)
  )