Our data was gathered from the PRDH database (Institut Drouin). It encompasses all death records collected in parishes across the province between 1729 and 1849 (n = 484,266).
# Setting work directory
setwd(dirname(rstudioapi::getSourceEditorContext()$path))
# Reading in the dataset
prdh_raw <- read.csv("prdh_cleaned.csv")
# Printing out the number of observations
nrow(prdh_raw)
## [1] 696434
# Printing out the columns names
colnames(prdh_raw)
## [1] "X" "idActe" "date"
## [4] "type" "Parish" "role"
## [7] "sex" "age" "standard.name"
## [10] "standard.first.name" "annee" "codeP"
## [13] "lat" "long" "Parish.Map"
## [16] "Attacked" "Burnt" "DateOA_raw"
## [19] "DateOfAttack" "Details" "Note"
## [22] "Text" "correct_date" "death_date"
## [25] "death_date_m" "death_date_y" "age_clean"
## [28] "birth_year" "infant_death" "BefAttack"
# Printing out the three first observations
head(prdh_raw,3)
## X idActe date type Parish role sex age
## 1 4 72655 1730-01-01 s Québec (Notre-Dame-de-Québec) subject m 12
## 2 5 183502 1730-01-01 s Varennes (Ste-Anne) subject x 02j
## 3 6 150485 1730-01-02 s Montréal (Notre-Dame-de-Montréal) subject m 45
## standard.name standard.first.name annee codeP lat long Parish.Map
## 1 GEOFFROY Thomas 1616 4501 46.81363 -71.20570 <NA>
## 2 MARTIN Xxxxx 1681 6202 45.68691 -73.43203 <NA>
## 3 LOISEAU Jean 1642 3901 45.50466 -73.55681 <NA>
## Attacked Burnt DateOA_raw DateOfAttack Details Note Text correct_date
## 1 1 NA <NA> 1759-06-26 <NA> <NA> <NA> TRUE
## 2 0 NA <NA> <NA> <NA> <NA> <NA> TRUE
## 3 1 NA <NA> 1760-07-14 <NA> <NA> <NA> TRUE
## death_date death_date_m death_date_y age_clean birth_year infant_death
## 1 1730-01-01 1729-12-01 1729 12 1717 0
## 2 1730-01-01 1729-12-01 1729 0 1729 1
## 3 1730-01-02 1730-01-01 1730 45 1685 0
## BefAttack
## 1 TRUE
## 2 NA
## 3 TRUE
For now, we’ll exclude the parishes that only collected death data before or after the conquest, to avoid distortion relative to the data collection (a new clean dataset called prdh). We’ll run replication tests using the raw dataset (prdh_raw) in the Appendix.
# Loading doBy for summaryBy()
library(doBy)
# Loading data.table for setorder()
library(data.table)
# Finding the earliest and latest year of death recorded by parish
datesParishes <- summaryBy(death_date_y ~ Parish, data=prdh_raw, FUN=c(min,max))
colnames(datesParishes) <- c("Parish","min","max")
datesParishes <- setorder(datesParishes,min)
# Removed: parishes for which we only have pre-conquest data
datesParishes[which(datesParishes$min < 1761 & datesParishes$max < 1761),]
## Parish min max
## 139 Pabos (Ste-Adélaïde) 1752 1753
## 37 Fort-St-Jean 1757 1760
toRemove_pre <- datesParishes$Parish[which(datesParishes$min < 1761 & datesParishes$max < 1761)]
length(toRemove_pre)
## [1] 2
# Removed: parishes for which we only have post-conquest data
datesParishes[which(datesParishes$min > 1760),]
## Parish min max
## 90 Loretteville (Registre des Hurons) 1762 1795
## 6 Beauceville (St-François-de-Beauce) 1765 1849
## 224 St-Henri-de-Lauzon (St-Henri) 1766 1849
## 66 L'Isle-Verte (St-Jean-Baptiste) 1767 1849
## 237 St-Jean-Port-Joli (L'Islet) 1767 1849
## 150 Québec (Anglican Metropolitan Church) 1768 1794
## 158 Québec (Protestants) 1768 1800
## 210 St-Eustache (St-Eustache) 1768 1849
## 318 Trois-Rivières (Congrégation protestante) 1769 1849
## 157 Québec (Presbyterian Saint Andrew's Church) 1770 1794
## 200 St-Cuthbert (Berthier) 1770 1849
## 11 Beloeil (St-Mathieu) 1772 1849
## 70 La Malbaie (St-Étienne) 1774 1849
## 79 Laval (St-Martin) 1774 1849
## 323 Vaudreuil (St-Michel) 1774 1849
## 231 St-Jacques (St-Jacques-de-l'Achigan) 1775 1849
## 22 Carleton (St-Joseph) 1778 1849
## 227 St-Hyacinthe (Notre-Dame-du-Rosaire) 1778 1849
## 314 Sts-Gervais et Protais (Bellechasse) 1780 1849
## 40 Gentilly (St-Édouard) 1784 1849
## 58 L'Acadie (Ste-Marguerite-de-Blairfindie) 1784 1849
## 265 St-Régis (St-François-Régis) 1785 1849
## 54 ÃŽle-Perrot (Ste-Jeanne-de-Chantal) 1786 1849
## 83 Lavaltrie (St-Paul) 1786 1849
## 272 St-Stanislas (Champlain) 1786 1849
## 268 St-Roch-de-l'Achigan (L'Assomption) 1787 1849
## 289 Ste-Anne-des-Plaines (Ste-Anne) 1788 1849
## 311 Ste-Thérèse (Ste-Thérèse-d'Avila) 1789 1849
## 176 St-André (Kamouraska) 1791 1849
## 14 Bonaventure (St-Bonaventure) 1794 1849
## 251 St-Marc-sur-Richelieu (Verchères) 1794 1849
## 91 Loretteville (St-Ambroise-de-la-Jeune-Lorette) 1795 1849
## 140 Paspébiac (Notre-Dame-de-la-Purification) 1796 1849
## 149 Québec (Anglican Cathedral Holy Trinity) 1796 1849
## 172 Sorel (Anglican Christ Church) 1796 1849
## 319 Trois-Rivières (Hôpital des Ursulines) 1796 1804
## 235 St-Jean-Baptiste-de-Rouville (St-Jean-Baptiste) 1797 1849
## 102 Mont-St-Hilaire (St-Hilaire) 1799 1849
## 189 St-Benoît (Deux-Montagnes) 1799 1849
## 95 Marieville (St-Nom-de-Marie) 1801 1849
## 105 Montréal (Anglican Christ Church Cathedral) 1801 1807
## 250 St-Luc (St-Jean) 1801 1849
## 141 Percé (St-Michel) 1802 1849
## 164 Rigaud (Ste-Madeleine) 1802 1849
## 222 St-Grégoire-le-Grand (Nicolet) 1802 1849
## 246 St-Léon-le-Grand (Maskinongé) 1802 1849
## 296 Ste-Élisabeth (Seigneurie Dautray) 1802 1849
## 48 Havre-Aubert (Notre-Dame-de-la-Visitation et Présentation) 1803 1849
## 72 La Présentation (St-Hyacinthe) 1806 1849
## 215 St-Ferréol-les-Neiges (St-Ferréol) 1806 1849
## 39 Frelighsburg (Anglican Church Holy Trinity) 1808 1849
## 121 Montréal (Presbyterian Saint Gabriel) 1808 1808
## 208 St-Esprit (Montcalm) 1808 1849
## 17 Cacouna (St-Georges) 1813 1849
## 167 Rivière-du-Loup (St-Patrice) 1813 1849
## 106 Montréal (Anglican Garrison) 1814 1834
## 103 Montebello (Notre-Dame-de-Bonsecours) 1815 1849
## 239 St-Jean-sur-Richelieu (Church of England) 1817 1849
## 74 Lachine (Presbyterian Saint Andrew) 1818 1849
## 117 Montréal (Methodist Saint James) 1818 1849
## 178 St-Anicet (Huntingdon) 1818 1849
## 7 Beauharnois (St-Clément) 1819 1849
## 99 Matane (St-Jérôme) 1819 1849
## 263 St-Polycarpe (Soulanges) 1819 1849
## 35 Drummondville (St-Frédéric) 1822 1849
## 169 Shefford (Anglican Church) 1822 1849
## 194 St-Césaire (Rouville) 1822 1849
## 243 St-Jude (St-Hyacinthe) 1822 1849
## 51 Iberville (St-Athanase-de-Bleury) 1823 1849
## 129 Napierville (St-Cyprien-de-Léry) 1823 1849
## 201 St-Damase (St-Hyacinthe) 1823 1849
## 276 St-Timothée (Beauharnois) 1823 1849
## 306 Ste-Martine (Châteauguay) 1823 1849
## 49 Havre-aux-Maisons (Ste-Madeleine) 1824 1849
## 62 L'Avenir (St-Pierre-Apôtre-de-Durham) 1824 1835
## 288 Ste-Anne-des-Monts (Ste-Anne) 1824 1849
## 294 Ste-Claire (Dorchester) 1824 1849
## 310 Ste-Scholastique (Deux-Montagnes) 1825 1849
## 84 Lennoxville (Church of England) 1827 1849
## 226 St-Hugues (Bagot) 1827 1849
## 277 St-Urbain (Charlevoix) 1827 1849
## 187 St-Barthélemy (Berthier) 1828 1849
## 238 St-Jean-sur-Richelieu (Cathédrale) 1828 1849
## 159 Québec (St-Roch) 1829 1849
## 257 St-Pascal (Kamouraska) 1829 1849
## 274 St-Sylvestre (Lotbinière) 1829 1849
## 38 Frampton (St-Edouard) 1830 1849
## 179 St-Anselme (Dorchester) 1830 1849
## 221 St-Gilles-de-Beaurivage (St-Gilles) 1830 1849
## 236 St-Jean-Chrysostome (Lévis) 1830 1849
## 259 St-Pie (Bagot St-Pie) 1830 1849
## 267 St-Rémi (St-Rémi-de-Lasalle) 1830 1849
## 278 St-Valentin (St-Jean) 1830 1849
## 55 Isle-aux-Grues (St-Antoine) 1831 1849
## 131 New Richmond (Sts-Anges-Gardiens-de-Cascapédiac) 1831 1849
## 307 Ste-Mélanie-d'Ailleboust (Ste-Mélanie) 1831 1849
## 119 Montréal (Presbyterian American Presbyterian) 1832 1849
## 155 Québec (Methodist Wesleyan Methodist Church) 1832 1849
## 175 St-Ambroise-de-Kildare (St-Ambroise) 1832 1849
## 270 St-Simon (Bagot) 1832 1849
## 292 Ste-Catherine-de-Fossambault (Ste-Catherine) 1832 1849
## 31 Côteau-du-Lac (St-Ignace) 1833 1849
## 50 Henryville (St-Georges) 1833 1849
## 177 St-André-Est (St-André-d'Argenteuil) 1833 1849
## 185 St-Barnabé-Nord (St-Maurice) 1833 1849
## 206 St-Édouard (Napierville) 1833 1849
## 230 St-Isidore (La Prairie) 1833 1849
## 283 Ste-Agnès-de-Charlevoix (Ste-Agnès) 1833 1849
## 47 Grosse-ÃŽle (St-Luc) 1834 1849
## 112 Montréal (Congregational Zion) 1834 1849
## 170 Sherbrooke (Cathédrale St-Michel) 1834 1849
## 229 St-Isidore (Dorchester) 1834 1849
## 309 Ste-Rosalie (Bagot) 1834 1849
## 202 St-David-d'Yamaska (St-David) 1835 1849
## 247 St-Lin (St-Lin-des-Laurentides) 1835 1849
## 16 Buckingham (Saint-Grégoire-de-Nazianze) 1836 1848
## 98 Massueville (St-Aimé) 1836 1849
## 111 Montréal (Congregational United Free) 1836 1837
## 151 Québec (Anglican Travelling Mission) 1836 1848
## 198 St-Colomban (Deux-Montagnes) 1836 1849
## 223 St-Guillaume-d'Upton (St-Guillaume) 1836 1849
## 271 St-Simon (Rimouski) 1836 1849
## 291 Ste-Brigitte-de-Laval (Ste-Brigitte) 1836 1849
## 161 Rawdon (Marie-Reine-du-Monde-et-St-Patrice) 1837 1849
## 205 St-Dominique (Bagot) 1837 1849
## 225 St-Hermas (Deux-Montagnes) 1837 1849
## 240 St-Jérôme (Cathédrale) 1837 1849
## 152 Québec (Congregational Church Congregational Societies) 1838 1849
## 171 Sherbrooke (Congregational Church) 1838 1849
## 183 St-Augustin (Deux-Montagnes) 1838 1849
## 45 Grenville (Notre-Dame-des-Sept-Douleurs) 1839 1849
## 197 St-Chrysostôme (St-Jean-Chrysostôme) 1839 1849
## 219 St-Gabriel-de-Brandon (St-Gabriel) 1839 1849
## 100 Mercier (Ste-Philomène) 1840 1849
## 108 Montréal (Anglican Trinity Memorial Chapel) 1840 1849
## 144 Plessisville (St-Calixte-de-Somerset) 1840 1849
## 186 St-Barnabé-Sud (St-Hyacinthe) 1840 1849
## 232 St-Jacques-le-Mineur (La Prairie) 1840 1849
## 303 Ste-Marguerite (Dorchester) 1840 1849
## 2 Aylmer (St-Paul) 1841 1849
## 101 Mont-St-Grégoire (St-Grégoire-le-Grand) 1841 1849
## 123 Montréal (United Church Jewish – Spanish – Portuguese) 1841 1849
## 203 St-Denis-de-la-Bouteillerie (St-Denis) 1841 1849
## 220 St-Georges (Beauce) 1841 1849
## 275 St-Thomas (Joliette) 1841 1849
## 18 Cantons-de-l'Est (Bromont) 1842 1845
## 42 Granby (Congregational Church) 1842 1849
## 68 La Baie (St-Alexis) 1842 1849
## 107 Montréal (Anglican Saint Thomas) 1842 1849
## 212 St-Félix-de-Kingsey (St-Félix-de-Valois) 1842 1849
## 302 Ste-Luce (Rimouski) 1842 1849
## 312 Ste-Ursule (Maskinongé) 1842 1849
## 44 Granby & Milton (Methodist Church) 1843 1843
## 56 Joliette (Cathédrale St-Charles-Borromée-de-l'Industrie) 1843 1849
## 94 Maniwaki (L'Assomption-de-la-Vierge-Marie) 1843 1843
## 191 St-Bernard-de-Lacolle (St-Bernard) 1843 1849
## 192 St-Bruno-de-Montarville (St-Bruno) 1843 1849
## 213 St-Félix-de-Valois (St-Félix) 1843 1849
## 228 St-Irénée (Charlevoix) 1843 1849
## 290 Ste-Brigide (Iberville) 1843 1849
## 313 Ste-Victoire-de-Sorel (Ste-Victoire) 1843 1849
## 321 Valcartier (St-Gabriel) 1843 1849
## 41 Granby (Anglican Church) 1844 1849
## 43 Granby (Notre-Dame) 1844 1849
## 52 Île-Bizard (St-Raphaël-Archange) 1844 1849
## 110 Montréal (Congregational Second) 1844 1846
## 116 Montréal (Methodist Mountain Street) 1844 1849
## 174 St-Alphonse-Rodriguez (St-Alphonse) 1844 1849
## 190 St-Bernard (Dorchester) 1844 1849
## 253 St-Maurice (Champlain) 1844 1849
## 264 St-Raymond (St-Raymond-Nonnat) 1844 1849
## 305 Ste-Marthe (Vaudreuil) 1844 1849
## 308 Ste-Monique (Nicolet) 1844 1849
## 1 Ascot (Universalist Church) 1845 1849
## 29 Chicoutimi (St-François-Xavier) 1845 1849
## 34 Douglastown (St-Patrick) 1845 1849
## 133 Norbertville (St-Norbert-d'Arthabaska) 1845 1849
## 137 Old Chelsea (St. Stephen) 1845 1849
## 281 St-Zéphirin (Yamaska) 1845 1849
## 53 ÃŽle-du-Grand-Calumet (Ste-Anne) 1846 1849
## 64 L'Isle-aux-Allumettes (St-Alphonse Chapeau) 1846 1849
## 88 Les Escoumins (St-Marcellin) 1846 1849
## 115 Montréal (Methodist East-End Lagauchetiere) 1846 1849
## 134 Notre-Dame-de-Stanbridge 1846 1849
## 138 Ormstown (St-Malachie) 1846 1849
## 207 St-Elzéar-de-Linière (St-Elzéar) 1846 1849
## 233 St-Janvier-de-Blainville (St-Janvier) 1846 1849
## 249 St-Louis-de-Gonzague (Beauharnois) 1846 1849
## 266 St-Rémi (Episcopal Church) 1846 1849
## 293 Ste-Cécile-de-Milton (Ste-Cécile) 1846 1849
## 315 Stukely-Nord (Notre-Dame-du-Bonsecours) 1846 1849
## 109 Montréal (Congregational Évangélique Française) 1847 1849
## 120 Montréal (Presbyterian Côte Saint George) 1847 1848
## 147 Pointe-Gatineau (St-François-de-Sales) 1847 1849
## 160 Quyon (St. Mary's) 1847 1849
## 163 Richmond (Ste-Bibiane) 1847 1847
## 188 St-Basile-Sud (Portneuf) 1847 1849
## 193 St-Casimir (Portneuf) 1847 1849
## 214 St-Ferdinand (St-Ferdinand-d'Halifax) 1847 1849
## 255 St-Norbert (Berthier) 1847 1849
## 75 Lambton (St-Vital) 1848 1849
## 135 Odanak (St-François-de-Sales) 1848 1849
## 209 St-Eusèbe-de-Stanfold (St-Eusèbe) 1848 1849
## 211 St-Fabien (Rimouski) 1848 1849
## 248 St-Louis-de-Blandford (St-Louis) 1848 1849
## 280 St-Victor-de-Tring (St-Victor) 1848 1849
## 282 Stanstead (Sacré-Coeur-de-Jésus) 1848 1849
## 300 Ste-Gertrude (Nicolet) 1848 1849
## 182 St-Arsène (St-Arsène-de-Cacouna) 1849 1849
## 245 St-Lazare (Bellechasse) 1849 1849
## 301 Ste-Julienne (Montcalm) 1849 1849
toRemove_post <- datesParishes$Parish[which(datesParishes$min > 1760)]
length(toRemove_post)
## [1] 211
# Most of these are of no interest to us, since they started collecting data long after the conquest
# Saving these parishes
toRemove <- c(toRemove_pre, toRemove_post)
# Keeping only parishes for which we have both pre-conquest and post-conquest data
prdh <- prdh_raw[!prdh_raw$Parish %in% toRemove,]
# This is the number of death records within our dataset
nrow(prdh)
## [1] 484266
# This is the number of death records by parish attack status within our dataset,
# within parishes that were attacked (1) versus not attacked (0) for the entire period
table(prdh$Attacked)
##
## 0 1
## 316647 167619
# Converting the Attacked variable to a factor
prdh$Attacked <- as.factor(prdh$Attacked)
# Loading the ggplot2 library for ggplot()
library(ggplot2)
# This is the distribution of death records for our entire dataset, by year of death and attacked status of the parish
ggplot(prdh, aes(death_date_y, fill = Attacked)) +
geom_histogram(binwidth=1) +
theme_minimal() +
ylab("Count \n") +
xlab("\n Recorded death date") +
scale_fill_manual(values = c("steelblue", "red"),
labels = c("0 = No","1 = Yes"),
name="Parish attacked \nduring Conquest")
# Let's zoom in on the time surrounding the Conquest (1740 to 1780)
ggplot(prdh, aes(death_date_y, fill = Attacked)) +
geom_histogram(binwidth=1) +
theme_minimal() +
ylab("Count \n") +
xlab("\n Recorded death date") +
scale_fill_manual(values = c("steelblue", "red"),
labels = c("0 = No","1 = Yes"),
name="Parish attacked \nduring Conquest") +
xlim(1740,1780)
## Warning: Removed 384347 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 4 rows containing missing values (`geom_bar()`).
Nothing too noticeable going on here, except an overall wider distribution of death peaks around the period of the Seven Year’s War, in contrast with the rest of the distribution.
Here are all the attacked and non-attacked parishes within our data, based on the classification used in the following map: The Battle for Quebec by W.I. Eccles and Susan L. Laskin.
attackedParishes <- unique(prdh[prdh$Attacked==1,c("Parish","DateOfAttack","lat","long")])
library(DT)
datatable(attackedParishes[,c("Parish","DateOfAttack","lat","long")],
rownames=F,
options = list(paging=TRUE,
pageLength = 10,
info=F,
searching= TRUE,
autoWidth = FALSE))
nonAttackedParishes <- unique(prdh[prdh$Attacked==0,c("Parish","lat","long")])
library(DT)
nonAttackedParishes <- unique(prdh[prdh$Attacked==0,c("Parish","lat","long")])
datatable(nonAttackedParishes[,c("Parish","lat","long")],
rownames=F,
options = list(paging=TRUE,
pageLength = 10,
info=F,
searching= TRUE,
autoWidth = FALSE))
We now take a closer look at the geographical location of the attacked parishes, to see whether this classification makes sense.
# Loading packages for map visualization
library(sf)
library(rnaturalearth)
## Warning: package 'rnaturalearth' was built under R version 4.2.3
library(rnaturalearthhires)
library(plotly)
# Collecting background data
new_france <- ne_states(country="canada", returnclass = "sf")
# Creating dataset with all parishes
allParishes <- unique(prdh[!is.na(prdh$lat),c("Parish","DateOfAttack","Attacked","lat","long")])
allParishes = allParishes %>%
st_as_sf(coords=c("long","lat"), crs=4326)
allParishes$Attacked <- as.factor(allParishes$Attacked)
map <- new_france %>%
st_transform(crs = "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0","+proj=moll") %>%
ggplot() +
geom_sf() +
theme_minimal() +
geom_sf(data = allParishes, aes(color=Attacked, text=Parish)) +
coord_sf(ylim=c(45.31,48.45),xlim=c(-74.09,-66.92)) +
scale_color_manual(values = c("steelblue", "red"),
labels = c("0 = No","1 = Yes"),
name="Parish attacked \nduring Conquest")
## Warning in layer_sf(geom = GeomSf, data = data, mapping = mapping, stat =
## stat, : Ignoring unknown aesthetics: text
ggplotly(map)
This is an interactive map (you can Zoom in and click on the parishes to see their name and attack status). We need to discuss whether we should code some additional parishes as attacked, but for now, this is what we’ll use.
# We first need to create or overwrite these functions
mean <- function(x)base::mean(x, na.rm=TRUE)
sd <- function(x)stats::sd(x, na.rm=TRUE)
ste <- function(x) sd(x)/sqrt(length(x))
These graphs show the number of individuals who died, first by year, then by month, by parish attack status.
# By year
nbDeaths_trends <- summaryBy(age_clean ~ death_date_y + Attacked, data=prdh, FUN=length)
nbDeaths_trends <- na.omit(nbDeaths_trends)
nbDeaths_trends$Attacked <- as.factor(nbDeaths_trends$Attacked)
nbDeaths_trends$death_date_y <- as.numeric(nbDeaths_trends$death_date_y)
ggplot(nbDeaths_trends, aes(x=death_date_y, y=age_clean.length, group=Attacked, color=Attacked)) +
geom_line() +
scale_color_brewer(palette="Paired")+theme_minimal() +
xlim(1730,1820) +
xlab("\n Year of Death") +
ylab("Count \n") +
scale_color_manual(name='Parish',
values=c("grey","black"),
labels=c("Not attacked","Attacked")) +
geom_vline(xintercept=1759, linetype="dashed", color = "red") + # red line identifies year 1759
theme_minimal()
## Warning: Removed 60 rows containing missing values (`geom_line()`).
# By month
nbDeaths_trends <- summaryBy(age_clean ~ death_date_m + Attacked, data=prdh, FUN=c(length))
nbDeaths_trends <- na.omit(nbDeaths_trends)
nbDeaths_trends$Attacked <- as.factor(nbDeaths_trends$Attacked)
# Load lubridate for ymd()
library(lubridate)
nbDeaths_trends$death_date_m <- as.Date(ymd(nbDeaths_trends$death_date_m))
ggplot(nbDeaths_trends, aes(x=death_date_m, y=age_clean.length, group=Attacked, color=Attacked)) +
geom_line() +
scale_color_brewer(palette="Paired")+theme_minimal() +
ylim(0,400) +
xlim(as.Date("1750-01-01"),as.Date("1770-01-01")) +
xlab("\n Month of Death") +
ylab("Count \n") +
scale_color_manual(name='Parish',
values=c("grey","black"),
labels=c("Not attacked","Attacked")) +
geom_vline(xintercept=as.Date("1759-06-01"), linetype="dashed", color = "red") + # red line identifies June 1759
theme_minimal()
## Warning: Removed 2400 rows containing missing values (`geom_line()`).
There seems to be a bigger jump in death counts in attacked parishes during the Conquest, but in the months preceding our recorded attack dates. This raises the question of the exact start and end of attacks, and/or blocades.
These graphs show the number of individuals who died before their first birthday, first by year, then by month, by parish attack status.
# By year
infantDeath_trends <- summaryBy(infant_death ~ death_date_y + Attacked, data=prdh, FUN=c(mean,ste))
infantDeath_trends <- na.omit(infantDeath_trends)
infantDeath_trends$Attacked <- as.factor(infantDeath_trends$Attacked)
infantDeath_trends$death_date_y <- as.numeric(infantDeath_trends$death_date_y)
ggplot(infantDeath_trends, aes(x=death_date_y, y=infant_death.mean, group=Attacked, color=Attacked)) +
geom_line() +
scale_color_brewer(palette="Paired")+theme_minimal() +
xlim(1730,1820) +
xlab("\n Year of Death") +
ylab("Average Proportion\n of Infant Deaths \n") +
scale_color_manual(name='Parish',
values=c("grey","black"),
labels=c("Not attacked","Attacked")) +
geom_vline(xintercept=1760, linetype="dashed", color = "red") +
theme_minimal()
## Warning: Removed 58 rows containing missing values (`geom_line()`).
# By month
nbDeaths_trends$death_date_m <- as.Date(ymd(nbDeaths_trends$death_date_m))
infantDeath_trends <- summaryBy(infant_death ~ death_date_m + Attacked, data=prdh, FUN=c(mean,ste))
infantDeath_trends <- na.omit(infantDeath_trends)
infantDeath_trends$Attacked <- as.factor(infantDeath_trends$Attacked)
infantDeath_trends$death_date_m <- as.Date(ymd(infantDeath_trends$death_date_m))
ggplot(infantDeath_trends, aes(x=death_date_m, y=infant_death.mean, group=Attacked, color=Attacked)) +
geom_line() +
scale_color_brewer(palette="Paired")+theme_minimal() +
xlim(as.Date("1750-01-01"),as.Date("1770-01-01")) +
xlab("\n Month of Death") +
ylab("Average Proportion\n of Infant Deaths \n") +
scale_color_manual(name='Parish',
values=c("grey","black"),
labels=c("Not attacked","Attacked")) +
geom_vline(xintercept=as.Date("1759-06-01"), linetype="dashed", color = "red") +
theme_minimal()
## Warning: Removed 2398 rows containing missing values (`geom_line()`).
These graphs show the average age at the time of death, first by year, then by month, by parish attack status.
# By year
ageAtDeath_trends <- summaryBy(age_clean ~ death_date_y + Attacked, data=prdh, FUN=c(mean,ste))
ageAtDeath_trends <- na.omit(ageAtDeath_trends)
ageAtDeath_trends$Attacked <- as.factor(ageAtDeath_trends$Attacked)
ageAtDeath_trends$death_date_y <- as.numeric(ageAtDeath_trends$death_date_y)
ggplot(ageAtDeath_trends, aes(x=death_date_y, y=age_clean.mean, group=Attacked, color=Attacked)) +
geom_line() +
scale_color_brewer(palette="Paired")+theme_minimal() +
xlim(1730,1820) +
xlab("\n Year of Death") +
ylab("Average Age at Death \n") +
scale_color_manual(name='Parish',
values=c("grey","black"),
labels=c("Not attacked","Attacked")) +
geom_vline(xintercept=1760, linetype="dashed", color = "red") +
theme_minimal()
## Warning: Removed 58 rows containing missing values (`geom_line()`).
# By month
ageAtDeath_trends <- summaryBy(age_clean ~ death_date_m + Attacked, data=prdh, FUN=c(mean,ste))
ageAtDeath_trends <- na.omit(ageAtDeath_trends)
ageAtDeath_trends$Attacked <- as.factor(ageAtDeath_trends$Attacked)
ageAtDeath_trends$death_date_m <- as.Date(ymd(ageAtDeath_trends$death_date_m))
ggplot(ageAtDeath_trends, aes(x=death_date_m, y=age_clean.mean, group=Attacked, color=Attacked)) +
geom_line() +
scale_color_brewer(palette="Paired")+theme_minimal() +
xlim(as.Date("1750-01-01"),as.Date("1770-01-01")) +
xlab("\n Year of Death") +
ylab("Average Age at Death \n") +
scale_color_manual(name='Parish',
values=c("grey","black"),
labels=c("Not attacked","Attacked")) +
geom_vline(xintercept=as.Date("1759-06-01"), linetype="dashed", color = "red") +
theme_minimal()
## Warning: Removed 2398 rows containing missing values (`geom_line()`).
We run an interrupted time series model to see whether we observe an impact of the date of attack at the cutoff, for each DV. This is a very simple model specification using the gls() command.
Here is the number of months separating the death of all individuals and the date of the attack, for the subset of respondents living in attacked parishes. We’ll use a monthly measure of time for the RDD.
rdd_data <- prdh[prdh$Attacked==1,]
rdd_data <- rdd_data %>% filter(if_any(everything(), ~ !is.na(.)))
rdd_data$time_bef_attack <- as.Date(ymd(rdd_data$death_date))-as.Date(ymd(rdd_data$DateOfAttack))
rdd_data$months_bef_attack <- interval(rdd_data$DateOfAttack,rdd_data$death_date) %/% months(1)
plot(density(rdd_data$months_bef_attack), main="Months separating Death and Attack")
First, let’s check death rates (attack significant at p<0.001).
# Death rates
rdd_deathRate_trends <- summaryBy(death_date_y ~ months_bef_attack, data=rdd_data, FUN=c(length))
rdd_deathRate_trends <- na.omit(rdd_deathRate_trends)
rdd_deathRate_trends$intervention <- NA
rdd_deathRate_trends$intervention <- ifelse(rdd_deathRate_trends$months_bef_attack>-1,
1,
0)
rdd_deathRate_trends$post_int_time <- NA
rdd_deathRate_trends$post_int_time <- ifelse(rdd_deathRate_trends$months_bef_attack<0,
0,
rdd_deathRate_trends$months_bef_attack)
rdd_deathRate_trends$time <- seq(1,nrow(rdd_deathRate_trends))
library(nlme)
model = gls(death_date_y.length ~ time + intervention + post_int_time, data = rdd_deathRate_trends,method="ML")
summary(model)
## Generalized least squares fit by maximum likelihood
## Model: death_date_y.length ~ time + intervention + post_int_time
## Data: rdd_deathRate_trends
## AIC BIC logLik
## 16511.49 16537.91 -8250.747
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 22.86235 7.346319 3.112083 0.0019
## time 0.21479 0.034506 6.224776 0.0000
## intervention -79.80903 8.493512 -9.396470 0.0000
## post_int_time -0.01086 0.035169 -0.308873 0.7575
##
## Correlation:
## (Intr) time intrvn
## time -0.867
## intervention 0.434 -0.750
## post_int_time 0.850 -0.981 0.651
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -3.2864034 -0.4558861 -0.1241422 0.2235619 14.4923066
##
## Residual standard error: 70.22313
## Degrees of freedom: 1455 total; 1451 residual
Second, let’s check infant mortality (attack significant at p=0.0021).
# Infant mortality
rdd_infant_trends <- summaryBy(infant_death ~ months_bef_attack, data=rdd_data, FUN=c(mean,ste))
rdd_infant_trends <- na.omit(rdd_infant_trends)
rdd_infant_trends$intervention <- NA
rdd_infant_trends$intervention <- ifelse(rdd_infant_trends$months_bef_attack>-1,
1,
0)
rdd_infant_trends$post_int_time <- NA
rdd_infant_trends$post_int_time <- ifelse(rdd_infant_trends$months_bef_attack<0,
0,
rdd_infant_trends$months_bef_attack)
rdd_infant_trends$time <- seq(1,nrow(rdd_infant_trends))
library(nlme)
model = gls(infant_death.mean ~ time + intervention + post_int_time, data = rdd_infant_trends,method="ML")
summary(model)
## Generalized least squares fit by maximum likelihood
## Model: infant_death.mean ~ time + intervention + post_int_time
## Data: rdd_infant_trends
## AIC BIC logLik
## -2289.526 -2263.116 1149.763
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 0.3735701 0.011495399 32.49736 0.0000
## time -0.0000525 0.000054142 -0.96905 0.3327
## intervention 0.0410006 0.013285940 3.08601 0.0021
## post_int_time -0.0000558 0.000055174 -1.01206 0.3117
##
## Correlation:
## (Intr) time intrvn
## time -0.867
## intervention 0.434 -0.750
## post_int_time 0.850 -0.981 0.652
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.4816752 -0.6751492 -0.0998929 0.6085773 3.6883075
##
## Residual standard error: 0.1097339
## Degrees of freedom: 1454 total; 1450 residual
Third, let’s check average age at death (attack not significant).
# Age at death
rdd_age_trends <- summaryBy(age_clean ~ months_bef_attack, data=rdd_data, FUN=c(mean,ste))
rdd_age_trends <- na.omit(rdd_age_trends)
rdd_age_trends$intervention <- NA
rdd_age_trends$intervention <- ifelse(rdd_age_trends$months_bef_attack>-1,
1,
0)
rdd_age_trends$post_int_time <- NA
rdd_age_trends$post_int_time <- ifelse(rdd_age_trends$months_bef_attack<0,
0,
rdd_age_trends$months_bef_attack)
rdd_age_trends$time <- seq(1,nrow(rdd_age_trends))
# Loading nlme for gls()
library(nlme)
model = gls(age_clean.mean ~ time + intervention + post_int_time, data = rdd_age_trends,method="ML")
summary(model)
## Generalized least squares fit by maximum likelihood
## Model: age_clean.mean ~ time + intervention + post_int_time
## Data: rdd_age_trends
## AIC BIC logLik
## 9150.193 9176.603 -4570.096
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 16.646284 0.5874700 28.335544 0.0000
## time 0.005544 0.0027669 2.003760 0.0453
## intervention 0.427417 0.6789753 0.629503 0.5291
## post_int_time -0.007166 0.0028196 -2.541621 0.0111
##
## Correlation:
## (Intr) time intrvn
## time -0.867
## intervention 0.434 -0.750
## post_int_time 0.850 -0.981 0.652
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.64303162 -0.74715901 -0.01304273 0.64596453 3.46726741
##
## Residual standard error: 5.607928
## Degrees of freedom: 1454 total; 1450 residual
It seems like (somewhat unsurprisingly) the Conquest had an effect on life expectancy indicators, and particularly so in the short term for areas impacted by physical destruction.
The next step is to identify whether these trends can be explained by (1) available livestock and agricultural land, (2) distance from Quebec City (as a proxy for market integration… if I recall correctly), (3) income and (4) life expectancy indicators prior to the conquest.
Let’s see whether census data shows aggregate differences in the number of families (as a sanity check), various types of livestock and agricultural land between 1736 and 1765.
# Loading the historical census data
census1736 <- read.csv("Recensements de Nouvelle-France - 1736.csv")
census1737 <- read.csv("Recensements de Nouvelle-France - 1737.csv")
census1739 <- read.csv("Recensements de Nouvelle-France - 1739.csv")
census1765_1 <- read.csv("Recensements de Nouvelle-France - 1765 - 1.csv")
census1765_2 <- read.csv("Recensements de Nouvelle-France - 1765 - 2.csv")
census1765 <- cbind(census1765_1,census1765_2)
rm(census1765_1,census1765_2)
# One issue here is the classification of the areas pre- and post-Conquest
# as illustrated by the following (raw) nb of unique observations for "Ville/Government"
length(unique(census1736$Ville.Gouvernement))
## [1] 4
length(unique(census1737$Ville.Gouvernement))
## [1] 4
length(unique(census1739$Ville.Gouvernement))
## [1] 5
length(unique(census1765$Ville.Gouvernement))
## [1] 111
# For now, let's just look at the aggregate data to see if there is something
# noteworthy here
## Families
value_1 <- census1736$Familles[census1736$Noms.des.Seigneuries=="TOTAUX"]
value_2 <- max(census1737$Familles[census1737$Noms.des.Seigneuries=="TOTAL"])
value_3 <- max(census1739$Familles[census1739$Noms.des.Seigneuries=="TOTAL"])
value_4 <- census1765$Families.Ménages[census1765$Ville.Gouvernement=="TOTAL"]
graphData <- data.frame(count=c(value_1,value_2,value_3,0,value_4),
year=c("1736","1737","1739","1740-1764","1765"))
graphData$count <- as.numeric(graphData$count)
ggplot(data=graphData, aes(x=year, y=count)) +
geom_bar(stat="identity", fill="steelblue", color="white")+
theme_minimal() +
ylab("Count \n") +
xlab("\n Year")
## Swine
value_1 <- census1736$Cochons[census1736$Noms.des.Seigneuries=="TOTAUX"]
value_2 <- max(census1737$Cochons[census1737$Noms.des.Seigneuries=="TOTAL"])
value_3 <- max(census1739$Cochons[census1739$Noms.des.Seigneuries=="TOTAL"])
value_4 <- census1765$Swine...Cattle[census1765$Ville.Gouvernement=="TOTAL"]
graphData <- data.frame(count=c(value_1,value_2,value_3,0,value_4),
year=c("1736","1737","1739","1740-1764","1765"))
ggplot(data=graphData, aes(x=year, y=count)) +
geom_bar(stat="identity", fill="steelblue", color="white")+
theme_minimal() +
ylab("Count \n") +
xlab("\n Year")
## Sheep
value_1 <- census1736$Moutons[census1736$Noms.des.Seigneuries=="TOTAUX"]
value_2 <- max(census1737$Moutons[census1737$Noms.des.Seigneuries=="TOTAL"])
value_3 <- max(census1739$Moutons[census1739$Noms.des.Seigneuries=="TOTAL"])
value_4 <- census1765$Sheep...Cattle[census1765$Ville.Gouvernement=="TOTAL"]
graphData <- data.frame(count=c(value_1,value_2,value_3,0,value_4),
year=c("1736","1737","1739","1740-1764","1765"))
ggplot(data=graphData, aes(x=year, y=count)) +
geom_bar(stat="identity", fill="steelblue", color="white")+
theme_minimal() +
ylab("Count \n") +
xlab("\n Year")
## Bêtes à cornes (= oxen?)
value_1 <- census1736$Bêtes.à .cornes[census1736$Noms.des.Seigneuries=="TOTAUX"]
value_2 <- max(census1737$Bêtes.à .cornes[census1737$Noms.des.Seigneuries=="TOTAL"])
value_3 <- max(census1739$Bêtes.à .cornes[census1739$Noms.des.Seigneuries=="TOTAL"])
value_4 <- census1765$Oxen...Cattle[census1765$Ville.Gouvernement=="TOTAL"]
graphData <- data.frame(count=c(value_1,value_2,value_3,0,value_4),
year=c("1736","1737","1739","1740-1764","1765"))
ggplot(data=graphData, aes(x=year, y=count)) +
geom_bar(stat="identity", fill="steelblue", color="white")+
theme_minimal() +
ylab("Count \n") +
xlab("\n Year")
## Horses
value_1 <- census1736$Chevaux[census1736$Noms.des.Seigneuries=="TOTAUX"]
value_2 <- max(census1737$Chevaux[census1737$Noms.des.Seigneuries=="TOTAL"])
value_3 <- max(census1739$Chevaux[census1739$Noms.des.Seigneuries=="TOTAL"])
value_4 <- census1765$Horses...Cattle[census1765$Ville.Gouvernement=="TOTAL"] +
census1765$Houses...Agriculture[census1765$Ville.Gouvernement=="TOTAL"]
graphData <- data.frame(count=c(value_1,value_2,value_3,0,value_4),
year=c("1736","1737","1739","1740-1764","1765"))
ggplot(data=graphData, aes(x=year, y=count)) +
geom_bar(stat="identity", fill="steelblue", color="white")+
theme_minimal() +
ylab("Count \n") +
xlab("\n Year")
## "Arpents" and "terres en valeur" (probably not the same unit)
value_1 <- census1736$Terres.en.valeur[census1736$Noms.des.Seigneuries=="TOTAUX"]
value_2 <- max(census1737$Terres.en.valeur[census1737$Noms.des.Seigneuries=="TOTAL"])
value_3 <- max(census1739$Terres.en.valeur[census1739$Noms.des.Seigneuries=="TOTAL"])
value_4 <- census1765$Arpents.Posses.d...Agriculture[census1765$Ville.Gouvernement=="TOTAL"]
graphData <- data.frame(count=c(value_1,value_2,value_3,0,value_4),
year=c("1736","1737","1739","1740-1764","1765"))
ggplot(data=graphData, aes(x=year, y=count)) +
geom_bar(stat="identity", fill="steelblue", color="white")+
theme_minimal() +
ylab("Count \n") +
xlab("\n Year")
The next step here is to match the census subdivisions between all years, and then, to match these areas with the ones provided in the PRDH data.
Let’s now see whether the distance between one’s individual death location and Quebec City is a predictor of life expectancy indicators. Here is one example with the proportion of infant deaths.
Distance is coded as follows: 1=less than 15 km, 2=between 15 and 99 km, 3=between 100 and 199 km, 4=between 200 and 249 km, 5=250km or more.
# Load geosphere for distm()
library(geosphere)
# Calculting the distance from Quebec City (Hotel-Dieu used as center)
prdh$distance <- distm(prdh[,c("long","lat")],c("-71.21165","46.8154"))
prdh$distance_km <- prdh$distance/1000
# Plotting the distribution for all individuals
plot(density(prdh$distance_km), main="Distance between individual's \ndeath location and Quebec City (in km)")
# Creating a variable with distance categories
prdh$distance_cat <- NA
prdh$distance_cat[prdh$distance_km < 15] <- 1
prdh$distance_cat[prdh$distance_km > 14 & prdh$distance_km < 100] <- 2
prdh$distance_cat[prdh$distance_km > 99 & prdh$distance_km < 200] <- 3
prdh$distance_cat[prdh$distance_km > 199 & prdh$distance_km < 250] <- 4
prdh$distance_cat[prdh$distance_km > 249] <- 5
# Deaths by distance from Quebec City (entire dataset)
table(prdh$distance_cat)
##
## 1 2 3 4 5
## 71574 101293 103709 183483 24207
# One example with infant deaths - to discuss furhter
infantDeath_trends <- summaryBy(infant_death ~ death_date_y + distance_cat, data=prdh, FUN=c(mean,ste))
infantDeath_trends <- na.omit(infantDeath_trends)
infantDeath_trends$distance_cat <- as.factor(infantDeath_trends$distance_cat)
infantDeath_trends$death_date_y <- as.numeric(infantDeath_trends$death_date_y)
ggplot(infantDeath_trends, aes(x=death_date_y, y=infant_death.mean, group=distance_cat, color=distance_cat)) +
geom_line() +
scale_color_brewer(palette="Paired")+theme_minimal() +
xlim(1730,1820) +
xlab("\n Year of Death") +
ylab("Average Proportion\n of Infant Deaths \n") +
labs(color = 'Parish distance \nfrom Qc City') +
geom_vline(xintercept=1760, linetype="dashed", color = "red") +
theme_minimal()
## Warning: Removed 145 rows containing missing values (`geom_line()`).
The spike in infant deaths around the time of the Conquest is visible outside of normal seasonality trends + noise. We’ll need to refine this, but I’m including it as a placeholder for something more elaborate for now.
library(stats)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
##
## Attaching package: 'forecast'
## The following object is masked from 'package:nlme':
##
## getResponse
rdd_infant_trends <- summaryBy(infant_death ~ months_bef_attack, data=rdd_data, FUN=c(mean,ste))
rdd_infant_trends <- na.omit(rdd_infant_trends)
tsData <- ts(rdd_infant_trends$infant_death.mean, frequency=12)
summary(tsData)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.09431 0.26154 0.32520 0.34339 0.41339 0.77778
decomposedRes <- decompose(tsData, type="mult")
plot(decomposedRes)
stlRes <- stl(tsData, s.window = "periodic")