# Point R at the folder that holds the survey CSV: the read.csv() call later in
# this script uses a bare file name, so it is resolved against this directory.
# NOTE(review): the path looks like a placeholder ("yourname", "foldername") -
# replace it with your own folder before running.
setwd("/Users/yourname/Dropbox/foldername")
library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✓ ggplot2 3.2.1 ✓ purrr 0.3.4
## ✓ tibble 2.1.3 ✓ dplyr 0.8.3
## ✓ tidyr 1.0.0 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.4.0
## ── Conflicts ─────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
# Read the survey CSV (looked up relative to the working directory) into a
# data frame. Text columns are kept as character vectors rather than factors.
# TRUE/FALSE are spelled out because the shorthands T and F are ordinary
# variables that can be reassigned, and `<-` is used so the name being created
# is visible at the start of the statement.
cavan <- read.csv(
  "CavanBarracksSurvey-IrishArmyCensus1922.csv",
  stringsAsFactors = FALSE
)
# Preview the first six rows to check the columns were read as expected.
head(cavan)
## Date.Created Date.Modified Forename Surname Age
## 1 2015-06-18T16:43:39 2015-11-08T00:43:37 HUGH McGOVERN 22
## 2 2015-07-15T11:25:11 2015-11-08T00:53:09 MICHAEL ANDERSON 28
## 3 2015-02-22T08:17:57 2015-11-07T23:46:04 THOMAS MURPHY 19
## 4 2015-02-22T08:17:27 2015-11-07T23:02:25 MARTIN NOONE 22
## 5 2015-07-15T11:25:25 2015-11-08T01:07:11 LAURENCE GERAGHTY 17
## 6 2015-02-22T08:17:56 2015-11-07T23:45:20 JOHN CASSIDY 20
## Barracks...Post Date.of.Attestation Rank Corps
## 1 Dowra 1922-03-10 Private West Cavan Brigade
## 2 Virginia Lodge 1922-05-05 Private Infantry
## 3 Cavan 1922-10-16 Private Infantry
## 4 Belturbet 1922-04-15 Staff / Sergeant
## 5 Virginia Lodge 1922-05-06 Private Infantry
## 6 Cavan 1922-08-05 Private Infantry
## Division Command
## 1 Barracks 1st Midland
## 2 Co. Cavan 1st Eastern
## 3 5th Northern Division Eastern Command
## 4 1st Midland Division Mid Western Command
## 5 Co. Cavan 1st Eastern
## 6 5th Northern Division Eastern Command
## Home.Address No..on.Pay.Book
## 1 Culleagh, Co. Cavan 8153
## 2 Suncroft, Curragh Camp, Co. Kildare 17746
## 3 97 McDonald Street, Belfast, Co. Antrim 25239
## 4 Inver, Barnatra, Ballina, Co. Mayo 7164
## 5 Maddenstown, Curragh Camp, Co. Kildare 17752
## 6 Newington Avenue, Belfast, Co. Antrim 18157
## Place.of.Attestation Next.of.Kin
## 1 Swanlinbar
## 2 Naas Mother
## 3 Cavan Mother
## 4 Longford Mother
## 5 Trim Father
## 6 Clones Mother
## Name...Address.of.Next.of.Kin Marital.Status
## 1 MRS. MARY McGOVERN - Culleagh, Co. Cavan Single
## 2 MRS. KATE ANDERSON - Suncroft, Curragh Camp, Co. Kildare Single
## 3 ELLEN MURPHY - 97 McDonald Street, Belfast, Co. Antrim Single
## 4 MRS. ELLEN NOONE - Inver, Barnatra, Ballina, Co. Mayo Single
## 5 PATRICK GERAGHTY - Maddenstown, Curragh Camp, Co. Kildare Single
## 6 MARY CASSIDY - Newington Avenue, Belfast, Co. Antrim Single
## Religion Additional.Notes Regimental.Number
## 1 Roman Catholic 802 (23257)
## 2 Roman Catholic Check address 13592 R.
## 3 Roman Catholic 15679 V.R.
## 4 Roman Catholic 2389 (23325)
## 5 Roman Catholic 13547 R.
## 6 Roman Catholic 15673 V.R.
# Count the number of soldiers in each Division. The pipe (%>%) passes `cavan`
# as the first argument to count(). The columns you can count by are the ones
# listed by head(cavan); typing `cavan$` brings up the same list of columns.
cavan %>%
  count(Division)
## # A tibble: 4 x 2
## Division n
## <chr> <int>
## 1 1st Midland Division 108
## 2 5th Northern Division 169
## 3 Barracks 21
## 4 Co. Cavan 65
# To work with only some of the columns, pick them out with select(). Here the
# Division, Command and Date.of.Attestation fields are kept and the result is
# stored as a new, narrower data frame.
cavan_date_comm_div <- cavan %>%
  select(Division, Command, Date.of.Attestation)
head(cavan_date_comm_div)
## Division Command Date.of.Attestation
## 1 Barracks 1st Midland 1922-03-10
## 2 Co. Cavan 1st Eastern 1922-05-05
## 3 5th Northern Division Eastern Command 1922-10-16
## 4 1st Midland Division Mid Western Command 1922-04-15
## 5 Co. Cavan 1st Eastern 1922-05-06
## 6 5th Northern Division Eastern Command 1922-08-05
# Date.of.Attestation is read in as plain text (class "character"). Convert it
# to the Date class so that it can be compared and plotted as a date. Calling
# class() before and after confirms that the change occurred successfully.
class(cavan[["Date.of.Attestation"]])
## [1] "character"
cavan[["Date.of.Attestation"]] <- as.Date(cavan[["Date.of.Attestation"]])
class(cavan[["Date.of.Attestation"]])
## [1] "Date"
# Keep only the soldiers attested between 1 January 1922 and 1 October 1922
# (both bounds inclusive). Date.of.Attestation has already been converted to
# the Date class above, so it is compared directly against Date bounds instead
# of being re-converted and compared with character strings. As with any
# subset() call, rows where the condition is NA are dropped.
jan_oct <- subset(
  cavan,
  Date.of.Attestation >= as.Date("1922-01-01") &
    Date.of.Attestation <= as.Date("1922-10-01")
)
# Count how many soldiers of each age were attested on each date within the
# January - October 1922 subset. count() yields one row per (date, age) pair,
# with the number of matching soldiers in a column called `n`.
jan_oct_date_age <- jan_oct %>%
  count(Date.of.Attestation, Age)
# Column names can be replaced with colnames(); the three new names are given
# in the same order as the existing columns.
colnames(jan_oct_date_age) <- c("Date", "Age", "No_of_Soldiers")
head(jan_oct_date_age)
## # A tibble: 6 x 3
## Date Age No_of_Soldiers
## <date> <int> <int>
## 1 1922-02-01 21 1
## 2 1922-02-02 22 2
## 3 1922-02-05 19 1
## 4 1922-02-07 20 1
## 5 1922-02-07 21 2
## 6 1922-02-07 23 1
# Graphs - visual representations such as graphs are one of the most important
# ways of helping readers understand the data that has been collated.
# The code below draws a line chart from the dataset above, with the date of
# attestation on the x-axis and the soldiers' age on the y-axis, covering the
# period January - October 1922. The theme settings enlarge and recolour the
# axis tick labels and set the font family for all text.
ggplot(jan_oct_date_age, aes(x = Date, y = Age)) +
  geom_line() +
  xlab("Months") +
  ylab("Age") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(colour = "darkslategrey", size = 20),
    axis.text.y = element_text(colour = "darkslategrey", size = 20),
    text = element_text(family = "Georgia")
  )