Hood’s Texas Brigade Dataset Collection:

# Load the two CSV inputs: HTB_Vets feeds the social/wealth figures below,
# htb_all feeds the full-brigade tables and plots.
HTB_Vets <- read.csv(file = "Compare_HTB_Vets_R.csv")
htb_all <- read.csv(file = "htb_all_2_ural_nondesert_gone.csv")

R Libraries:

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(tidyr)
library(stringr)
library(USAboundaries)
library(maptools)
## Loading required package: sp
## Checking rgeos availability: TRUE
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library(broom)
library(readr)
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:readr':
## 
##     col_factor
# NOTE(review): plyr is attached after dplyr here, so bare calls to arrange,
# count, desc, mutate, rename, summarise, etc. resolve to plyr's versions
# (see the masking message printed below).
library(plyr)
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following object is masked from 'package:lubridate':
## 
##     here
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize

Graphs on Social Data:

# Bar chart of the number of HTB_Vets rows per enlistment state.
ggplot(data = HTB_Vets, mapping = aes(x = en_state)) +
  geom_bar(width = 1) +
  labs(
    title = "States Where HTB Enlisted",
    x = "States Enlisted"
  )

# Count plot: one point per rank/state combination, sized by the number of
# HTB_Vets rows that share that combination.
ggplot(data = HTB_Vets, mapping = aes(x = rank, y = en_state)) +
  geom_count(color = "blue") +
  labs(
    title = "Enlistment Rank Divided By State",
    x = "Rank",
    y = "States Enlisted"
  )

Average Total Wealth of Sample in 1860:

# Mean total wealth in 1860 (HTB_Vets$total_1860).
# The column is passed through as.character() before as.numeric(); entries
# that cannot be read as numbers become NA, which raises the warning below.
wealth1 <- as.character(HTB_Vets$total_1860)
wealth1a <- as.numeric(wealth1)
## Warning: NAs introduced by coercion
# NOTE(review): NAs are dropped from the sum, but the divisor is every row of
# HTB_Vets, so rows without a usable wealth value count as zero in this mean.
sum_wealth1 <- sum(wealth1a, na.rm = TRUE)
mean_wealth1 <- sum_wealth1 / nrow(HTB_Vets)

print(mean_wealth1)
## [1] 11451.19

Average Total Wealth of Sample in 1870:

# Mean total wealth in 1870 (HTB_Vets$total_1870), computed the same way as
# the 1860 figure: character -> numeric, sum without NAs, divide by row count.
wealth2 <- as.character(HTB_Vets$total_1870)
wealth2a <- as.numeric(wealth2)

# NOTE(review): NAs are dropped from the sum, but the divisor is every row of
# HTB_Vets, so rows without a usable wealth value count as zero in this mean.
sum_wealth2 <- sum(wealth2a, na.rm = TRUE)
mean_wealth2 <- sum_wealth2 / nrow(HTB_Vets)

print(mean_wealth2)
## [1] 2966.068

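As you can see, the average total wealth of the soldiers in the HTB sample drops significantly between 1860 and 1870. There are two possible explanations: 1) they took an economic hit just like the rest of the state (as you can see in the maps), or 2) we have much more missing data for soldiers in 1870 than in 1860. Because each average divides the summed wealth by the full number of rows in the sample, a soldier with no recorded wealth effectively counts as zero, so the drop could simply reflect missing data, for example if the soldier had died or moved by 1870.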

Start Total Data (Includes ALL Texas Brigade Data):

Information about rank. List of rank codes:

# Distinct rank codes present in htb_all, in order of first appearance.
unique(htb_all$rank)
## [1] N   E   O   HQO HQN HQE
## Levels: E HQE HQN HQO N O

How Many of Each Rank?

# Frequency of each rank code (columns x and freq).
# plyr::count() is called by namespace because dplyr also exports count(),
# and which one a bare count() reaches depends on the order in which the two
# packages were attached; dplyr::count() expects a data frame, not a vector.
plyr::count(htb_all$rank)
##     x freq
## 1   E 1049
## 2 HQE   11
## 3 HQN   11
## 4 HQO   31
## 5   N  142
## 6   O   70

How many soldiers/officers in each regiment:

# Number of htb_all rows per value of the regiment column.
table(htb_all[["regiment"]])
## 
## Arkansas  Georgia    Texas 
##      142       84     1063

Graph of regiment data:

# Bar chart of row counts per regiment; geom_bar() counts rows by default.
ggplot(data = htb_all, mapping = aes(x = regiment)) +
  geom_bar() +
  labs(
    title = "Soldiers/Officers Enlisted by Regiments",
    x = "Regiments"
  )

How many soldiers/officers in each company:

# Number of htb_all rows per company code.
table(htb_all[["company"]])
## 
##   A1G   A1T   A3A   A4T   A5T   AHL  B18G   B1G   B1T   B3A   B4T   B5T 
##     8    24    14    31    40     3     2     9    18    12    39    31 
##   BHL   C1G   C1T   C3A   C4T   C5T   CHL  CHLC   D1G   D1T   D3A   D4T 
##     3    10    32    22    29    32     2     1     6    32     1    29 
##   D5T   DHL  E18G   E1G   E1T   E3A   E4T   E5T   EHL   F1G   F1T   F3A 
##    38     3     1     5    21    18    28    31     3     8    27    11 
##   F4T   F5T   FHL   G1G   G1T   G3A   G4T    G5   G5T   GHL  H18G   H1G 
##    31    37     3     6    34    13    39     1    40     2     1     9 
##   H1T   H3A   H4T   H5T   HHL  HQ1A  HQ1G  HQ1T  HQ3A  HQ4T  HQ5T HQHIL 
##    39    11    39    34     3     6     1    27     3     8     2     1 
##  HQHL  I18G   I1G   I1T   I3A   I4T   I5T   K1G   K1T   K3A   K4T   K5T 
##     1     3     4    41    12    31    34     9    26    15    27    36 
##   L1T   L3A   M1T 
##    28     6    27
# Rows whose regiment is "Texas". No group_by() is applied: geom_bar() counts
# the rows per company on its own, so grouping added nothing to the plot and
# only left htbtx as a grouped data frame that was never ungrouped.
htbtx <- htb_all %>%
  filter(regiment == "Texas")

# Bar chart of row counts per company within the Texas subset.
ggplot(htbtx, aes(x = company)) +
  geom_bar() +
  labs(
    title = "Companies in the Texas Regiment",
    x = "Companies"
  )

Number of People who Mustered by Year:

# Rows per calendar year of muster_date; NA dates are left out of the table.
table(lubridate::year(as.Date(htb_all$muster_date)))
## 
## 1861 1862 1863 
##  813   26   29
# Rows per calendar year of comb_date; NA dates are left out of the table.
table(lubridate::year(as.Date(htb_all$comb_date)))
## 
## 1861 1862 1863 1864 
##  891  325   51    3

Number of People who Enlisted by Year:

# Rows per calendar year of enlist_date; NA dates are left out of the table.
table(lubridate::year(as.Date(htb_all$enlist_date)))
## 
## 1861 1862 1863 1864 
##   79  294   23    3

Wound 1 Data, how many people were injured and in what year:

# Rows per calendar year of wound_date_1; NA dates are left out of the table.
table(lubridate::year(as.Date(htb_all$wound_date_1)))
## 
## 1862 1863 1864 1865 
##  330   87   83    1

Wound 2 Data, how many people were injured a second time and in what year:

# Rows per calendar year of wound_date_2; NA dates are left out of the table.
table(lubridate::year(as.Date(htb_all$wound_date_2)))
## 
## 1862 1863 1864 1865 
##   50   19   59    1

Wound 3 Data, how many people were injured a third time and in what year:

# Rows per calendar year of wound_date_3; NA dates are left out of the table.
table(lubridate::year(as.Date(htb_all$wound_date_3)))
## 
## 1863 1864 
##    5   15

Wound 4 Data, how many people were injured a fourth time and in what year:

# Rows per calendar year of wound_date_4; NA dates are left out of the table.
table(lubridate::year(as.Date(htb_all$wound_date_4)))
## 
## 1863 1864 
##    1    5

How many people went AWOL and in what year?

# Rows per calendar year of awol_date; NA dates are left out of the table.
table(lubridate::year(as.Date(htb_all$awol_date)))
## 
## 1862 1863 1864 1865 
##    8   10    1    1

How many people from HTB Deserted in each year:

# Rows per calendar year of desert_date; NA dates are left out of the table.
table(lubridate::year(as.Date(htb_all$desert_date)))
## 
## 1861 1862 1863 1864 1865 
##    8   16   17   26    2

How many people in HTB died by year:

# Rows per calendar year of death_date; NA dates are left out of the table.
table(lubridate::year(as.Date(htb_all$death_date)))
## 
## 1861 1862 1863 1864 1865 
##   37  198   79   40    4

Desertion dates. The first table counts the deserters by year of desertion; the second counts the same deserters by the year of their combined enlistment/muster date (comb_date).

# Keep only the rows of htb_all that have a desertion date recorded.
deserters <- htb_all %>%
  filter(!is.na(desert_date))

# Deserters per calendar year of desertion. The scratch variables `test` and
# `test2` that used to hold these same dates/years were never read, so the
# values are computed inline here instead.
table(year(as.Date(deserters$desert_date)))
## 
## 1861 1862 1863 1864 1865 
##    8   16   17   26    2
# Deserters per calendar year of comb_date; NA dates are left out of the table.
table(lubridate::year(as.Date(deserters$comb_date)))
## 
## 1861 1862 1863 1864 
##   41   21    3    1

Which regiment had the most deserters:

# Number of deserters per value of the regiment column.
table(deserters[["regiment"]])
## 
## Arkansas  Georgia    Texas 
##        9        4       56

Which company had the most deserters? In the table below, company A5T has the most, with 8.

# Number of deserters per company code.
table(deserters[["company"]])
## 
##   A1G   A1T   A3A   A4T   A5T   AHL  B18G   B1G   B1T   B3A   B4T   B5T 
##     0     0     1     0     8     0     0     0     2     0     2     2 
##   BHL   C1G   C1T   C3A   C4T   C5T   CHL  CHLC   D1G   D1T   D3A   D4T 
##     0     0     2     4     0     1     0     0     0     0     0     1 
##   D5T   DHL  E18G   E1G   E1T   E3A   E4T   E5T   EHL   F1G   F1T   F3A 
##     4     0     0     0     1     0     1     3     0     0     3     1 
##   F4T   F5T   FHL   G1G   G1T   G3A   G4T    G5   G5T   GHL  H18G   H1G 
##     1     0     0     0     2     0     2     0     2     0     0     1 
##   H1T   H3A   H4T   H5T   HHL  HQ1A  HQ1G  HQ1T  HQ3A  HQ4T  HQ5T HQHIL 
##     2     3     2     2     0     0     0     1     0     0     0     0 
##  HQHL  I18G   I1G   I1T   I3A   I4T   I5T   K1G   K1T   K3A   K4T   K5T 
##     0     0     0     3     0     0     0     3     1     0     1     1 
##   L1T   L3A   M1T 
##     5     0     1
# Add the desertion year (the first four characters of desert_date) to each
# deserter. dplyr::mutate() is named explicitly because plyr is attached
# after dplyr in this document, so a bare mutate() resolves to plyr's version.
deserters_test <- deserters %>%
  dplyr::mutate(desertion_year = substring(desert_date, 1, 4))

# Cross-tabulate regiment by desertion year. data.frame() turns the table
# into long form with columns Var1 (regiment), Var2 (year) and Freq (count).
crossing_fingers <- data.frame(
  table(deserters_test$regiment, deserters_test$desertion_year)
)

# Point plot of desertions per year, coloured by regiment. The legend title
# is set explicitly; otherwise it shows the auto-generated name "Var1".
ggplot(crossing_fingers, aes(x = Var2, y = Freq, color = Var1)) +
  geom_point() +
  labs(
    title = "Desertion by HTB Regiments",
    x = "Desertion Years",
    y = "Total",
    color = "Regiment"
  )

# Stacked bar chart of the same counts, filled by regiment.
ggplot(data = crossing_fingers, aes(x = Var2, y = Freq, fill = Var1)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Desertion by HTB Regiments",
    x = "Desertion Years",
    y = "Total",
    fill = "Regiment"
  )

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.