library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(pastecs)
## Warning: package 'pastecs' was built under R version 4.4.3
library(ggplot2)
library(readxl)
# Read cells A2:N24 of the "Veteran" sheet into a tibble and preview the
# first rows.
veteran <- read_excel(
  path = "VA_National_2001-2022_Appendix_508 (1).xlsx",
  sheet = "Veteran",
  range = "A2:N24"
)
veteran %>%
  head() %>%
  print()
## # A tibble: 6 × 14
## `Year of Death` `Suicide Deaths` `Population Estimate` Crude Rate per 100,00…¹
## <dbl> <dbl> <dbl> <dbl>
## 1 2001 6005 25798000 23.3
## 2 2002 6146 25423000 24.2
## 3 2003 6012 25039000 24.0
## 4 2004 6006 24800000 24.2
## 5 2005 6131 24616000 24.9
## 6 2006 6039 24163000 25.0
## # ℹ abbreviated name: ¹​`Crude Rate per 100,000`
## # ℹ 10 more variables: `Age-Adjusted Rate per 100,000` <dbl>,
## # `Age- and Sex-Adjusted Rate per 100,000` <dbl>,
## # `Male Suicide Deaths` <dbl>, `Male Population Estimate` <dbl>,
## # `Male Crude Rate\r\n per 100,000` <dbl>,
## # `Male Age-Adjusted Rate per 100,000` <dbl>, `Female Suicide Deaths` <dbl>,
## # `Female Population Estimate` <dbl>, …
# List every column name in full (the tibble preview abbreviates some).
veteran %>%
  colnames() %>%
  print()
## [1] "Year of Death"
## [2] "Suicide Deaths"
## [3] "Population Estimate"
## [4] "Crude Rate per 100,000"
## [5] "Age-Adjusted Rate per 100,000"
## [6] "Age- and Sex-Adjusted Rate per 100,000"
## [7] "Male Suicide Deaths"
## [8] "Male Population Estimate"
## [9] "Male Crude Rate\r\n per 100,000"
## [10] "Male Age-Adjusted Rate per 100,000"
## [11] "Female Suicide Deaths"
## [12] "Female Population Estimate"
## [13] "Female Crude Rate per 100,000"
## [14] "Female Age-Adjusted Rate per 100,000"
I am choosing to explore whether the female veteran crude suicide rate per 100,000 persons has declined over the years.
# Descriptive statistics for the female crude rate. The column is selected by
# name rather than by position so the summary keeps targeting the intended
# variable even if the sheet layout or the imported range changes.
veteran %>%
  select(`Female Crude Rate per 100,000`) %>%
  stat.desc()
## Female Crude Rate per 100,000
## nbr.val 22.0000000
## nbr.null 0.0000000
## nbr.na 0.0000000
## min 8.0191502
## max 17.5939850
## range 9.5748348
## sum 280.6852714
## median 13.2407173
## mean 12.7584214
## SE.mean 0.5621309
## CI.mean.0.95 1.1690151
## var 6.9518045
## std.dev 2.6366275
## coef.var 0.2066578
There are no missing values in this column (nbr.na is 0).
# Histogram of the untransformed female crude rate, to inspect the shape of
# its distribution.
veteran %>%
  ggplot(mapping = aes(x = `Female Crude Rate per 100,000`)) +
  geom_histogram() +
  ggtitle(label = "female veteran crude suicide rate per 100,000 2001-2022")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Add the square root of the female crude rate as a new column; the original
# column is left unchanged.
veteran <- veteran %>%
  mutate(sqrtfemalerate = sqrt(`Female Crude Rate per 100,000`))

# Histogram of the transformed rate. The title names the transformation so
# this plot cannot be mistaken for the histogram of the untransformed rate.
ggplot(veteran, aes(x = sqrtfemalerate)) +
  geom_histogram() +
  ggtitle("square root of female veteran crude suicide rate per 100,000 2001-2022")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Scatter plot of the female crude rate against year of death, overlaid with
# a fitted linear trend line.
veteran %>%
  ggplot(
    mapping = aes(
      x = `Year of Death`,
      y = `Female Crude Rate per 100,000`
    )
  ) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle(label = "female veteran crude suicide rate per 100,000 2001-2022")
## `geom_smooth()` using formula = 'y ~ x'
The scatter plot and its fitted linear trend show a strong increase in the female veteran crude suicide rate over time.