library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(pastecs)
## Warning: package 'pastecs' was built under R version 4.4.3
library(ggplot2)
library(readxl)
# Load the "Veteran" sheet of the VA appendix workbook. Only cells A2:N24 are
# read, so the first row of that range supplies the column names.
veteran <- read_excel(
  path = "VA_National_2001-2022_Appendix_508 (1).xlsx",
  sheet = "Veteran",
  range = "A2:N24"
)

# Preview the first rows to confirm the import looks as expected.
veteran %>%
  head() %>%
  print()
## # A tibble: 6 × 14
##   `Year of Death` `Suicide Deaths` `Population Estimate` Crude Rate per 100,00…¹
##             <dbl>            <dbl>                 <dbl>                   <dbl>
## 1            2001             6005              25798000                    23.3
## 2            2002             6146              25423000                    24.2
## 3            2003             6012              25039000                    24.0
## 4            2004             6006              24800000                    24.2
## 5            2005             6131              24616000                    24.9
## 6            2006             6039              24163000                    25.0
## # ℹ abbreviated name: ¹​`Crude Rate per 100,000`
## # ℹ 10 more variables: `Age-Adjusted Rate per 100,000` <dbl>,
## #   `Age- and Sex-Adjusted Rate per 100,000` <dbl>,
## #   `Male Suicide Deaths` <dbl>, `Male Population Estimate` <dbl>,
## #   `Male Crude Rate\r\n per 100,000` <dbl>,
## #   `Male Age-Adjusted Rate per 100,000` <dbl>, `Female Suicide Deaths` <dbl>,
## #   `Female Population Estimate` <dbl>, …
# List every column name in full (the tibble preview abbreviates them).
print(names(veteran))
##  [1] "Year of Death"                         
##  [2] "Suicide Deaths"                        
##  [3] "Population Estimate"                   
##  [4] "Crude Rate per 100,000"                
##  [5] "Age-Adjusted Rate per 100,000"         
##  [6] "Age- and Sex-Adjusted Rate per 100,000"
##  [7] "Male Suicide Deaths"                   
##  [8] "Male Population Estimate"              
##  [9] "Male Crude Rate\r\n per 100,000"       
## [10] "Male Age-Adjusted Rate per 100,000"    
## [11] "Female Suicide Deaths"                 
## [12] "Female Population Estimate"            
## [13] "Female Crude Rate per 100,000"         
## [14] "Female Age-Adjusted Rate per 100,000"

I am choosing to explore whether the female crude suicide rate per 100,000 persons has declined over the years.

# Descriptive statistics (counts, missing values, range, mean, spread) for the
# female crude rate. The column is selected by name rather than by position so
# the summary cannot silently switch to another variable if the sheet layout
# or the imported range changes.
stat.desc(veteran[, "Female Crude Rate per 100,000"])
##              Female Crude Rate per 100,000
## nbr.val                         22.0000000
## nbr.null                         0.0000000
## nbr.na                           0.0000000
## min                              8.0191502
## max                             17.5939850
## range                            9.5748348
## sum                            280.6852714
## median                          13.2407173
## mean                            12.7584214
## SE.mean                          0.5621309
## CI.mean.0.95                     1.1690151
## var                              6.9518045
## std.dev                          2.6366275
## coef.var                         0.2066578

There are no missing values: the number of NAs (`nbr.na`) is 0.

# Distribution of the female crude rate. There are only 22 yearly values
# spanning roughly 8 to 18, so the default of 30 bins would leave mostly empty
# or single-count bars. Use bins one rate unit wide, aligned to whole numbers.
ggplot(veteran, aes(x = `Female Crude Rate per 100,000`)) +
  geom_histogram(binwidth = 1, boundary = 0) +
  ggtitle("female veteran crude suicide rate per 100,000 2001-2022")

# Add a square-root transform of the female crude rate as a new column so its
# distribution can be compared with that of the untransformed rate.
veteran <- veteran %>%
  mutate(sqrtfemalerate = sqrt(`Female Crude Rate per 100,000`))

# Histogram of the transformed values. The title names the transformation so
# this plot is not mistaken for the raw-rate histogram, and the bin width is
# set explicitly because the default of 30 bins is too many for 22 values.
ggplot(veteran, aes(x = sqrtfemalerate)) +
  geom_histogram(binwidth = 0.2, boundary = 0) +
  ggtitle("square root of female veteran crude suicide rate per 100,000 2001-2022")

# Female crude rate plotted against year of death, with a fitted straight-line
# (linear model) trend drawn over the points.
veteran %>%
  ggplot(aes(x = `Year of Death`, y = `Female Crude Rate per 100,000`)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(title = "female veteran crude suicide rate per 100,000 2001-2022")
## `geom_smooth()` using formula = 'y ~ x'

The scatter plot and fitted linear trend show a strong increase, not a decline, in the female veteran crude suicide rate over time.