Calculations for mean, SD, and Z-scores
# Per-year summary statistics and flags. The frame is grouped by Year, so
# mean(), sd() and max() below are computed within each year, and the result
# stays grouped by Year.
dataset <- dataset %>%
  group_by(Year) %>%
  mutate(
    Mean_Suicides_in_Year = mean(Suicides),
    SD_Suicides_in_Year = sd(Suicides),
    # Distance of each row from its year's mean, in units of that year's SD.
    Z_Score = (Suicides - Mean_Suicides_in_Year) / SD_Suicides_in_Year,
    Middle_Age = if_else(Age %in% 45:55, "45_55", "Others"),
    High = if_else(Suicides > 100, "High", "Low"),
    Z_Score_High = if_else(Z_Score > 1, "High", "Low"),
    HighestValue = max(Suicides),
    # P holds Age on the row(s) that reach the yearly maximum and 0 elsewhere,
    # so max(P) yields the (largest) age at which the yearly peak occurs.
    # NOTE(review): relies on Age being positive -- confirm against the data.
    P = ifelse(Suicides == HighestValue, Age, 0),
    Age_Point = max(P)
  )
head(dataset)
## # A tibble: 6 x 12
## # Groups: Year [1]
## Year Age Suicides Mean_Suicides_i~ SD_Suicides_in_~ Z_Score Middle_Age
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 1981 10 0 71.4 34.0 -2.10 Others
## 2 1981 11 0 71.4 34.0 -2.10 Others
## 3 1981 12 2 71.4 34.0 -2.04 Others
## 4 1981 13 2 71.4 34.0 -2.04 Others
## 5 1981 14 1 71.4 34.0 -2.07 Others
## 6 1981 15 20 71.4 34.0 -1.51 Others
## # ... with 5 more variables: High <chr>, Z_Score_High <chr>,
## # HighestValue <dbl>, P <dbl>, Age_Point <dbl>
Plotting
# Bar chart of Suicides by Age, one facet per Year, with an overlaid line and
# two reference markers; the finished plot is handed to ggplotly().
g <- ggplot(data = dataset, mapping = aes(x = Age, y = Suicides)) +
  geom_col(fill = "lightgreen", color = "lightgreen") +
  # group = 1 puts every row of a panel in one group so a single line is drawn.
  geom_line(
    mapping = aes(x = Age, y = Suicides, group = 1),
    color = "darkgreen",
    size = 1
  ) +
  # Vertical marker positioned at the Age_Point column.
  geom_vline(
    mapping = aes(xintercept = Age_Point),
    color = "orange",
    size = 1
  ) +
  # Horizontal dashed marker positioned at the Mean_Suicides_in_Year column.
  geom_hline(
    mapping = aes(yintercept = Mean_Suicides_in_Year),
    color = "white",
    linetype = "dashed",
    size = 1
  ) +
  facet_wrap(~Year) +
  theme_dark()
ggplotly(g)