---
title: "R Notebook"
output: html_notebook
---
## Tendencies of Suicide Rates Among Different Gender and Age Groups in the Populations of Different Countries
I will use the "Suicide Rates Overview 1985 to 2016" dataset from Kaggle, which compares socio-economic information with suicide rates by year and country, to explore tendencies of suicide in different countries and among various age groups and genders.
Importing data for analysis:
# Read the Kaggle suicide-rates CSV into the tibble `master`.
# NOTE(review): the path below is absolute and machine-specific; the file must
# exist at exactly this location for the notebook to run.
library(readr)
master <- read_csv(file = "C:/Users/Marcy/Documents/soc 712/master.csv")
## Parsed with column specification:
## cols(
## country = col_character(),
## year = col_double(),
## sex = col_character(),
## age = col_character(),
## suicides_no = col_double(),
## population = col_double(),
## `suicides/100k pop` = col_double(),
## `country-year` = col_character(),
## `HDI for year` = col_double(),
## `gdp_for_year ($)` = col_number(),
## `gdp_per_capita ($)` = col_double(),
## generation = col_character()
## )
# Preview the first six rows of the imported data.
head(master, n = 6L)
library(ggplot2)
library(ggthemes)
library(babynames)
library(Zelig)
## Loading required package: survival
##
## Attaching package: 'Zelig'
## The following object is masked from 'package:ggplot2':
##
## stat
library(ggrepel)
library(HistData)
library(tidyverse)
## -- Attaching packages ------------- tidyverse 1.2.1 --
## v tibble 2.1.1 v dplyr 0.8.0.1
## v tidyr 0.8.3 v stringr 1.4.0
## v purrr 0.3.2 v forcats 0.4.0
## -- Conflicts ---------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x purrr::reduce() masks Zelig::reduce()
## x Zelig::stat() masks ggplot2::stat()
The variables in the data are as follows:
# List the column names of the imported data.
colnames(master)
## [1] "country" "year" "sex"
## [4] "age" "suicides_no" "population"
## [7] "suicides/100k pop" "country-year" "HDI for year"
## [10] "gdp_for_year ($)" "gdp_per_capita ($)" "generation"
Plotting the number of suicides by generation:
# Suicide counts (x) against generation (y), drawn from the raw rows of
# `master`. geom_path() joins observations in the order they appear in the
# data, so no aggregation or sorting is applied here.
ggplot(
  data = master,
  mapping = aes(x = suicides_no, y = generation)
) +
  geom_path()
Plotting the number of suicides by gender:
# Suicide counts (x) against sex (y), drawn from the raw rows of `master`.
# geom_path() joins observations in the order they appear in the data.
ggplot(
  data = master,
  mapping = aes(x = suicides_no, y = sex)
) +
  geom_path()
Renaming the variable that represents the number of suicides per 100,000 people to "rate":
# Copy `master` into `m`, giving the per-100k column the syntactic name `rate`
# so later plots can refer to it without backticks.
m <- master %>%
  rename(rate = `suicides/100k pop`)
Plotting the suicide rate (number of suicides per 100,000 people) against the age group of the population.
# Suicide rate per 100k (x) against age group (y), drawn from the raw rows of
# `m`. geom_path() joins observations in the order they appear in the data.
ggplot(
  data = m,
  mapping = aes(x = rate, y = age)
) +
  geom_path()
A highlighted view of the suicide rate across age groups:
# Suicide rate (x) by age group (y), drawn as very thick lines whose colour
# follows the rate value. The plot is stored in `Masrate_plot` and then shown.
Masrate_plot <- ggplot(
  data = m,
  mapping = aes(x = rate, y = age)
) +
  geom_line(mapping = aes(colour = rate), size = 12)
Masrate_plot
By gender:
# Subset `m` to three age bands of interest and store it as `Masrate`.
# The previous filter compared the `sex` column against age-band labels, so no
# row could match; the labels belong to the `age` column.
# NOTE(review): "75+ years" (with a space) is assumed to be the label used in
# the data -- confirm with unique(m$age).
Masrate <- m %>%
  filter(age %in% c("35-54 years", "75+ years", "25-34 years"))

# Suicide rate (x) by sex (y), drawn as very thick lines coloured by rate.
# The plot intentionally keeps using the full table `m`, as before, so the
# figure is unchanged; `Masrate` is not used by this plot.
Masrate_plot <- ggplot(data = m, aes(x = rate, y = sex)) +
  geom_line(aes(color = rate), size = 12)
Masrate_plot
library(ggthemes)
library(gganimate)
library(tidyverse)
library(dplyr)
library(viridis)
## Loading required package: viridisLite
Looking at the number of suicides over the years.
# Base plot: year on x, suicide count on y.
graph_data <- ggplot(
  data = master,
  mapping = aes(x = year, y = suicides_no)
)
# Add a smoothed trend line (geom_smooth() picks its default method).
graph1 <- graph_data + geom_smooth()
# Show the trend with a title and the Tufte theme from ggthemes.
graph1 +
  labs(title = 'Trend of World Suicide Frequency Over the Years') +
  theme_tufte()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
library (dplyr)
library (ggthemes)
Looking at the number of suicides by age group:
# Suicide counts (y) by age group (x): green points, a pink connecting line,
# and an orange linear-model smoother, using the Calc theme from ggthemes.
# The plot is stored in `gg2` and not printed here.
gg2 <- ggplot(master, aes(y = suicides_no, x = age)) +
  geom_point(color = 'green') +
  geom_line(color = 'pink') +
  theme_calc() +
  stat_smooth(method = "lm", color = "orange") +
  labs(title = 'Rate of Suicides Per Age Group')
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Convert the stored ggplot `gg2` into an interactive plotly widget.
ggplotly(p = gg2)
library (ggplot2)
library (gganimate)
library (gifski)
library (devtools)
library (gapminder)
library (png)
The plot below demonstrates that the number of suicides was highest during the mid-1990s. In recent years, since 2010, the number of suicides has been going down.
# Animated, horizontally flipped bar chart of suicide rate by year.
# Bars are filled by suicide count; transition_time(year) steps the animation
# through the years and fills `{frame_time}` in the subtitle for each frame.
# scale_size() is kept from the original although no size aesthetic is mapped.
ggplot(
  data = m,
  mapping = aes(x = factor(year), y = rate, fill = suicides_no)
) +
  geom_col(alpha = 0.8) +
  scale_size(range = c(4, 12)) +
  guides(fill = guide_legend(title = "Suicide Ratio over the years")) +
  labs(
    title = 'Trend of Suicide Ratio in Population (per 100,000) Over The Years',
    subtitle = 'Date: {frame_time}',
    x = 'Year',
    y = 'Ratio'
  ) +
  transition_time(year) +
  coord_flip() +
  theme_gray()
Looking at the trend of suicides by gender over 30 years. Males consistently (over the years) have suicide rates about three times higher than females. However, suicide rates vary over the years for both genders.
# Animated, horizontally flipped bar chart of suicide counts by sex.
# Bars are filled by suicide count; transition_time(year) steps the animation
# through the years and fills `{frame_time}` in the subtitle for each frame.
# scale_size() is kept from the original although no size aesthetic is mapped.
ggplot(
  data = master,
  mapping = aes(x = factor(sex), y = suicides_no, fill = suicides_no)
) +
  geom_col(alpha = 0.8) +
  scale_size(range = c(4, 12)) +
  guides(fill = guide_legend(title = "Suicide Rate in Years")) +
  labs(
    title = 'Difference of Suicide Rate By Gender in 30 Years',
    subtitle = 'Date: {frame_time}',
    x = 'gender',
    y = 'rate of suicides'
  ) +
  transition_time(year) +
  coord_flip() +
  theme_gray()