library(tidyverse)
# Read the raw suicide statistics into a data frame.
# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
suicideData <- read.csv(file = "/Users/kanwallatif/Documents/master.csv")
library(tidyverse) # metapackage with lots of helpful functions
library(ggplot2)
# Share of recorded suicides by sex, drawn as a base-graphics pie chart.
# x / y are the total suicide counts for the "male" / "female" rows; missing
# counts are ignored so a single NA cannot turn the whole total into NA.
x <- sum(suicideData$suicides_no[suicideData$sex == "male"], na.rm = TRUE)
y <- sum(suicideData$suicides_no[suicideData$sex == "female"], na.rm = TRUE)
z <- c(x, y)

# Percentage of the combined total contributed by each sex (one decimal).
piepercent <- round(100 * z / sum(z), 1)

# Compute the palette once so the slices and the legend are guaranteed to
# use the same colours.
sliceColors <- rainbow(length(z))

pie(
  z,
  labels = paste0(piepercent, "%"),
  col = sliceColors,
  main = "Percentage of Suicides Committed by Men & Women",
  sub = "From 1985 to 2016"
)
# Two legend entries, so two columns are enough to keep them on one row.
legend(
  "topright",
  legend = c("Men", "Women"),
  cex = 1.5,
  ncol = 2,
  fill = sliceColors
)
# Stand-alone copies of three columns of the raw data: the suicide counts,
# the year, and the sex (the latter converted to a factor).
NO.Of_Suicidies <- suicideData[["suicides_no"]]
Year <- suicideData[["year"]]
Sex <- factor(suicideData[["sex"]])
# Build the analysis table used by every plot below. On top of the raw
# columns it
#   * adds suicideRate: suicides as a percentage of the population,
#   * round-trips year through lubridate::years() / lubridate::year(),
#   * stores country and generation as factors,
#   * adds row_id, a sequential row number.
suicideDataProcessed <- mutate(
  suicideData,
  suicideRate = 100 * (suicides_no / population),
  year = lubridate::year(lubridate::years(year)),
  country = as.factor(country),
  generation = as.factor(generation),
  row_id = row_number()
)
# Histogram of suicide rates, one facet (and fill colour) per age group.
#
# The transparency is a constant, so it is set as a layer parameter instead
# of being mapped inside aes(): a constant inside aes() is treated as data,
# gets rescaled by the alpha scale and spawns a legend. With the mapping gone
# the deprecated `guides(alpha = FALSE)` call is no longer needed.
# `bins = 30` spells out the default that stat_bin() would otherwise pick
# with a message.
suicideDataProcessed %>%
  ggplot(aes(x = suicideRate, fill = age)) +
  geom_histogram(alpha = 0.5, bins = 30) +
  facet_wrap(~age) +
  labs(
    title = "Distribution of suicide rates across age groups",
    subtitle = "Lots of countries having low suicide rates for the youngest age group",
    x = "Suicide rates (in %age)"
  )
# Histogram of suicide rates faceted by generation; within each facet the
# bars of the different age groups are placed side by side ("dodge").
#
# The constant transparency is a layer parameter rather than an aes()
# mapping, so it is applied literally and produces no legend; the deprecated
# `guides(alpha = FALSE)` call is therefore dropped. `bins = 30` makes the
# stat_bin() default explicit.
suicideDataProcessed %>%
  ggplot(aes(x = suicideRate, fill = age)) +
  geom_histogram(position = "dodge", alpha = 0.5, bins = 30) +
  facet_wrap(~generation) +
  labs(
    title = "Suicide rates across generations",
    subtitle = "Distribution within generations visible as well",
    x = "Suicide rate (in %age)"
  )
# Linear trend of the suicide rate over the years, one fitted line per
# age group.
ggplot(
  data = suicideDataProcessed,
  mapping = aes(x = year, y = suicideRate, colour = age)
) +
  geom_smooth(method = "lm") +
  labs(
    title = "Suicide rates over time",
    subtitle = "Suicide rates for the youngest age group nearly constant over time",
    y = "Suicide rate (in %age)"
  )
# Print the first rows of the key columns as a quick sanity check.
head(
  select(
    suicideDataProcessed,
    row_id, country, year, sex, age,
    gdp_per_capita, gdp_for_year, suicides_no, suicideRate
  )
)
# Bar chart of the 20 countries with the highest median number of suicides
# per record.
#
# slice_max() names the ranking column explicitly; the previous `top_n(20)`
# silently ranked by whatever column happened to be last. Ties at the cut-off
# are kept, as before. The fill legend would only repeat the axis labels, so
# it is hidden with "none" (the logical form `FALSE` is deprecated).
suicideDataProcessed %>%
  group_by(country) %>%
  summarize(count = n(), medianSuicides = median(suicides_no)) %>%
  ungroup() %>%
  mutate(country = fct_reorder(country, medianSuicides)) %>%
  slice_max(medianSuicides, n = 20) %>%
  ggplot(aes(country, medianSuicides)) +
  geom_col(aes(fill = country)) +
  coord_flip() +
  labs(
    title = "Median number of suicides per country",
    x = "Country",
    y = "Median suicides"
  ) +
  guides(fill = "none")
# Horizontal bar chart of the median suicide count per age group, with the
# bars ordered by that median.
suicideDataProcessed %>%
  group_by(age) %>%
  summarize(count = n(), medianSuicides = median(suicides_no)) %>%
  ungroup() %>%
  mutate(age = fct_reorder(as.factor(age), medianSuicides)) %>%
  ggplot(mapping = aes(x = age, y = medianSuicides, fill = age)) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Median suicides per age group",
    x = "Age group",
    y = "Median suicides"
  )
# Horizontal bar chart of the median suicide count per generation, with the
# bars ordered by that median.
suicideDataProcessed %>%
  group_by(generation) %>%
  summarize(count = n(), medianSuicides = median(suicides_no)) %>%
  ungroup() %>%
  mutate(generation = fct_reorder(as.factor(generation), medianSuicides)) %>%
  ggplot(mapping = aes(x = generation, y = medianSuicides, fill = generation)) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Median suicides per generation",
    x = "Generation",
    y = "Median suicides"
  )
# Linear fit of the suicide rate against population size; the x axis uses
# comma-separated thousands for readability.
ggplot(
  data = select(suicideDataProcessed, suicideRate, year, population),
  mapping = aes(x = population, y = suicideRate)
) +
  geom_smooth(method = "lm") +
  scale_x_continuous(labels = scales::comma_format()) +
  labs(
    title = "Suicide rates vs population",
    subtitle = "Suicide rates increase and become more variable",
    x = "Population",
    y = "Suicide rates (in %age)"
  )
# Median suicide count per year, shown as points (sized and coloured by the
# median) joined by a line, with dotted reference lines at the maximum,
# median and minimum of the yearly medians.
#
# Constant transparencies are layer parameters, not aes() mappings: a
# constant inside aes() is treated as data, rescaled by the alpha scale and
# given a legend (which the deprecated `guides(alpha = FALSE)` then had to
# hide). The three reference levels are computed once from the plot data, so
# each line is drawn a single time instead of once per year on top of
# itself.
suicideDataProcessed %>%
  group_by(year) %>%
  summarize(count = n(), medianSuicides = median(suicides_no)) %>%
  ungroup() %>%
  ggplot(aes(year, medianSuicides)) +
  geom_point(aes(size = medianSuicides, color = medianSuicides)) +
  geom_line(aes(color = medianSuicides), alpha = 0.2) +
  geom_hline(
    # A function passed as `data` receives the plot data (the yearly summary).
    data = function(yearly) {
      data.frame(
        level = c(
          max(yearly$medianSuicides),
          median(yearly$medianSuicides),
          min(yearly$medianSuicides)
        )
      )
    },
    mapping = aes(yintercept = level),
    alpha = 0.2,
    linetype = "dotted"
  ) +
  scale_color_continuous(low = "blue", high = "red") +
  labs(
    title = "Median suicides across the years",
    subtitle = "Median suicides overall is roughly 25",
    y = "Median suicides",
    x = "Year"
  )
# World map outline data as returned by ggplot2's map_data(); its structure
# is printed for inspection.
worldData <- map_data(map = "world")

str(object = worldData)
#' Count map rows per region for regions matching a pattern
#'
#' Looks up `worldData` from the enclosing environment, keeps the rows whose
#' `region` matches `countryName`, and counts the rows of every matching
#' region.
#'
#' @param countryName Pattern handed to `str_detect()` and matched against
#'   the `region` column.
#' @return A summary with one row per matching region and a `count` column.
countryCount <- function(countryName) {
  matchingRows <- filter(worldData, str_detect(region, pattern = countryName))
  summarize(group_by(matchingRows, region), count = n())
}

# Name fragments to look up in the map data, to see how the map spells the
# regions in question.
countryNames <- c("Korea", "Antigua", "Barbuda", "Saint", "Trinidad", "Verde")
lapply(countryNames, countryCount)

# Countries in the suicide data that have no region of the same name in the
# map data, with the number of records each one has. The join key is spelled
# out so the check can never silently start matching on additional columns
# that the two tables might come to share.
suicideDataProcessed %>%
  rename(region = country) %>%
  mutate(region = as.character(region)) %>%
  anti_join(worldData, by = "region") %>%
  group_by(region) %>%
  summarize(count = n())

# Align map region names with the names used for the suicide data further
# down ("Korea" for the Republic of Korea, "Antigua" for Antigua and Barbuda).
#
# Only South Korea is renamed to "Korea". Matching every region containing
# "Korea" also relabelled North Korea, which then inherited the Republic of
# Korea's figures when the map was joined to the data instead of showing up
# as missing.
# NOTE(review): assumes the map spells the region exactly "South Korea";
# confirm with countryCount("Korea").
worldData <- worldData %>%
  mutate(
    region = case_when(
      region == "South Korea" ~ "Korea",
      str_detect(region, "Barbuda") ~ "Antigua",
      TRUE ~ region
    )
  )
library(viridis)

# Choropleth of the median suicide count per country.
#
# Lookup from the country names used in the suicide data to the region names
# used in worldData; names that are not listed are used unchanged.
mapRegionNames <- c(
  "Russian Federation" = "Russia",
  "United Kingdom" = "UK",
  "United States" = "USA",
  "Trinidad and Tobago" = "Trinidad",
  "Saint Kitts and Nevis" = "Saint Kitts",
  "Saint Vincent and Grenadines" = "Saint Vincent",
  "Cabo Verde" = "Cape Verde",
  "Antigua and Barbuda" = "Antigua",
  "Republic of Korea" = "Korea"
)

suicideDataProcessed %>%
  mutate(
    region = as.character(country),
    # Unlisted names index to NA and fall back to the original name.
    region = coalesce(unname(mapRegionNames[region]), region)
  ) %>%
  group_by(region) %>%
  summarize(count = n(), medianSuicides = median(suicides_no)) %>%
  ungroup() %>%
  # Keep every map polygon (regions without data stay NA and are drawn in the
  # scale's missing-value colour) but drop data rows that have no polygon;
  # with a full join those rows reached ggplot with NA coordinates.
  right_join(worldData, by = "region") %>%
  # Polygons must be drawn in vertex order; enforce it after the join.
  arrange(group, order) %>%
  ggplot(aes(x = long, y = lat, group = group)) +
  geom_polygon(aes(fill = medianSuicides)) +
  scale_fill_viridis(option = "B", direction = -1) +
  #scale_fill_gradientn(colors = heat.colors(10)) +
  labs(
    title = "Median suicides globally",
    subtitle = "Gray areas represent missing data",
    x = "",
    y = ""
  ) +
  coord_map("globular") +
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank()
  )