# Packages used throughout this script. Attaching them here lets the script
# run on its own; attaching an already-attached package is a no-op.
library(dplyr)        # %>%, mutate(), select(), group_by(), summarise(), ...
library(forcats)      # fct_recode()
library(highcharter)  # highchart() and the hc_*() helpers
library(viridisLite)  # plasma() palette
library(DT)           # datatable()

# NOTE(review): machine-specific absolute path. Kept so existing runs behave
# the same; consider starting R in the data directory instead.
setwd("C:/Users/Administrator/Desktop/BIG DATA/suicide")

# The first header used to come back as "癤풻ountry", which is what a UTF-8
# byte-order mark looks like when it is decoded with the wrong code page.
# Declaring the encoding strips the mark, and naming the first column by
# position works whatever the locale did to the header text.
data <- read.csv("master.csv", header = TRUE, fileEncoding = "UTF-8-BOM")
names(data)[1] <- "country"

# Missing values per column.
colSums(is.na(data))
## Recorded output:
##            country               year                sex
##                  0                  0                  0
##                age        suicides_no         population
##                  0                  0                  0
##  suicides.100k.pop       country.year       HDI.for.year
##                  0                  0              19456
##   gdp_for_year....  gdp_per_capita....         generation
##                  0                  0                  0

# Rename a few countries ("new name" = "old name"). A single fct_recode()
# call applies all of the pairs at once.
data <- data %>%
  mutate(
    country = fct_recode(
      country,
      "The Bahamas" = "Bahamas",
      "Cape Verde" = "Cabo Verde",
      "South Korea" = "Republic of Korea",
      "Russia" = "Russian Federation",
      "Republic of Serbia" = "Serbia",
      "United States of America" = "United States"
    )
  )

# Next section: year, number of suicides, total population.
# Worldwide suicides per 100,000 people for each year. The rate is computed
# from the yearly totals (sum of suicides / sum of population) in one step;
# the two sums must not be computed separately.
suicide_rate <- data %>%
  select(year, suicides_no, population) %>%
  group_by(year) %>%
  summarise(
    suicide_capita = round((sum(suicides_no) / sum(population)) * 100000, 2)
  )

# Line chart of the yearly rate.
highchart() %>%
  hc_add_series(
    suicide_rate,
    hcaes(x = year, y = suicide_capita, color = suicide_capita),
    type = "line"
  ) %>%
  hc_tooltip(
    crosshairs = TRUE,
    borderWidth = 1.5,
    headerFormat = "",
    # Built from two pieces so that no raw line break ends up inside the
    # tooltip HTML.
    pointFormat = paste(
      "Year: <b>{point.x}</b> <br>",
      "Suicides: <b>{point.y}</b>"
    )
  ) %>%
  # Title and subtitle.
  hc_title(text = "Worldwide suicides by year") %>%
  hc_subtitle(text = "1985-2015") %>%
  # Axis label.
  hc_xAxis(title = list(text = "Year"))

# Series colours for the by-sex chart: pink and baby blue. This assignment
# used to be fused onto the end of the chart pipeline above, which does not
# parse.
sex_color <- c("#EE6AA7", "#87CEEB")
# Suicides per 100,000 people for each year and sex.
gender_rate <- data %>%
  select(year, sex, suicides_no, population) %>%
  group_by(year, sex) %>%
  summarise(
    suicide_capita = round((sum(suicides_no) / sum(population)) * 100000, 2)
  ) %>%
  # summarise() only peels off the last grouping level; drop the rest so the
  # table is a plain data frame.
  ungroup()

# One line per sex, coloured with sex_color.
highchart() %>%
  hc_add_series(
    gender_rate,
    hcaes(x = year, y = suicide_capita, group = sex),
    type = "line",
    color = sex_color
  ) %>%
  hc_tooltip(
    # Highcharts option names are case-sensitive: "crosshair" and
    # "borderwidth" were silently ignored. Spelled as in the yearly chart.
    crosshairs = TRUE,
    borderWidth = 1.5,
    pointFormat = paste(
      "Year: <b>{point.x}</b> <br>",
      "Gender: <b>{point.sex}</b><br>",
      "Suicides: <b>{point.y}</b>"
    )
  ) %>%
  hc_title(text = "World Suicide by gender") %>%
  hc_subtitle(text = "1985-2015")

# Series colours for the by-age chart: six plasma colours, reversed. This
# assignment used to be fused onto the end of the chart pipeline above, which
# does not parse.
age_color <- rev(plasma(6))
# Suicides per 100,000 people for each year and age group.
age_table <- data %>%
  select(year, age, suicides_no, population) %>%
  group_by(year, age) %>%
  summarise(
    suicide_capita = round((sum(suicides_no) / sum(population)) * 100000, 2)
  ) %>%
  # Drop the grouping that summarise() leaves behind.
  ungroup()

# One line per age group, coloured with age_color.
highchart() %>%
  hc_add_series(
    age_table,
    hcaes(x = year, y = suicide_capita, group = age),
    type = "line",
    color = age_color
  ) %>%
  hc_tooltip(
    # "crosshair" and "borderwith" were misspelled option names and were
    # silently ignored.
    crosshairs = FALSE,
    borderWidth = 1.5,
    pointFormat = paste(
      "Year: <b>{point.x}</b> <br>",
      # This row shows the age group; it was labelled "Gender" by copy-paste.
      "Age: <b>{point.age}</b><br>",
      "Suicides: <b>{point.y}</b>"
    )
  ) %>%
  hc_title(text = "World Suicide by age") %>%
  # NOTE(review): the other charts say "1985-2015"; confirm which range is
  # intended.
  hc_subtitle(text = "1985-2014") %>%
  hc_add_theme(hc_theme_economist())

## Recorded output (apparently from inspecting the raw GDP column):
## Factor w/ 2321 levels "1,002,219,052,968",..: 727 727 727 727 727 727 727 727 727 727 ...
# The raw yearly-GDP column holds digit strings with thousands separators
# (e.g. "1,002,219,052,968"). Strip the commas and convert to numeric.
data$gdp_for_year <- as.numeric(gsub(",", "", data$gdp_for_year....))

# Mean yearly GDP per country, largest first.
#
# The previous version kept only the first row of each country before taking
# the mean, so "mean_gdp" was simply that one row's GDP. Here each
# country-year contributes exactly once (the value is repeated on every row
# of that country-year), and the mean is taken over the years.
gdp_10 <- data %>%
  distinct(country, year, gdp_for_year) %>%
  group_by(country) %>%
  summarise(mean_gdp = mean(gdp_for_year)) %>%
  arrange(desc(mean_gdp))

datatable(gdp_10)

# Six-colour palette for the country charts. This assignment used to be fused
# onto the datatable() call above, which does not parse.
country_color <- rev(plasma(6))
# Top-GDP countries plus South Korea. The United States entry has to match
# the recoded level "United States of America"; the previous spelling
# ("United State of America") matched nothing, so the US was silently dropped.
countries <- c(
  "United States of America",
  "Germany", "Japan",
  "France", "United Kingdom",
  "Canada", "South Korea"
)

# Total number of suicides per year and country.
#
# The previous version filtered each (year, country) group down to its first
# row before summing, so `s` was the count of a single demographic row rather
# than the yearly total.
dfPlot <- data %>%
  filter(country %in% countries) %>%
  group_by(year, country) %>%
  summarise(s = sum(suicides_no)) %>%
  ungroup()

dfPlot
## (The output recorded here earlier came from the one-row-per-group pipeline
## without the United States and no longer applies; re-run to refresh it.)

# One colour per plotted country. The palette is sized from the data, so it
# stays in step with however many of `countries` are actually present.
top_gdp_palette <- rev(plasma(n_distinct(dfPlot$country)))

highchart() %>%
  hc_add_series(
    dfPlot,
    type = "line",
    hcaes(x = year, y = s, group = country),
    color = top_gdp_palette
  ) %>%
  hc_tooltip(
    # "crosshair" / "borderwidth" were misspelled option names and were
    # silently ignored.
    crosshairs = FALSE,
    borderWidth = 1.2,
    pointFormat = paste(
      "Year: <b>{point.x}</b> <br>",
      "Country: <b>{point.country}</b><br>",
      "Suicides Number: <b>{point.y}</b>"
    )
  ) %>%
  hc_title(text = "Suicide change in top GDP countries and South Korea") %>%
  hc_subtitle(text = "1985-2015") %>%
  hc_add_theme(hc_theme_economist())

# Author's notes (translated):
# Interestingly, Japan's figure rose sharply in 1999 and then fell steeply in
# 2011. Japan was among the top-GDP countries, so what happened?
# Next, look only at the 1998-2012 data - Japan's GDP and population growth
# look quite stable.
# NOTE(review): this observation was made on the chart before the aggregation
# above was corrected; re-check it against the regenerated chart.
# Top-GDP countries plus South Korea. The United States entry has to match
# the recoded level "United States of America"; the previous spelling matched
# nothing. The list is defined once here and used by both charts in this
# section.
countries <- c(
  "United States of America",
  "Germany", "Japan",
  "France", "United Kingdom",
  "Canada", "South Korea"
)

# --- GDP per capita, 1998-2011 ----------------------------------------------
# gdp_per_capita.... appears to be a country-year value repeated on every row
# of that country-year, so summing it multiplied the figure by the number of
# rows. The mean returns the value itself. The column keeps its old name
# (gdp_sum) so anything reading data_2 still works.
data_2 <- data %>%
  filter(country %in% countries) %>%
  filter(year >= 1998 & year < 2012) %>%
  group_by(year, country) %>%
  summarise(gdp_sum = mean(gdp_per_capita....)) %>%
  ungroup()

highchart() %>%
  hc_add_series(
    data_2,
    hcaes(x = year, y = gdp_sum, group = country),
    type = "line"
  ) %>%
  hc_tooltip(
    # "crosshair" / "borderwith" were misspelled option names and were
    # silently ignored.
    crosshairs = FALSE,
    borderWidth = 1.2
  ) %>%
  hc_title(text = "GDP rate between 1998 to 2012") %>%
  hc_subtitle(
    text = "Japan`s GDPs are relatively higher than some countries"
  ) %>%
  hc_xAxis(title = list(text = "Year")) %>%
  hc_add_theme(hc_theme_economist())

# --- Total population, 1998-2011 --------------------------------------------
# The shared six-colour palette, printed for reference.
country_color
## [1] "#F0F921FF" "#FCA636FF" "#E16462FF" "#B12A90FF" "#6A00A8FF" "#0D0887FF"

data_3 <- data %>%
  filter(country %in% countries) %>%
  filter(year >= 1998 & year < 2012) %>%
  group_by(year, country) %>%
  summarise(popul = sum(population)) %>%
  ungroup()

# One colour per plotted country, sized from the data so the palette length
# always equals the number of series.
population_palette <- rev(plasma(n_distinct(data_3$country)))

highchart() %>%
  hc_add_series(
    data_3,
    hcaes(x = year, y = popul, group = country),
    type = "line",
    color = population_palette
  ) %>%
  hc_tooltip(
    crosshairs = FALSE,
    borderWidth = 1.2
  ) %>%
  hc_title(text = "Population rate between 1998 to 2012") %>%
  # The subtitle had been copied from the GDP chart. This wording follows the
  # author's note that Japan's population growth looks stable.
  # NOTE(review): confirm the intended subtitle.
  hc_subtitle(text = "Japan's population is relatively stable") %>%
  hc_xAxis(title = list(text = "Year")) %>%
  hc_add_theme(hc_theme_economist())

# --- Suicide rate by generation ---------------------------------------------
# Suicides per 100,000 people for each year and generation. This pipeline
# used to start on the same line as the end of the chart above, which does
# not parse.
gene_table <- data %>%
  select(year, generation, suicides_no, population) %>%
  group_by(year, generation) %>%
  summarise(
    suicide_capita = round((sum(suicides_no) / sum(population)) * 100000, 2)
  ) %>%
  ungroup()
# One line per generation: suicides per 100,000 people by year, read from
# gene_table.
highchart() %>%
  hc_add_series(
    gene_table,
    hcaes(x = year, y = suicide_capita, group = generation),
    type = "line"
  ) %>%
  hc_tooltip(
    # "crosshair" / "borderwidth" were misspelled option names and were
    # silently ignored.
    crosshairs = FALSE,
    borderWidth = 1.2,
    pointFormat = paste(
      "Year: <b>{point.x}</b> <br>",
      # The column is `generation` (lower case); `{point.Generation}` never
      # resolved, so this row was blank.
      "Generation: <b>{point.generation}</b><br>",
      "Suicides Number: <b>{point.y}</b>"
    )
  ) %>%
  hc_title(text = "Suicide number by generation") %>%
  hc_subtitle(text = "1985-2015") %>%
  hc_add_theme(hc_theme_economist())

# Author's note (translated): next, see in which countries the most people
# died, by generation.
# Suicides per 100,000 people for each country and generation, highest first.
country_age <- data %>%
  select(country, generation, suicides_no, population) %>%
  group_by(country, generation) %>%
  summarize(
    suicide_capita = round((sum(suicides_no) / sum(population)) * 100000, 2)
  ) %>%
  ungroup() %>%
  arrange(desc(suicide_capita))

datatable(country_age)

# Series colours for the generations: six plasma colours, reversed. This
# assignment used to be fused onto the datatable() call above, which does
# not parse.
gene_color <- rev(plasma(6))

# Percent-stacked bar per country, split by generation.
highchart() %>%
  hc_add_series(
    country_age,
    hcaes(x = country, y = suicide_capita, group = generation),
    type = "bar",
    color = gene_color
  ) %>%
  # Stacking options.
  hc_plotOptions(
    bar = list(
      stacking = "percent",
      pointPadding = 0, groupPadding = 0, borderWidth = 0.5
    )
  ) %>%
  # Use the point names (the countries) as axis categories. The previous
  # `categories = country_age$country` passed one entry per country-generation
  # row in rate order, so labels and stacks did not line up by country.
  hc_xAxis(
    type = "category",
    labels = list(step = 1),
    min = 0, max = 25,
    scrollbar = list(enabled = TRUE)
  )

# Author's note (translated): by age group, South Korea has the highest
# suicide rate relative to population.
# Suicides per 100,000 people for each country and age group, highest first.
age_bar <- data %>%
  select(country, age, suicides_no, population) %>%
  group_by(country, age) %>%
  summarize(
    suicide_capita = round((sum(suicides_no) / sum(population)) * 100000, 2)
  ) %>%
  ungroup() %>%
  arrange(desc(suicide_capita))

# Percent-stacked bar per country, split by age group.
highchart() %>%
  hc_add_series(
    age_bar,
    hcaes(x = country, y = suicide_capita, group = age),
    type = "bar"
  ) %>%
  # Label the axis with the countries. Taking categories from the point names
  # keeps every age-group series aligned on the same country; the previous
  # `categories = age_bar$country` passed one entry per country-age row in
  # rate order, so labels and stacks did not line up by country.
  hc_xAxis(
    type = "category",
    labels = list(step = 1),
    min = 0, max = 25,
    scrollbar = list(enabled = TRUE)
  ) %>%
  # Stacking option.
  hc_plotOptions(bar = list(stacking = "percent"))