#Packages
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Warning: package 'Hmisc' was built under R version 3.6.3
## Loading required package: lattice
## Loading required package: survival
## Warning: package 'survival' was built under R version 3.6.3
## Loading required package: Formula
## Loading required package: ggplot2
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
## Warning: package 'forecast' was built under R version 3.6.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
## Warning: package 'psychometric' was built under R version 3.6.3
## Loading required package: multilevel
## Warning: package 'multilevel' was built under R version 3.6.3
## Loading required package: nlme
##
## Attaching package: 'nlme'
## The following object is masked from 'package:forecast':
##
## getResponse
## The following object is masked from 'package:dplyr':
##
## collapse
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
##
## Attaching package: 'psychometric'
## The following object is masked from 'package:ggplot2':
##
## alpha
Pembunuhan : https://ourworldindata.org/grapher/homicides-per-100000-people-per-year Gangguan Mental : https://ourworldindata.org/grapher/people-with-mental-health-disorders Populasi : https://ourworldindata.org/grapher/population
#Import Data
# Read the homicide CSV; the names given in col.names are used as the
# column names of the resulting data frame.
Angka_Pembunuhan <- read.csv(
  file = "Kasus Pembunuhan.csv",
  header = TRUE,
  col.names = c("Negara", "Kode", "Tahun", "Angka_Pembunuhan"),
  stringsAsFactors = FALSE
)
str(Angka_Pembunuhan)## 'data.frame': 20 obs. of 4 variables:
## $ Negara : chr "United States" "United States" "United States" "United States" ...
## $ Kode : chr "USA" "USA" "USA" "USA" ...
## $ Tahun : int 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 ...
## $ Angka_Pembunuhan: num 24959880 26472057 25442110 25953200 24650653 ...
## Negara Kode Tahun Angka_Pembunuhan
## 1 United States USA 1990 24959880
## 2 United States USA 1991 26472057
## 3 United States USA 1992 25442110
## 4 United States USA 1993 25953200
## 5 United States USA 1994 24650653
## 6 United States USA 1995 22804104
# Read the mental-disorder CSV with the same four-column layout
# (country, code, year, value); text columns stay character.
Gangguan_Mental <- read.csv(
  file = "Gangguan Mental.csv",
  header = TRUE,
  col.names = c("Negara", "Kode", "Tahun", "Gangguan_Mental"),
  stringsAsFactors = FALSE
)
str( Gangguan_Mental)## 'data.frame': 20 obs. of 4 variables:
## $ Negara : chr "United States" "United States" "United States" "United States" ...
## $ Kode : chr "USA" "USA" "USA" "USA" ...
## $ Tahun : int 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 ...
## $ Gangguan_Mental: num 36714377 37177571 37697832 38202852 38705887 ...
# Read the population CSV with the same four-column layout
# (country, code, year, value); text columns stay character.
Populasi <- read.csv(
  file = "Populasi.csv",
  header = TRUE,
  col.names = c("Negara", "Kode", "Tahun", "Total_Populasi"),
  stringsAsFactors = FALSE
)
str(Populasi)## 'data.frame': 20 obs. of 4 variables:
## $ Negara : chr "United States" "United States" "United States" "United States" ...
## $ Kode : chr "USA" "USA" "USA" "USA" ...
## $ Tahun : int 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 ...
## $ Total_Populasi: int 252120000 254539008 256991008 259532000 262240992 265164000 268335008 271713984 275175008 278548000 ...
#Combine Data
# Build the combined table in one chain, matching on country, code and year:
#   1. right join keeps every row of Gangguan_Mental,
#   2. left join then attaches Total_Populasi to those rows.
joined_rates_data <- Angka_Pembunuhan %>%
  right_join(Gangguan_Mental, by = c("Negara", "Kode", "Tahun")) %>%
  left_join(Populasi, by = c("Negara", "Kode", "Tahun"))
head(joined_rates_data)## Negara Kode Tahun Angka_Pembunuhan Gangguan_Mental Total_Populasi
## 1 United States USA 1990 24959880 36714377 252120000
## 2 United States USA 1991 26472057 37177571 254539008
## 3 United States USA 1992 25442110 37697832 256991008
## 4 United States USA 1993 25953200 38202852 259532000
## 5 United States USA 1994 24650653 38705887 262240992
## 6 United States USA 1995 22804104 39220805 265164000
#Struktur
## 'data.frame': 20 obs. of 6 variables:
## $ Negara : chr "United States" "United States" "United States" "United States" ...
## $ Kode : chr "USA" "USA" "USA" "USA" ...
## $ Tahun : int 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 ...
## $ Angka_Pembunuhan: num 24959880 26472057 25442110 25953200 24650653 ...
## $ Gangguan_Mental : num 36714377 37177571 37697832 38202852 38705887 ...
## $ Total_Populasi : int 252120000 254539008 256991008 259532000 262240992 265164000 268335008 271713984 275175008 278548000 ...
# Applying type conversion
# Negara and Kode are identifiers, so they are stored as factors.
joined_rates_data$Negara <- as.factor(joined_rates_data$Negara)
joined_rates_data$Kode <- as.factor(joined_rates_data$Kode)
# as.integer() truncates toward zero (e.g. 36714376.9 -> 36714376), which
# silently shifted the counts by one; round to the nearest whole number first.
joined_rates_data$Gangguan_Mental <-
  as.integer(round(joined_rates_data$Gangguan_Mental))
joined_rates_data$Total_Populasi <-
  as.integer(round(joined_rates_data$Total_Populasi))
# Verifying type of variables
class(joined_rates_data$Negara)## [1] "factor"
## [1] "factor"
## [1] "integer"
## [1] "integer"
#Merapikan Data
# Drop the country-code column (Kode) by keeping only the columns used later.
joined_rates_data <- joined_rates_data[
  c("Negara", "Tahun", "Angka_Pembunuhan", "Gangguan_Mental", "Total_Populasi")
]
# Per-country averages, rounded to 3 decimals.
# Both means skip NA: the right join above can leave Angka_Pembunuhan missing
# for rows that only exist in Gangguan_Mental, and a single NA would otherwise
# turn the whole average into NA.
joined_rates_data %>%
  group_by(Negara) %>%
  summarise(
    avg_Angka_Pembunuhan = round(mean(Angka_Pembunuhan, na.rm = TRUE), 3),
    avg_person_mental_disorder = round(mean(Gangguan_Mental, na.rm = TRUE), 3)
  )
## # A tibble: 1 x 3
## Negara avg_Angka_Pembunuhan avg_person_mental_disorder
## <fct> <dbl> <dbl>
## 1 United States 20206733. 41972066.
## Negara Tahun Angka_Pembunuhan Gangguan_Mental Total_Populasi
## 1 United States 1990 24959880 36714376 252120000
## 2 United States 1991 26472057 37177570 254539008
## 3 United States 1992 25442110 37697832 256991008
## 4 United States 1993 25953200 38202852 259532000
## 5 United States 1994 24650653 38705887 262240992
## 6 United States 1995 22804104 39220805 265164000
#Memanipulasi Data
# Add Gangguan_Mental_rate: mental-disorder count per 100,000 of population.
# The division is done before scaling, exactly as in the original expression.
joined_rates_data <- joined_rates_data %>%
  mutate(Gangguan_Mental_rate = (Gangguan_Mental / Total_Populasi) * 100000)
head(joined_rates_data)## Negara Tahun Angka_Pembunuhan Gangguan_Mental Total_Populasi
## 1 United States 1990 24959880 36714376 252120000
## 2 United States 1991 26472057 37177570 254539008
## 3 United States 1992 25442110 37697832 256991008
## 4 United States 1993 25953200 38202852 259532000
## 5 United States 1994 24650653 38705887 262240992
## 6 United States 1995 22804104 39220805 265164000
## Gangguan_Mental_rate
## 1 14562.26
## 2 14605.84
## 3 14668.93
## 4 14719.90
## 5 14759.66
## 6 14791.15
# Add percentage: Angka_Pembunuhan relative to Gangguan_Mental_rate, times 100.
# NOTE(review): this divides the homicide column by a per-100,000 rate;
# confirm both columns are on the intended scale.
joined_rates_data <- joined_rates_data %>%
  mutate(percentage = (Angka_Pembunuhan / Gangguan_Mental_rate) * 100)
head(joined_rates_data)## Negara Tahun Angka_Pembunuhan Gangguan_Mental Total_Populasi
## 1 United States 1990 24959880 36714376 252120000
## 2 United States 1991 26472057 37177570 254539008
## 3 United States 1992 25442110 37697832 256991008
## 4 United States 1993 25953200 38202852 259532000
## 5 United States 1994 24650653 38705887 262240992
## 6 United States 1995 22804104 39220805 265164000
## Gangguan_Mental_rate percentage
## 1 14562.26 171401.1
## 2 14605.84 181242.9
## 3 14668.93 173442.2
## 4 14719.90 176313.7
## 5 14759.66 167013.7
## 6 14791.15 154174.0
## Negara Tahun Angka_Pembunuhan
## 0 0 0
## Gangguan_Mental Total_Populasi Gangguan_Mental_rate
## 0 0 0
## percentage
## 0
# Mean imputation (for numerical variables): fill missing population values
# with the column mean.
joined_rates_data[["Total_Populasi"]] <- impute(
  joined_rates_data[["Total_Populasi"]],
  fun = mean
)
# Recompute the derived columns only where they are still NA; mutate()
# evaluates its arguments in order, so percentage sees the refreshed rate.
joined_rates_data <- joined_rates_data %>%
  mutate(
    Gangguan_Mental_rate = ifelse(
      is.na(Gangguan_Mental_rate),
      (Gangguan_Mental / Total_Populasi) * 100000,
      Gangguan_Mental_rate
    ),
    percentage = ifelse(
      is.na(percentage),
      (Angka_Pembunuhan / Gangguan_Mental_rate) * 100,
      percentage
    )
  )
#re verifying for na values
sum(is.na(joined_rates_data$Total_Populasi))## [1] 0
## [1] 0
## [1] 0
# Flag non-finite numeric entries (Inf, -Inf, NaN) element-wise.
#
# x: any vector, typically one column of a data frame.
# Returns a logical vector with one entry per element of x. Non-numeric input
# (character, factor, ...) cannot hold Inf/NaN, so it yields all FALSE instead
# of the NULL the bare `if` used to return; sum() over the result is still 0.
# NA is not treated as special: both is.infinite() and is.nan() are FALSE
# for it.
is.special <- function(x) {
  if (!is.numeric(x)) {
    return(logical(length(x)))
  }
  is.infinite(x) | is.nan(x)
}
#finding number of inconsistent values in data
sapply(joined_rates_data, function(x) sum(is.special(x)))## Negara Tahun Angka_Pembunuhan
## 0 0 0
## Gangguan_Mental Total_Populasi Gangguan_Mental_rate
## 0 0 0
## percentage
## 0
# Keep only the numeric analysis columns. select() is called with the dplyr::
# prefix because MASS, attached later, masks dplyr's select.
joined_data_sub <- dplyr::select(
  joined_rates_data,
  Angka_Pembunuhan,
  Gangguan_Mental,
  Total_Populasi,
  Gangguan_Mental_rate,
  percentage
)
summary(joined_data_sub) ## Angka_Pembunuhan Gangguan_Mental Total_Populasi
## Min. :16540632 Min. :36714376 Min. :252120000
## 1st Qu.:17504745 1st Qu.:39092076 1st Qu.:264433248
## Median :18399073 Median :42595030 Median :280129504
## Mean :20206733 Mean :41972066 Mean :279164152
## 3rd Qu.:23265741 3rd Qu.:44782734 3rd Qu.:293014752
## Max. :26472057 Max. :46131343 Max. :306308000
## Gangguan_Mental_rate percentage
## Min. :14562 Min. :108944
## 1st Qu.:14783 1st Qu.:114614
## Median :15095 Median :120949
## Mean :15022 Mean :134896
## 3rd Qu.:15259 3rd Qu.:157384
## Max. :15289 Max. :181243
# z-score method for detecting outliers
# NOTE(review): scores() is presumably outliers::scores; the package is not
# visible among the attach messages above - confirm it is loaded.
z.scores <- scores(joined_data_sub, type = "z")
length(which( abs(z.scores) >3 ))## [1] 0
# Cap outliers in a numeric vector.
#
# Values below Q1 - 1.5 * IQR are replaced by the 5th percentile and values
# above Q3 + 1.5 * IQR by the 95th percentile.
#
# x: numeric vector; NA entries are ignored when computing the quantiles and
#    are returned unchanged (previously any NA made quantile()/IQR() fail).
# Returns a vector of the same length as x with the outliers capped.
cap <- function(x) {
  quantiles <- unname(
    quantile(x, c(0.05, 0.25, 0.75, 0.95), na.rm = TRUE)
  )
  # Compute the spread once, from the untouched data, so the upper fence is
  # not affected by values already replaced at the lower end.
  spread <- 1.5 * IQR(x, na.rm = TRUE)
  lower_fence <- quantiles[2] - spread
  upper_fence <- quantiles[3] + spread
  # which() drops the NA comparisons, so missing values are never selected.
  x[which(x < lower_fence)] <- quantiles[1]
  x[which(x > upper_fence)] <- quantiles[4]
  x
}
# Apply the user-defined function "cap" to every column of the data frame.
# lapply() always yields one list element per column, so the result has the
# same shape as the input even for a single-row frame; sapply() collapses that
# case to a plain vector, which as.data.frame() then turns into one column.
# as.numeric() gives cap() a plain double vector, matching the all-double
# matrix the sapply() version produced.
joined_data_capped <- as.data.frame(
  lapply(joined_data_sub, function(column) cap(as.numeric(column)))
)
summary(joined_data_capped)## Angka_Pembunuhan Gangguan_Mental Total_Populasi
## Min. :16540632 Min. :36714376 Min. :252120000
## 1st Qu.:17504745 1st Qu.:39092076 1st Qu.:264433248
## Median :18399073 Median :42595030 Median :280129504
## Mean :20206733 Mean :41972066 Mean :279164152
## 3rd Qu.:23265741 3rd Qu.:44782734 3rd Qu.:293014752
## Max. :26472057 Max. :46131343 Max. :306308000
## Gangguan_Mental_rate percentage
## Min. :14562 Min. :108944
## 1st Qu.:14783 1st Qu.:114614
## Median :15095 Median :120949
## Mean :15022 Mean :134896
## 3rd Qu.:15259 3rd Qu.:157384
## Max. :15289 Max. :181243
# Square-root transformation of the capped Angka_Pembunuhan column,
# followed by its histogram.
Angka_Pembunuhan_transformed <- sqrt(joined_data_capped[["Angka_Pembunuhan"]])
hist(Angka_Pembunuhan_transformed)
# Square-root transformation of the capped percentage column.
percentage_transformed <- sqrt(joined_data_capped[["percentage"]])
hist(percentage_transformed)
Untuk mengetahui hubungan antara Angka Pembunuhan dengan angka gangguan kesehatan jiwa, dilakukan uji paired t-test dengan asumsi bahwa data berdistribusi normal.
# Two-sided paired t-test at the 95% confidence level between the capped
# Angka_Pembunuhan and Gangguan_Mental_rate columns.
# NOTE(review): a paired t-test compares the means of the two columns; if the
# goal is to measure association, confirm this is the intended test.
pttest <- t.test(
  x = joined_data_capped$Angka_Pembunuhan,
  y = joined_data_capped$Gangguan_Mental_rate,
  alternative = "two.sided",
  paired = TRUE,
  conf.level = 0.95
)
pttest##
## Paired t-test
##
## data: joined_data_capped$Angka_Pembunuhan and joined_data_capped$Gangguan_Mental_rate
## t = 25.818, df = 19, p-value = 2.937e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 18554824 21828597
## sample estimates:
## mean of the differences
## 20191710
Hipotesis nol: Tidak ada hubungan antara tingkat Pembunuhan dan tingkat gangguan kesehatan mental. Hipotesis alternatif: Ada hubungan antara Angka Pembunuhan dengan angka gangguan kesehatan jiwa. Temuan dari hasil uji hipotesis dengan memperoleh nilai p dan interval kepercayaan.
Uji-t sampel berpasangan dilakukan untuk analisis. Uji-t menghasilkan nilai p sebesar 2.937e-16, yang lebih kecil dari nilai alpha 0.05. Interval kepercayaan 95% untuk perbedaan rata-rata (18554824 sampai 21828597) tidak mencakup nol. Hasilnya signifikan secara statistik. Jadi, hasil kami mendukung hipotesis alternatif. Oleh karena itu, kami sampai pada kesimpulan bahwa:
Uji-t sampel berpasangan menemukan hubungan yang signifikan secara statistik antara tingkat Pembunuhan dan tingkat gangguan kesehatan mental
Analisisnya akan lebih baik jika dilakukan lagi penelitian kepada orang yang berbeda usia dan keadaan yang berbeda seperti masalah ekonomi, tingkat kriminalitas, tingkat konsumsi alkohol, gender, aturan yang berlaku di tiap negara bagian.