Library

Ini adalah library yang akan kita gunakan

library(readxl)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.3
library(magrittr)
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 3.6.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(corpcor)
library(Hotelling)
library(MVTests)
## Loading required package: matrixcalc
## 
## Attaching package: 'matrixcalc'
## The following object is masked from 'package:corpcor':
## 
##     is.positive.definite
## 
## Attaching package: 'MVTests'
## The following object is masked from 'package:datasets':
## 
##     iris
library(RVAideMemoire)
## Warning: package 'RVAideMemoire' was built under R version 3.6.3
## *** Package RVAideMemoire v 0.9-75 ***
## 
## Attaching package: 'RVAideMemoire'
## The following object is masked from 'package:magrittr':
## 
##     mod

Import data

Pertama-tama kita import data

# Read the two Excel workbooks (paths are relative to the working directory).
df1 <- read_excel(path = "tigapulsa.xlsx")
df2 <- read_excel(path = "tigapulsa2.xlsx")

Cek Boxplot

# Box plot of the "bc" column, one box per value of "label".
ggboxplot(
  data = df1,
  x = "label",
  y = "bc",
  color = "label",
  palette = c("#00AFBB", "#E7B800"),
  xlab = "Label",
  ylab = "Bond Count"
)

# Box plot of the "rbc" column, one box per value of "label".
ggboxplot(
  data = df1,
  x = "label",
  y = "rbc",
  color = "label",
  palette = c("#00AFBB", "#E7B800"),
  xlab = "Label",
  ylab = "RotateBC"
)

Cek histogram

# Histogram of the "bc" column of df1.
hist(df1$bc, main = "Histogram", xlab = "BC", border = "red", col = "green")

# Histogram of the "rbc" column of df1.
hist(df1$rbc, main = "Histogram", xlab = "RBC", border = "red", col = "green")

Cek Normalitas

1. Multivariat Q-Q Plot

# Multivariate normal Q-Q plots, drawn from df2 (not df1).
# NOTE(review): presumably df2 holds the same bc/rbc values as df1 without the
# label column, with rows 1-31 = "aktif" and rows 32-62 = "ta" -- confirm.

# Rows 1-31, first two columns.
qq1 <- mqqnorm(df2[1:31, 1:2], main = "Q-Q Plot Aktif")

# Rows 32-62, first two columns.
qq2 <- mqqnorm(df2[32:62, 1:2], main = "Q-Q Plot Tidak Aktif")

# All rows and all columns of df2.
mqqnorm(df2, main = "Multi-normal Q-Q Plot")

## [1] 31 42

2. Uji normalitas multivariat dengan Shapiro-Wilk (p-value > 0.05 berarti data berdistribusi Normal Multivariat)

# Build one subset per group by matching the label column instead of relying on
# fixed row positions, so the subsets stay correct if the rows are reordered or
# the group sizes change. which() drops rows whose label is NA.
# Assumes the labels are "aktif" and "ta" and the descriptor columns are named
# "bc" and "rbc", as they are used in the rest of this script.
aktif <- df1[which(df1$label == "aktif"), c("bc", "rbc")]
ta <- df1[which(df1$label == "ta"), c("bc", "rbc")]

# Multivariate Shapiro-Wilk normality test on each group separately.
aktiftest <- mvShapiro(aktif)
tatest <- mvShapiro(ta)

Uji Multivariat Shapiro-Wilk Alva-Estrada menghasilkan p-value = 0.107 > 0.05 sehingga dapat diputuskan gagal tolak H0. Artinya, data nilai deskriptor molekul bond count dan rotatable bond count molekul aktif berasal dari populasi yang berdistribusi Normal Multivariat. Keluaran uji untuk populasi aktif ditampilkan di bawah ini, diikuti uji normal multivariat untuk populasi tidak aktif.

# Print the test statistic and p-value stored in aktiftest.
summary(aktiftest)
##        Multivariate Shapiro Wilk Test for Normality 
## 
## The Value of Test Statistic = 0.9498153 
##  p-value: 0.107

Uji Multivariat Shapiro-Wilk Alva-Estrada menghasilkan p-value = 0.162 > 0.05 sehingga dapat diputuskan gagal tolak H0. Artinya, data nilai deskriptor molekul bond count dan rotatable bond count molekul tidak aktif berasal dari populasi yang berdistribusi Normal Multivariat.

# Print the test statistic and p-value stored in tatest.
summary(tatest)
##        Multivariate Shapiro Wilk Test for Normality 
## 
## The Value of Test Statistic = 0.9541639 
##  p-value: 0.162

Selanjutnya akan diperiksa kesamaan matriks varians-kovarians melalui uji Box’s M.

Cek Homogenitas Multivariat (p-value > 0.05 berarti matriks varians-kovarians homogen)

# Box's M test: every column of df1 except the third, grouped by label.
homdf <- BoxM(data = df1[, -3], group = df1$label)
summary(homdf)
##        Box's M Test 
## 
## Chi-Squared Value = 2.5736 , df = 3  and p-value: 0.462
# Box's M test again, this time selecting the first two columns of df1.
results <- BoxM(data = df1[, 1:2], group = df1$label)
summary(results)
##        Box's M Test 
## 
## Chi-Squared Value = 2.5736 , df = 3  and p-value: 0.462

Berdasarkan hasil di atas, nilai p-value = 0.462 > 0.05, artinya matriks varians-kovarians homogen, sehingga populasi aktif dan tidak aktif memenuhi asumsi homogenitas berdasarkan variabel yang terkait. Berdasarkan penelitian tersebut, populasi aktif dan tidak aktif memenuhi uji normal multivariat dan homoskedastisitas matriks varians-kovarians sehingga dapat digunakan untuk penelitian lebih lanjut, yaitu uji vektor mean dua populasi.

Hotelling test

# Drop the third column of df1, then split the remaining rows by label.
split.df <- split(df1[, -3], f = df1$label)

# One data frame per group, looked up by label value.
x <- split.df[["aktif"]]
y <- split.df[["ta"]]

# Two-sample Hotelling statistic for the two groups; print the result.
hotstat <- hotelling.stat(x, y)
hotstat
## $statistic
## [1] 0.3109666
## 
## $m
## [1] 0.4916667
## 
## $df
## [1]  2 59
## 
## $nx
## [1] 31
## 
## $ny
## [1] 31
## 
## $p
## [1] 2
# Hotelling test via the formula interface: all other columns of df1 against
# the label column. Printing shows the test statistic, df and p-value.
mod1 <- hotelling.test(. ~ label, data = df1)
mod1
## Test stat:  0.15289 
## Numerator df:  2 
## Denominator df:  59 
## P-value:  0.8586
# Derive the 1/2 group codes from the label column instead of hard-coding
# 31 + 31 rows, so the grouping stays correct if the rows are reordered or the
# group sizes change. Assumes the only labels are "aktif" and "ta", as used
# elsewhere in this script; fail early if any other (or missing) label appears.
stopifnot(all(df1$label %in% c("aktif", "ta")))
G <- ifelse(df1$label == "aktif", 1, 2)

# Two-sample Hotelling T-square test on every column except the third,
# at alpha = 0.05, with the homogeneity option switched on.
mod2 <- TwoSamplesHT2(
  data = df1[, -3],
  group = G,
  alpha = 0.05,
  Homogenity = TRUE
)
summary(mod2)
##               Two Independent Samples Hotelling T Square Test 
## 
## Hotelling T Sqaure Statistic = 0.3109666 
##  F value = 0.153 , df1 = 2 , df2 = 59 , p-value: 0.859 
## 
##             Descriptive Statistics (The First Group) 
## 
##             bc       rbc
## Means 30.67742 11.870968
## Sd    10.13702  4.080375
## 
## 
##             Descriptive Statistics (The Second Group) 
## 
##              bc       rbc
## Means 32.032258 12.032258
## Sd     8.957246  4.700949
## 
## 
##             Detection important variable(s)
## 
##         Lower    Upper Important Variables?
## bc  -7.507593 4.797916                FALSE
## rbc -2.992559 2.669978                FALSE

Pada tingkat kepercayaan 95%, diketahui bahwa nilai T2 lebih kecil daripada nilai kritis T2(0.05,2,59), yaitu 0.31097 < 10.2 (p-value = 0.859 > 0.05), sehingga dapat diputuskan gagal tolak H0. Dengan demikian, dapat disimpulkan bahwa vektor mean nilai deskriptor bond count dan rotatable bond count molekul aktif sama dengan vektor mean nilai deskriptor bond count dan rotatable bond count molekul tidak aktif.