Kelompok 4 MSR P2

2023-05-16

Anggota

  1. Muhammad Nachnoer Novatron Fitra Arss (G1401201014)
  2. Maulana Ahsan Fadillah (G1401201062)
  3. Reynaldo Mnurung (G1401201084)

Package

lapply(c("purrr","robustbase","ggpubr","hrbrthemes","gridExtra","grid",
         "SimDesign","fitdistrplus","lmtest"),
       library,character.only=T)[[1]]
## [1] "purrr"     "stats"     "graphics"  "grDevices" "utils"     "datasets" 
## [7] "methods"   "base"

Fungsi Uji Normal

uji.normal<-function(x, object.name="x", graph=TRUE, graph.transformed=TRUE){
  lapply(c("fitdistrplus", "kSamples", "rcompanion"), library, character.only=T) 
  if(any(x<0))x<-x-min(x)+1
  mean <- fitdist(x, "norm")$estimate[1]; sd <- fitdist(x, "norm")$estimate[2]
  uji<-ks.test(x, "pnorm", mean=mean, sd=sd)
  uji1<- ad.test(x, rnorm(length(x), mean=mean, sd=sd))
  pvalue<-uji$p.value
  PVALUE1<-uji1$ad[1,3]
  PVALUE2<-uji1$ad[2,3]
  t<-transformTukey(x,quiet = TRUE,plotit = FALSE)
  pt<-ks.test(t, "pnorm", mean=fitdist(t,"norm")$estimate[1], 
              sd=fitdist(t,"norm")$estimate[2])$p.value
  lambda<-transformTukey(x,returnLambda =TRUE,quiet=TRUE,plotit = FALSE)
  if(graph==TRUE){
    if(graph.transformed==FALSE){
      par(mfrow=c(1,2))
      hist(x, freq=F, col="steelblue", border="white", 
           main=paste("Histogram of ",object.name),xlab=object.name)
      lines(density(x),lwd=2, col="coral")
      qqnorm(x,col="coral");qqline(x,col="steelblue",lwd=2)
    }
    else{
      par(mfrow=c(2,2))
      hist(x, freq=F, col="steelblue", border="white", 
           main=paste("Histogram of ",object.name),xlab=object.name)
      lines(density(x),lwd=2, col="coral")
      hist(t, main=paste("Histogram of ",object.name,"transformed"), 
           xlab=paste(object.name,"transformed"), freq=F, 
           col="steelblue",border = "white")
      lines(density(t),lwd=2, col="coral")
      qqnorm(x,col="coral");qqline(x,col="steelblue",lwd=2)
      qqnorm(t, col="coral");qqline(t,col="steelblue", lwd=2)
    }
  }
  z<-ifelse((PVALUE1>=0.05 & PVALUE2<0.05 ||PVALUE1<0.05 & PVALUE2>=0.05), 
            
            ifelse(pvalue>=0.05, 
                   return(`Hasil Uji Kolmogorov Smirnov`=data.frame(`P-Value`=pvalue,
                                                                    Keputusan="Terima H0, data menyebar normal")), 
                   return(list(`Hasil Uji Kolmogorov Smirnov`=data.frame(`P-Value`=pvalue,
                                                                         Keputusan="Tolak H0, data tidak menyebar normal"),
                               `lambda transformasi`=lambda,
                               `Data Hasil Transformasi Tukey`= t,
                               `Setelah transformasi~Uji Kolmogorov-Smirnov`=data.frame(`P-Value`=pt, 
                                                                                        `Keputusan`=ifelse(pt>=0.05, 
                                                                                                           "Terima H0, data menyebar normal", 
                                                                                                           "Tolak H0, data tidak menyebar normal"))))),
            
            ifelse(((pvalue >= 0.05)&(PVALUE1 >= 0.05||PVALUE2>= 0.05)), 
                   return(list(`Hasil Uji Kolmogorov Smirnov`=data.frame(`P-Value`=pvalue,
                                                                         Keputusan="Terima H0, data menyebar normal"), 
                               `Hasil Uji Anderson`=
                                 data.frame(`P-Value`=
                                              rbind(`Versi 1`=PVALUE1, `Versi 2`=PVALUE2),
                                            Keputusan=rep("Terima H0, data menyebar normal", 2)))), 
                   
                   ifelse((pvalue >= 0.05&(PVALUE1 < 0.05||PVALUE2 < 0.05)),
                          return(list(`Hasil Uji Kolmogorov Smirnov`= data.frame(`P-Value`=pvalue,
                                                                                 Keputusan="Terima H0, data menyebar normal"), 
                                      `Hasil Uji Anderson`=data.frame(`P-Value`=
                                                                        rbind(`Versi 1`=PVALUE1,`Versi 2`=PVALUE2), 
                                                                      Keputusan= rep("Tolak H0, data tidak menyebar normal",2)), 
                                      `lambda transformasi`=lambda,
                                      `Data Hasil Transformasi Tukey`= t,
                                      `Setelah transformasi~Uji Kolmogorov-Smirnov`=
                                        data.frame(`P-Value`=pt, 
                                                   `Keputusan`=ifelse(pt>=0.05, 
                                                                      "Terima H0, data menyebar normal",
                                                                      "Tolak H0, data tidak menyebar normal")))),
                          
                          ifelse(pvalue < 0.05&(PVALUE1 >= 0.05||PVALUE2 >= 0.05),
                                 return(list(`Hasil Uji Kolmogorov Smirnov`=data.frame(`P-Value`=pvalue,
                                                                                       Keputusan="Tolak H0, data tidak menyebar normal"), 
                                             `Hasil Uji Anderson`=data.frame(`P-Value`= rbind(`Versi 1`=PVALUE1,`Versi 2`=PVALUE2), 
                                                                             Keputusan=rep("Terima H0, data menyebar normal",2)),
                                             `lambda transformasi`=lambda,
                                             `Data Hasil Transformasi Tukey`= t,
                                             `Setelah transformasi~Uji Kolmogorov-Smirnov`=data.frame(`P-Value`=pt, 
                                                                                                      `Keputusan`=ifelse(pt>=0.05, 
                                                                                                                         "Terima H0, data menyebar normal", 
                                                                                                                         "Tolak H0, data tidak menyebar normal")))),
                                 
                                 return(list(`Hasil Uji Kolmogorov Smirnov`=data.frame(`P-Value`=pvalue,
                                                                                       Keputusan="Tolak H0, data tidak menyebar normal"),
                                             `Hasil Uji Anderson`=data.frame(`P-Value`=rbind(`Versi 1`=PVALUE1,
                                                                                             `Versi 2`=PVALUE2), 
                                                                             Keputusan=rep("Tolak H0, data tidak menyebar normal",2)), 
                                             `lambda transformasi`=lambda,
                                             `Data Hasil Transformasi Tukey`= t,
                                             `Setelah transformasi~Uji Kolmogorov-Smirnov`=data.frame(`P-Value`=pt,
                                                                                                      `Keputusan`=ifelse(pt>=0.05, 
                                                                                                                         "Terima H0, data menyebar normal", 
                                                                                                                         "Tolak H0, data tidak menyebar normal"))))))))
  return(z)
}

uji.normal1<-function(x, object.name="x", graph=TRUE, graph.transformed=TRUE){
  lapply(c("fitdistrplus", "kSamples", "rcompanion"), library, character.only=T) 
  if(any(x<0))x<-x-min(x)+1
  mean <- mean(x); sd <- sd(x)
  uji<-ks.test(x, "pnorm", mean=mean, sd=sd)
  uji1<- ad.test(x, rnorm(length(x), mean=mean, sd=sd))
  pvalue<-uji$p.value
  PVALUE1<-uji1$ad[1,3]
  PVALUE2<-uji1$ad[2,3]
  t<-transformTukey(x,quiet = TRUE,plotit = FALSE)
  pt<-ks.test(t, "pnorm", mean=mean(t), 
              sd=sd(t))$p.value
  lambda<-transformTukey(x,returnLambda =TRUE,quiet=TRUE,plotit = FALSE)
  if(graph==TRUE){
    if(graph.transformed==FALSE){
      par(mfrow=c(1,2))
      hist(x, freq=F, col="steelblue", border="white", 
           main=paste("Histogram of ",object.name),xlab=object.name)
      lines(density(x),lwd=2, col="coral")
      qqnorm(x,col="coral");qqline(x,col="steelblue",lwd=2)
    }
    else{
      par(mfrow=c(2,2))
      hist(x, freq=F, col="steelblue", border="white", 
           main=paste("Histogram of ",object.name),xlab=object.name)
      lines(density(x),lwd=2, col="coral")
      hist(t, main=paste("Histogram of ",object.name,"transformed"), 
           xlab=paste(object.name,"transformed"), freq=F, 
           col="steelblue",border = "white")
      lines(density(t),lwd=2, col="coral")
      qqnorm(x,col="coral");qqline(x,col="steelblue",lwd=2)
      qqnorm(t, col="coral");qqline(t,col="steelblue", lwd=2)
    }
  }
  z<-ifelse((PVALUE1>=0.05 & PVALUE2<0.05 ||PVALUE1<0.05 & PVALUE2>=0.05), 
            
            ifelse(pvalue>=0.05, 
                   return(`Hasil Uji Kolmogorov Smirnov`=data.frame(`P-Value`=pvalue,
                                                                    Keputusan="Terima H0, data menyebar normal")), 
                   return(list(`Hasil Uji Kolmogorov Smirnov`=data.frame(`P-Value`=pvalue,
                                                                         Keputusan="Tolak H0, data tidak menyebar normal"),
                               `lambda transformasi`=lambda,
                               `Data Hasil Transformasi Tukey`= t,
                               `Setelah transformasi~Uji Kolmogorov-Smirnov`=data.frame(`P-Value`=pt, 
                                                                                        `Keputusan`=ifelse(pt>=0.05, 
                                                                                                           "Terima H0, data menyebar normal", 
                                                                                                           "Tolak H0, data tidak menyebar normal"))))),
            
            ifelse(((pvalue >= 0.05)&(PVALUE1 >= 0.05||PVALUE2>= 0.05)), 
                   return(list(`Hasil Uji Kolmogorov Smirnov`=data.frame(`P-Value`=pvalue,
                                                                         Keputusan="Terima H0, data menyebar normal"), 
                               `Hasil Uji Anderson`=
                                 data.frame(`P-Value`=
                                              rbind(`Versi 1`=PVALUE1, `Versi 2`=PVALUE2),
                                            Keputusan=rep("Terima H0, data menyebar normal", 2)))), 
                   
                   ifelse((pvalue >= 0.05&(PVALUE1 < 0.05||PVALUE2 < 0.05)),
                          return(list(`Hasil Uji Kolmogorov Smirnov`= data.frame(`P-Value`=pvalue,
                                                                                 Keputusan="Terima H0, data menyebar normal"), 
                                      `Hasil Uji Anderson`=data.frame(`P-Value`=
                                                                        rbind(`Versi 1`=PVALUE1,`Versi 2`=PVALUE2), 
                                                                      Keputusan= rep("Tolak H0, data tidak menyebar normal",2)), 
                                      `lambda transformasi`=lambda,
                                      `Data Hasil Transformasi Tukey`= t,
                                      `Setelah transformasi~Uji Kolmogorov-Smirnov`=
                                        data.frame(`P-Value`=pt, 
                                                   `Keputusan`=ifelse(pt>=0.05, 
                                                                      "Terima H0, data menyebar normal",
                                                                      "Tolak H0, data tidak menyebar normal")))),
                          
                          ifelse(pvalue < 0.05&(PVALUE1 >= 0.05||PVALUE2 >= 0.05),
                                 return(list(`Hasil Uji Kolmogorov Smirnov`=data.frame(`P-Value`=pvalue,
                                                                                       Keputusan="Tolak H0, data tidak menyebar normal"), 
                                             `Hasil Uji Anderson`=data.frame(`P-Value`= rbind(`Versi 1`=PVALUE1,`Versi 2`=PVALUE2), 
                                                                             Keputusan=rep("Terima H0, data menyebar normal",2)),
                                             `lambda transformasi`=lambda,
                                             `Data Hasil Transformasi Tukey`= t,
                                             `Setelah transformasi~Uji Kolmogorov-Smirnov`=data.frame(`P-Value`=pt, 
                                                                                                      `Keputusan`=ifelse(pt>=0.05, 
                                                                                                                         "Terima H0, data menyebar normal", 
                                                                                                                         "Tolak H0, data tidak menyebar normal")))),
                                 
                                 return(list(`Hasil Uji Kolmogorov Smirnov`=data.frame(`P-Value`=pvalue,
                                                                                       Keputusan="Tolak H0, data tidak menyebar normal"),
                                             `Hasil Uji Anderson`=data.frame(`P-Value`=rbind(`Versi 1`=PVALUE1,
                                                                                             `Versi 2`=PVALUE2), 
                                                                             Keputusan=rep("Tolak H0, data tidak menyebar normal",2)), 
                                             `lambda transformasi`=lambda,
                                             `Data Hasil Transformasi Tukey`= t,
                                             `Setelah transformasi~Uji Kolmogorov-Smirnov`=data.frame(`P-Value`=pt,
                                                                                                      `Keputusan`=ifelse(pt>=0.05, 
                                                                                                                         "Terima H0, data menyebar normal", 
                                                                                                                         "Tolak H0, data tidak menyebar normal"))))))))
  return(z)
}

Nomor 1 dan 2c

n dan N

n<-c(4,12,20,60,100)
N<-1000

Populasi

set.seed(123);c<-c(rchisq(250,4),rchisq(250,12),rnorm(250),rnorm(250,5,0.5));sim<-rchisq(1000,500)

Sampel Simetris

sams<-lapply(n,function(x) data.frame(replicate(1000,sample(sim,x))))

Sampel Campuran

samc<-lapply(n,function(x) data.frame(replicate(1000,sample(c,x))))
names(sams)<-paste("n =",n);names(samc)<-paste("n =",n)
l<-1:length(sams)

Sebaran Sampel

meanc<-lapply(l,function(x)colMeans(samc[[x]]));names(meanc)<-paste("n =",n)
means<-lapply(l,function(x)colMeans(sams[[x]]));names(means)<-paste("n =",n)

Histogram

mc<-map2(meanc,n,function(x,y) gghistogram(x,bins=30,fill="coral",
                                           title=paste("n = ",y,"(","xbar =",round(mean(x),3),")")))
ms<-map2(means,n,function(x,y) gghistogram(x,bins=30,fill="steelblue",
                                           title=paste("n = ",y,"(","xbar =",round(mean(x),3),")")))
G1c<-ggarrange(plotlist = mc)
G1s<-ggarrange(plotlist = ms)
gridExtra::grid.arrange(G1s,
                        top=textGrob("\nChisq(df=500)\n",
                                     gp=gpar(fontsize=20,font=2)))

gridExtra::grid.arrange(G1c,
                        top=textGrob("\nChisq(df=500) VS [Chisq(df=4),Chisq(df=12),\nNorm(0,1),Norm(5,0.5)]\n",
                                     gp=gpar(fontsize=20,font=2)))

QQPLOT

set.seed(123)
mcq<-map2(meanc,n,function(x,y) ggqqplot(x,color="coral",
                                         title=paste("n = ",y,"\n(","p-value =",round(uji.normal(x,graph = F)$`Hasil Uji Kolmogorov Smirnov`$P.Value,3),")")))

msq<-map2(means,n,function(x,y) ggqqplot(x,color="steelblue",
                                         title=paste("n = ",y,"\n(","p-value =",round(uji.normal1(x,graph = F)$`Hasil Uji Kolmogorov Smirnov`$P.Value,3),")")))
G1cq<-ggarrange(plotlist = mcq)
G1sq<-ggarrange(plotlist = msq)
gridExtra::grid.arrange(G1sq,
                        top=textGrob("\nChisq(df=500)\n",
                                     gp=gpar(fontsize=20,font=2)))

gridExtra::grid.arrange(G1cq,
                        top=textGrob("\n[Chisq(df=4),Chisq(df=12),\nNorm(0,1),Norm(5,0.5)]\n",
                                     gp=gpar(fontsize=20,font=2)))

Nomor 3

Bangkitkan Data

mendefinisikan vektor rataan

set.seed(4)
mu_mvn <- c(10,12)

mendefinisikan matrix korelasi

cor_mat <- matrix(c(1,0.85,0.85,1),nrow=2)

mendefinisikan vektor simpangan baku

sd <- c(2,3)

mengubah matrix korelasi ke kovarians

cov_mat <- MBESS::cor2cov(cor_mat,sd)

membangkitkan data berdistribus multivariate normal

mvn <- rmvnorm(n=100, mean=c(10,12), sigma=cov_mat)
set.seed(4)

membangkitkan data berdasarkan distribusi normal

x1_out <- rnorm(20,25,2)
y1_out <- rnorm(20,25,2)
x1y1_out <-cbind(x1_out,y1_out)

memilih secara acak amatan yang akan diganti dengan pencilan

index_out <- sample.int(nrow(mvn),20)

mengganti amatan dari langkah a dengan amatan dari langkah b

mvn[index_out,] <- x1y1_out

###QQPLOT

qplot(x = V1,y=V2,data = as.data.frame(mvn))
## Warning: `qplot()` was deprecated in ggplot2 3.4.0.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Parameter Simulasi

mendefinisikan ukuran sampel yang dikaji

n <- seq(10,100,10)

mendefinisikan jumlah ulangan

num_rep2 <- 10000

mendefinisikan vektor simpangan baku

sd0 <- c(2,3)

mendefinisikan matrix korelasi

cor_mat <- matrix(c(1,0.85,0.85,1),nrow=2)

mengubah matrix korelasi ke kovarians

cov_mat <- MBESS::cor2cov(cor_mat,sd0)

mendefinisikan vektor rataan

mu_mvn <- c(10,12)

mendefinisikan mu dan sigma untuk data pencilan

mu1 <- 25
sigma1 <- 2

Fungsi Pembangkitan

generate multivariate normal dengan pencilan

gen_mvn_out <- function(n0,mu0,sigma0,n1,mu1,sigma1){
  x_mvn <- rmvnorm(n=n0, mean=mu0, sigma=sigma0)
  x1_out <- rnorm(n1,mu1,sigma1)
  y1_out <- rnorm(n1,mu1,sigma1)
  x1y1_out <-cbind(x1_out,y1_out)
  index_out <- sample.int(nrow(x_mvn),n1)
  x_mvn[index_out,] <- x1y1_out
  return(x_mvn)
}

extract koefisien korelasi

cor_res <- function(x_mat,method){
  cor(x_mat[,1],x_mat[,2],method = method)
}

Simulasi

set.seed(4)

membuat objek data.frame dummy untuk menyimpan hasil simulasi

result_sim2 <- data.frame(n=0,correlation=0,type="")
for(i in seq_along(n)){
  # membangkitan data berdistribusi multivariate normal dengan ulangan 10rb
  sim_mc_mvn <- replicate(num_rep2,                     gen_mvn_out(n0 = n[i],mu_mvn,cov_mat,n1=0.2*n[i],mu1,sigma1),
                          simplify = F)
  # menghitung keofisien korelasi pearson dari 10rb data hasil bangkitan
  pearson_res <- sapply(sim_mc_mvn,cor_res,method="pearson")
  # menghitung keofisien spearman pearson dari 10rb data hasil bangkitan
  spearman_res <- sapply(sim_mc_mvn,cor_res,method="spearman")
  # menghitung rata-rata koefisien
  pearson_sim <- mean(pearson_res)
  spearman_sim <- mean(spearman_res)
  # menyimpan hasil simulasi
  result_sim2 <- rbind(result_sim2,list(n[i],pearson_sim,"pearson"),
                       list(n[i],spearman_sim,"spearman")
  )
}
result_sim2 <- result_sim2[-1,]

Grafik Simulasi

ggplot(data = result_sim2,aes(n,correlation))+
  geom_point(aes(color=type))+geom_hline(yintercept = 0.85)+scale_y_continuous(limits = c(0.5,1))

Nomor 1a dan 1b

Data campuran 50% normal + 50% chi-square

Definisikan populasi

set.seed(123)
populasi <- c(rnorm(500), rchisq(500, df = 12))

Membuat fungsi

sampling_analysis <- function(n) {
  # mengambil sampel acak tanpa pengembalian dari populasi
  sample_data <- sample(populasi, n, replace = FALSE)
  
  # menghitung rata-rata dari sampel
  sample_mean <- mean(sample_data)
  
  # melakukan uji Kolmogorov-Smirnov
  ks_test <- ks.test(sample_data, "pnorm", mean(sample_data), sd(sample_data))
  
  # membuat histogram untuk sampel
  hist(sample_data, main = paste0("n = ", n, ", xbar = ", round(sample_mean, 3)), col = "lightblue", breaks = 30)
  
  # membuat qqplot untuk sampel
  qqnorm(sample_data, main = paste0("n = ", n, ", p-value = ", round(ks_test$p.value, 5)))
  qqline(sample_data)
  
  # mengembalikan nilai rata-rata sampel
  return(sample_mean)
}

menampilkan hasil fungsi untuk masing-masing sampel

set.seed(123)
par(mfrow = c(4,2))  # mengatur layout grafik
for (n in c(4, 12, 20, 60, 100)) {
  sampling_analysis(n)
}

Data campuran 50% chi-square parameter a + 50% chi-square parameter b

Definisikan populasi

set.seed(12)
populasi <- c(rchisq(500, df = 15), rchisq(500, df = 5))
sampling_analysis <- function(n) {
  # mengambil sampel acak tanpa pengembalian dari populasi
  sample_data <- sample(populasi, n, replace = FALSE)
  
  # menghitung rata-rata dari sampel
  sample_mean <- mean(sample_data)
  
  # melakukan uji Kolmogorov-Smirnov
  ks_test <- ks.test(sample_data, "pnorm", mean(sample_data), sd(sample_data))
  
  # membuat histogram untuk sampel
  hist(sample_data, main = paste0("n = ", n, ", xbar = ", round(sample_mean, 3)), col = "lightblue", breaks = 30)
  
  # membuat qqplot untuk sampel
  qqnorm(sample_data, main = paste0("n = ", n, ", p-value = ", round(ks_test$p.value, 5)))
  qqline(sample_data)
  
  # mengembalikan nilai rata-rata sampel
  return(sample_mean)
}

Menampilkan hasil fungsi untuk masing-masing sampel

set.seed(123)
par(mfrow = c(4,2))  # mengatur layout grafik
for (n in c(4, 12, 20, 60, 100)) {
  sampling_analysis(n)
}

SOAL 1 PEKAN 11

Populasi

n<-c(4,12,20,60,100);N<-1000
set.seed(123);a<- c(rnorm(500,0,1), rchisq(500, df = 1));b<- c(rchisq(500, df = 1), rchisq(500, df = 3))
c<-c(rchisq(250,1),rchisq(250,3),rnorm(250,0,1),rnorm(250,3,1.5))

Sampel

sama<-lapply(n,function(x) data.frame(replicate(1000,sample(a,x))))
samb<-lapply(n,function(x) data.frame(replicate(1000,sample(b,x))))
samc<-lapply(n,function(x) data.frame(replicate(1000,sample(c,x))))
names(samb)<-paste("n =",n);names(samb)<-paste("n =",n);names(samc)<-paste("n =",n)
l<-1:length(samc)

Sebaran Sampel

meana<-lapply(l,function(x)colMeans(sama[[x]]));names(meana)<-paste("n =",n)
meanb<-lapply(l,function(x)colMeans(samb[[x]]));names(meanb)<-paste("n =",n)
meanc<-lapply(l,function(x)colMeans(samc[[x]]));names(meanc)<-paste("n =",n)

Visualisasi

mm51<-map2(meana,n,function(x,y) ggdensity(x, rug = TRUE, fill = "lightgray")+
         geom_vline(xintercept = mean(a), color = "red", linetype = "dashed") +
         geom_vline(xintercept = t.test(x,mu=mean(a))$conf.int[1], color = "blue", linetype = "dashed") + 
         geom_vline(xintercept = t.test(x,mu=mean(a))$conf.int[2], color = "blue", linetype = "dashed") + 
         labs(subtitle = paste("n = ",y,",","p =",
                               round(t.test(x,mu=mean(a))$p.value,3),",","CI = ",
                               "[",round(t.test(x,mu=mean(a))$conf.int[1],2),",",
                               round(t.test(x,mu=mean(a))$conf.int[2],2),"]",",",
                               "lCI = ",round((t.test(x,mu=mean(a))$conf.int[2]-t.test(x,mu=mean(a))$conf.int[1]),2))))
G51<-ggarrange(plotlist = mm51);G51

mm52<-map2(meanb,n,function(x,y) ggdensity(x, rug = TRUE, fill = "lightgray")+
            geom_vline(xintercept = mean(b), color = "red", linetype = "dashed") +
            geom_vline(xintercept = t.test(x,mu=mean(b))$conf.int[1], color = "blue", linetype = "dashed") + 
            geom_vline(xintercept = t.test(x,mu=mean(b))$conf.int[2], color = "blue", linetype = "dashed") + 
            labs(subtitle =paste("n = ",y,",","p =",
                                 round(t.test(x,mu=mean(b))$p.value,3),",","CI = ",
                                 "[",round(t.test(x,mu=mean(b))$conf.int[1],2),",",
                                 round(t.test(x,mu=mean(b))$conf.int[2],2),"]",",",
                                 "lCI = ",round((t.test(x,mu=mean(b))$conf.int[2]-t.test(x,mu=mean(b))$conf.int[1]),2))))
G52<-ggarrange(plotlist = mm52);G52

mm53<-map2(meanc,n,function(x,y) ggdensity(x, rug = TRUE, fill = "lightgray")+
            geom_vline(xintercept = mean(c), color = "red", linetype = "dashed") +
            geom_vline(xintercept = t.test(x,mu=mean(c))$conf.int[1], color = "blue", linetype = "dashed") + 
            geom_vline(xintercept = t.test(x,mu=mean(c))$conf.int[2], color = "blue", linetype = "dashed") + 
            labs(subtitle =paste("n = ",y,",","p =",
                                 round(t.test(x,mu=mean(c))$p.value,3),",","CI = ",
                                 "[",round(t.test(x,mu=mean(c))$conf.int[1],2),",",
                                 round(t.test(x,mu=mean(c))$conf.int[2],2),"]",",",
                                 "lCI = ",round((t.test(x,mu=mean(c))$conf.int[2]-t.test(x,mu=mean(c))$conf.int[1]),2)) ))
G53<-ggarrange(plotlist = mm53);G53

SOAL 2 PEKAN 11

Tentukan parameter simulasi

set.seed(123)  # Atur seed untuk reproduksibilitas
n_groups <- 3  # Jumlah kelompok atau perlakuan
n_samples <- c(10, 50, 100)  # Jumlah sampel dalam setiap kelompok
n_simulations <- 1000  # Jumlah simulasi
alpha <- 0.05  # Tingkat signifikansi

A. 50% sebaran normal (N(0,1)) dan 50% sebaran chi-square (χ2(df=1)

Fungsi untuk menghasilkan data simulasi

generate_data <- function(n_groups, n_samples, mean_values, sd_values, violation) {
  data <- NULL
  for (i in 1:n_groups) {
    if (violation) {
      data_group <- rchisq(n_samples[i], df = 1)  # Menggunakan distribusi chi-square untuk melanggar asumsi sebaran normal
    } else {
      data_group <- rnorm(n_samples[i], mean_values[i], sd_values[i])
    }
    data <- rbind(data, data.frame(Group = factor(rep(i, n_samples[i])), Value = data_group))
  }
  return(data)
}

Fungsi untuk melakukan uji ANOVA dan mengembalikan hasil

perform_anova <- function(data) {
  model <- lm(Value ~ Group, data = data)
  anova_result <- anova(model)
  p_value <- anova_result$`Pr(>F)`[1]
  return(p_value)
}

Inisialisasi data frame untuk menyimpan hasil simulasi

results <- data.frame(Sim_Number = integer(),
                      Sample_Size = integer(),
                      Violation = logical(),
                      P_Value = numeric())

Loop simulasi

for (sim in 1:n_simulations) {
  for (violation in c(FALSE, TRUE)) {
    for (sample_size in n_samples) {
      # Tentukan nilai mean dan sd untuk setiap kelompok
      mean_values <- c(0, 0, 0)  # Misalkan ini adalah mean yang sebenarnya untuk setiap kelompok
      sd_values <- c(1, 1, 1)  # Misalkan ini adalah standar deviasi yang sebenarnya untuk setiap kelompok
      
      # Bangkitkan data simulasi
      data <- generate_data(n_groups, rep(sample_size, n_groups), mean_values, sd_values, violation)
      
      # Lakukan uji ANOVA
      p_value <- perform_anova(data)
      
      # Simpan hasil simulasi
      results <- rbind(results, data.frame(Sim_Number = sim,
                                           Sample_Size = sample_size,
                                           Violation = violation,
                                           P_Value = p_value))
    }
  }
}

Plot hasil simulasi

plot1 <- ggplot(results, aes(x = Sample_Size, y = P_Value, color = factor(Violation))) +
  geom_point()
plot1

B. 50% sebaran chi-square (χ2(df=1) dan 50% sebaran chi-square (χ2(df=3)

Fungsi untuk menghasilkan data simulasi

set.seed(123)
generate_data <- function(n_groups, n_samples, mean_values, sd_values, violation) {
  data <- NULL
  for (i in 1:n_groups) {
    if (violation) {
      data_group <- rchisq(n_samples[i], df = 1)  # Menggunakan distribusi chi-square untuk melanggar asumsi sebaran normal
    } else {
      data_group <- rchisq(n_samples[i], df = 3)
    }
    data <- rbind(data, data.frame(Group = factor(rep(i, n_samples[i])), Value = data_group))
  }
  return(data)
}

Fungsi untuk melakukan uji ANOVA dan mengembalikan hasil

perform_anova <- function(data) {
  model <- lm(Value ~ Group, data = data)
  anova_result <- anova(model)
  p_value <- anova_result$`Pr(>F)`[1]
  return(p_value)
}

Inisialisasi data frame untuk menyimpan hasil simulasi

results <- data.frame(Sim_Number = integer(),
                      Sample_Size = integer(),
                      Violation = logical(),
                      P_Value = numeric())

Loop simulasi

for (sim in 1:n_simulations) {
  for (violation in c(FALSE, TRUE)) {
    for (sample_size in n_samples) {
      # Tentukan nilai mean dan sd untuk setiap kelompok
      mean_values <- c(0, 0, 0)  # Misalkan ini adalah mean yang sebenarnya untuk setiap kelompok
      sd_values <- c(1, 1, 1)  # Misalkan ini adalah standar deviasi yang sebenarnya untuk setiap kelompok
      
      # Bangkitkan data simulasi
      data <- generate_data(n_groups, rep(sample_size, n_groups), mean_values, sd_values, violation)
      
      # Lakukan uji ANOVA
      p_value <- perform_anova(data)
      
      # Simpan hasil simulasi
      results <- rbind(results, data.frame(Sim_Number = sim,
                                           Sample_Size = sample_size,
                                           Violation = violation,
                                           P_Value = p_value))
    }
  }
}

Plot hasil simulasi

plot2 <- ggplot(results, aes(x = Sample_Size, y = P_Value, color = factor(Violation))) +
  geom_point();plot2

SOAL 3 PEKAN 11

1. Membuat dataset

set.seed(123)
n <- 1000
X <- runif(n)
Y <- X^2 + 3 * X + rnorm(n, mean = 0, sd = 1) # x^2 + 3x + c

dataset <- data.frame(X,Y)

2. Melihat dataset

head(dataset)
##           X          Y
## 1 0.2875775 0.34354054
## 2 0.7883051 1.99264180
## 3 0.4089769 2.42097794
## 4 0.8830174 4.17983325
## 5 0.9404673 2.19671403
## 6 0.0455565 0.04359744

3. Membangun model regresi linier

model <- lm(Y ~ X, data = dataset)

4. Menghitung residual model

residual <- residuals(model)

5. Memeriksa asumsi ragam homogen

a. secara eksplorasi menggunakan plot residual

plot(dataset$X, residual, xlab = "X", ylab = "Residuals")
abline(h = 0, col = "red", lwd = 2)

### b. Uji Kehomogenan Ragam (Breusch-Pagan test)

bptest(model) #ragam menyebar normal
## 
##  studentized Breusch-Pagan test
## 
## data:  model
## BP = 0.81837, df = 1, p-value = 0.3657