# Build a reproducible random normal sample with rnorm()
set.seed(300) # fixed seed so the draw can be replicated

# 2000 observations from a normal distribution with mean 20 and sd 4.5
myData <- rnorm(n = 2000, mean = 20, sd = 4.5)

length(myData) # number of observations
## [1] 2000
mean(myData) # sample mean
## [1] 20.25773
sd(myData) # sample standard deviation
## [1] 4.590852
# Plot myData
# Histogram of myData; breaks = 30 asks for roughly 30 bins
hist(
  myData,
  breaks = 30,
  col = "yellow",
  main = "Distribusi myData",
  xlab = "Nilai",
  ylab = "Frekuensi"
)

# Dashed red vertical line at the sample mean
abline(v = mean(myData), col = "red", lty = 2, lwd = 2)

# Legend reporting the mean rounded to 5 decimal places
legend(
  "topright",
  legend = paste("Mean =", round(mean(myData), 5)),
  col = "red",
  lty = 2,
  lwd = 2
)

# Bootstrapping with base R functions

# Resample myData 1000 times using a for (i in x) loop
set.seed(200) # fixed seed so the resampling can be replicated

# Size of each bootstrap sample. Taken from the data itself (instead of a
# hard-coded 2000) so the index range always matches myData.
sample.size <- length(myData)

n.samples <- 1000 # number of bootstrap samples

# Preallocate one slot per bootstrap sample rather than growing the vector
# inside the loop
bootstrap.results <- numeric(n.samples)

for (i in seq_len(n.samples)) {
  # Indices drawn with replacement: one bootstrap resample of myData
  obs <- sample.int(sample.size, replace = TRUE)
  bootstrap.results[i] <- mean(myData[obs]) # mean of this bootstrap sample
}
length(bootstrap.results) # sanity check: one mean per bootstrap sample
## [1] 1000
summary(bootstrap.results) # sanity check
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   19.92   20.19   20.26   20.26   20.33   20.57
sd(bootstrap.results) # spread of the bootstrap means (the quantity of interest)
## [1] 0.1021229
# Stack two plots (2 rows, 1 column) with a fixed plot-region size.
# par() returns the previous values of the settings it changes; keep them so
# the layout and plot size do not leak into later plots.
old.par <- par(mfrow = c(2, 1), pin = c(3, 0.98))

# Distribution of the bootstrap means
hist(
  bootstrap.results,
  col = "#d83737",
  xlab = "Mean",
  main = "Means of 1000 bootstrap samples from myData"
)

# Distribution of the original data, for comparison
hist(
  myData,
  col = "#37aad8",
  xlab = "Value",
  main = "Distribution of myData"
)

# Put the graphics settings back the way they were before this section
par(old.par)

# Resample 1000 times from the actual data-generating process (DGP) using a
# for (i in x) loop. Unlike the bootstrap, every iteration draws a brand-new
# sample from the normal distribution instead of resampling myData.

set.seed(200) # fixed seed so the simulation can be replicated

sample.size <- 2000 # observations per simulated sample

n.samples <- 1000 # number of simulated samples

# Preallocate one slot per simulated sample rather than growing the vector
# inside the loop
bootstrap.results <- numeric(n.samples)

for (i in seq_len(n.samples)) {
  # Mean of a fresh draw from the DGP; uses sample.size rather than repeating
  # the literal so the two cannot disagree
  bootstrap.results[i] <- mean(rnorm(sample.size, mean = 20, sd = 4.5))
}
length(bootstrap.results) # sanity check: one mean per simulated sample
## [1] 1000
summary(bootstrap.results) # sanity check
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   19.64   19.93   20.00   20.00   20.07   20.32
sd(bootstrap.results) # spread of the simulated means (the quantity of interest)
## [1] 0.1041927
# Stack two plots (2 rows, 1 column) with a fixed plot-region size.
# par() returns the previous values of the settings it changes; keep them so
# this section's layout and plot size do not carry over into later plots.
old.par <- par(mfrow = c(2, 1), pin = c(3, 1))

# Distribution of the means of the samples drawn from the DGP
hist(
  bootstrap.results,
  col = "#d83737",
  xlab = "Mean",
  main = "Means of 1000 bootstrap samples from the DGP"
)

# Distribution of the original data, for comparison
hist(
  myData,
  col = "#37aad8",
  xlab = "Value",
  main = "Distribution of myData"
)

# Put the graphics settings back the way they were before this section
par(old.par)

# EXERCISES

# Exercise 1 (Latihan 1)

# 1. Fix the seed so the exercise can be replicated
set.seed(150)

# 2. Generate the original data: 1000 draws from a normal distribution
#    with mean 30 and sd 2.5
data_asli <- rnorm(1000, mean = 30, sd = 2.5)

# 3. Bootstrap settings
n_bootstrap <- 50 # number of bootstrap samples
# Preallocate one slot per bootstrap sample rather than growing the vector
# inside the loop
hasil_rata_rata <- numeric(n_bootstrap)

# 4. Resampling loop
for (i in seq_len(n_bootstrap)) {
  # Resample with replacement, same size as the original data
  sampel_temp <- sample(data_asli, size = length(data_asli), replace = TRUE)

  # Store the mean of this bootstrap sample
  hasil_rata_rata[i] <- mean(sampel_temp)
}

# Check the results
length(hasil_rata_rata)
## [1] 50
summary(hasil_rata_rata)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   29.75   29.89   29.92   29.93   29.97   30.10

# Exercise 2 (Latihan 2)

# 1. Arrange the plots in 2 rows and 1 column
par(mfrow = c(2, 1))

# 2. Histogram of the original data
hist(
  data_asli,
  main = "Distribusi Data Asli (n=1000)",
  xlab = "Nilai",
  col = "skyblue",
  border = "white"
)

# 3. Histogram of the bootstrap sample means
hist(
  hasil_rata_rata,
  main = "Distribusi Rata-rata dari 50 Sampel Bootstrap",
  xlab = "Rata-rata Sampel",
  col = "salmon",
  border = "white"
)

# Reset the plot layout to a single panel
par(mfrow = c(1, 1))