# Fix the RNG seed so every run reproduces the same draws.
set.seed(300)
# Simulated data set: 2000 draws from a normal distribution (mean 20, sd 4.5).
myData <- rnorm(n = 2000, mean = 20, sd = 4.5)
# Number of observations.
length(myData)
## [1] 2000
# Sample mean.
mean(myData)
## [1] 20.25773
# Sample standard deviation.
sd(myData)
## [1] 4.590852
# Histogram of myData (30 suggested breaks).
hist(
  myData,
  breaks = 30,
  col = "lightblue",
  main = "Distribusi myData",
  xlab = "Nilai",
  ylab = "Frekuensi"
)
# Dashed red vertical line marking the sample mean.
abline(v = mean(myData), col = "red", lwd = 2, lty = 2)
# Legend reporting the sample mean rounded to 5 decimal places.
legend(
  "topright",
  legend = paste("Mean =", round(mean(myData), 5)),
  col = "red",
  lty = 2,
  lwd = 2
)
# Nonparametric bootstrap of the mean: resample myData with replacement
# n.samples times and store the mean of every resample.
set.seed(200) # Setting the seed for replication purposes
sample.size <- length(myData) # Each resample is as large as the data itself (2000 here)
n.samples <- 1000 # Number of bootstrap samples
bootstrap.results <- numeric(n.samples) # Preallocated: one slot per bootstrap mean
for (i in seq_len(n.samples)) {
  # Indices drawn with replacement from 1..sample.size; this consumes the
  # same random stream as sample(1:sample.size, replace = TRUE).
  obs <- sample.int(sample.size, replace = TRUE)
  bootstrap.results[i] <- mean(myData[obs]) # Mean of the bootstrap sample
}
length(bootstrap.results) # Sanity check: this should contain the mean of 1000 different samples
## [1] 1000
summary(bootstrap.results) # Sanity check
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 19.92 20.19 20.26 20.26 20.33 20.57
sd(bootstrap.results) # Standard deviation of the distribution of means (this is what we are interested in!)
## [1] 0.1021229
# Stack two plots (2 rows, 1 column) with a fixed plot-region size. par()
# returns the previous settings, which are restored after plotting so the
# layout does not leak into later figures.
op <- par(mfrow = c(2, 1), pin = c(5.8, 0.98))
hist(
  bootstrap.results, # Distribution of the bootstrap means
  col = "red",
  xlab = "Mean",
  main = "Means of 1000 bootstrap samples from myData"
)
hist(
  myData, # Distribution of the original data, for comparison
  col = "magenta",
  xlab = "Value",
  main = "Distribution of myData"
)
par(op)
# Sampling distribution of the mean taken straight from the data-generating
# process (DGP): instead of resampling myData, every iteration draws a fresh
# sample from the same normal distribution (mean 20, sd 4.5) and stores its
# mean. The vector keeps the name bootstrap.results although these are new
# draws rather than bootstrap resamples.
set.seed(200) # Setting the seed for replication purposes
sample.size <- 2000 # Sample size
n.samples <- 1000 # Number of samples drawn from the DGP
bootstrap.results <- numeric(n.samples) # Preallocated: one slot per sample mean
for (i in seq_len(n.samples)) {
  # Mean of a fresh sample of sample.size observations from the DGP
  bootstrap.results[i] <- mean(rnorm(sample.size, mean = 20, sd = 4.5))
}
length(bootstrap.results) # Sanity check: this should contain the mean of 1000 different samples
## [1] 1000
summary(bootstrap.results) # Sanity check
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 19.64 19.93 20.00 20.00 20.07 20.32
sd(bootstrap.results) # Standard deviation of the distribution of means (this is what we are interested in!)
## [1] 0.1041927
# Stack two plots (2 rows, 1 column) with a fixed plot-region size. par()
# returns the previous settings, which are restored after plotting so the
# layout does not leak into later figures.
op <- par(mfrow = c(2, 1), pin = c(5.8, 0.98))
hist(
  bootstrap.results, # Distribution of the means of the DGP samples
  col = "#d83737",
  xlab = "Mean",
  main = "Means of 1000 bootstrap samples from the DGP"
)
hist(
  myData, # Distribution of the original data, for comparison
  col = "#37aad8",
  xlab = "Value",
  main = "Distribution of myData"
)
par(op)
Latihan 1
Tetapkan seed (benih) Anda pada angka 150. Hasilkan distribusi normal acak dari 1000 observasi, dengan rata-rata 30 dan simpangan baku 2,5. Hitung rata-rata dari 50 sampel, masing-masing berisi 1000 observasi yang diambil dari kumpulan data tersebut. Simpan hasil Anda dalam vektor.
Fungsi yang relevan: set.seed(), rnorm(), for(i in x), sample().
Answer:
# Exercise 1: means of 50 samples of 1000 observations each, resampled from a
# simulated data set of 1000 normal draws (mean 30, sd 2.5).
set.seed(150) # Seed requested by the exercise
data <- rnorm(1000, mean = 30, sd = 2.5)
sample_means <- numeric(50) # Preallocated: one slot per sample mean
for (i in seq_along(sample_means)) {
  # Each sample has as many observations as the data set. Sampling is done
  # with replacement: drawing all 1000 values without replacement would
  # return the same values (and the same mean) every time.
  sample_i <- sample(data, size = length(data), replace = TRUE)
  sample_means[i] <- mean(sample_i)
}
sample_means
## Output not reproduced here: the listing recorded earlier came from samples
## of 50 observations drawn without replacement. Re-run this block to print
## the 50 means.
Latihan 2
Hasilkan dua histogram untuk menampilkan secara grafis distribusi rata-rata yang diperoleh dalam Latihan 1 serta nilai dari 1000 observasi dalam kumpulan data asli Anda. Gabungkan histogram ini menjadi satu grafik keseluruhan.
Fungsi yang relevan: par(), hist().
Answer:
# Exercise 2: rebuild the Exercise 1 data and sample means, then show both
# distributions side by side in one figure.
set.seed(150) # Seed requested by the exercise
data <- rnorm(1000, mean = 30, sd = 2.5)
sample_means <- numeric(50) # Preallocated: one slot per sample mean
for (i in seq_along(sample_means)) {
  # Each sample has as many observations as the data set. Sampling is done
  # with replacement: drawing all 1000 values without replacement would
  # return the same values (and the same mean) every time.
  sample_i <- sample(data, size = length(data), replace = TRUE)
  sample_means[i] <- mean(sample_i)
}
# 1 row, 2 columns. par() returns the previous settings, which are restored
# after plotting so the layout does not leak into later figures.
op <- par(mfrow = c(1, 2))
# Histogram of the original data
hist(data,
     main = "Distribusi Data Asli",
     xlab = "Nilai",
     col = "skyblue",
     border = "blue")
# Histogram of the sample means
hist(sample_means,
     main = "Distribusi Rata-rata Sampel",
     xlab = "Nilai Rata-rata",
     col = "salmon",
     border = "red")
par(op)