Question 05

library(dplyr)
library(ggplot2)

If d is the minimum distance between any pair of n uniformly distributed points from a unit square, then \(n(n-1)d^2\) ~ \(Exp(\frac{2}{\pi})\) provided that n is sufficiently large. Using R to check this result: First, write a function to produce n points uniformly distributed from the unit square. Then, write a function to calculate the smallest distance between any pair of n points. Change the value of n, perform simulation, and comment on what you find. ## Function design

#取最小值函數
d_fun <- function(B){
  d = NULL
  for(i in 1:length(B)/2){
    d[i] <- sum((A[B[1,i],]-A[B[2,i],])^2)}
  d <- min(d)
}

n=20

#計算過程
set.seed(123)
o= Sys.time()
dd = NULL
pp = NULL
ddd =NULL
N=1000 #模擬一千個d值
n=20   #有20個點
#生成100個P值
for(k in 1:100){
for (j in 1:N) {
  A <- cbind(runif(n,0,1),runif(n,0,1))
  B <- combn(c(1:n),2)
  dd[j] <- n*(n-1)*d_fun(B)
}
a <- ks.test(dd,"pexp",pi/2)$p.value
ddd <- c(ddd,dd)
pp=c(pp,a)
}
#繪出所有模擬的統計量
ddd <- as.numeric(ddd)%>% as.data.frame()
ggplot(ddd,aes(ddd)) +
  geom_histogram(aes(y = ..density..), binwidth = 0.2)+
  labs(x="n(n-1)d^2",title="Simulation of n(n-1)d^2 with 100*1000 times ")
## Don't know how to automatically pick scale for object of type data.frame. Defaulting to continuous.

#繪出模擬100個P值散佈圖
mean(pp)
## [1] 0.5226788
sum(pp>0.05)*1/100
## [1] 0.92
pp <- as.data.frame(pp)
ggplot(pp,aes(y=pp),ylab="P值")+
  geom_point(aes(x = 1:100))+
  geom_hline(yintercept=0.05,color="red")+
  labs(x="",y="P_value",title="Simulation of P with 100 times ")

#與指數分配圖對照
ex <- rexp(N,pi/2) %>% as.data.frame()
ggplot(ex,aes(ex))+
geom_histogram(aes(y=..density..),fill="skyblue",binwidth = 0.2)
## Don't know how to automatically pick scale for object of type data.frame. Defaulting to continuous.

Sys.time()-o
## Time difference of 2.46291 mins