Final_Project

2.1 genNodes() function & acceptance-rejection sampling with plot

library(RSpectra)
library(MASS)
library(ggplot2)
#set.seed(123)

source("nodeDensity.R")
genNodes = function(n) {
  x<- numeric(0)
  y <- numeric(0)
  M = 4
  #define grid point, find max value of grid points, find max overall
  
  while (length(x) < n) {
    xn = runif(1, 0, 100)
    yn = runif(1, 0, 100)
    u = runif(1)
    if (u*M <= nodeDensity(xn, yn)) {
      x = c(x, xn)
      y = c(y, yn)
    }
  }
  if (length(x) > n) {
    x = x[-((n + 1) : length(x))]
    y = y[-((n + 1) : length(y))]
  }
nodes <- matrix(c(x, y), nrow = n, ncol = 2)
  return(nodes)
}

nodes1 = genNodes(10000)
ggplot(data.frame(x = nodes1[, 1], y = nodes1[, 2])) + geom_point(mapping = aes(x, y), alpha = 0.5) + xlab("x") + ylab("y") + labs(title = "Scatterplot of 10000 generated Nodes")

2.2 function findRc() & 3. function findTranMat(), function getEigen2(), function findRange(),

#finds the range of possible values to search for Rc
findRange = function(mat) {
  #  minimum and maximum values of the mat
  gr_row_min = max(apply(mat, 1, function(row) min(row[row > 0], na.rm = TRUE)))
 
  # i think this is wrong
  # gr_row_min = max(apply(mat, 1, min, na.rm = TRUE))
  sm_row_max = min(apply(mat, 1, max, na.rm = TRUE))
   # need each row of mat that computes mim value excluding 0, MAX OF MIN values. Has to be AT LEAST AS LARGE AS ROW GREATEST MIN
  #find the greatest row minimum in matrix
  L = gr_row_min
  U = sm_row_max
  
  #returns interval min and max
  return(c(L, U))
}

#finds transition matrix based on distance mat and R
findTranMat = function(mat, R) {
  P = matrix(0, nrow = nrow(mat), ncol = ncol(mat))
  #for each node 
  for (i in 1:nrow(mat)) {
    Si = which(mat[i, ] <= R)
        P[i, Si] = 1/length(Si)
  }
  # returns the transition matrix based on above two inputs
  return(P)
}


getEigen2 = function(mat) {
  # returns magnitutde of second largest eig value
  eigenvalues = eigen(mat)$values
    sorted_eigenvalues = sort(eigenvalues, decreasing = TRUE)
  
  # Returns numeric value mag of second largest eig of mat
  return(Mod(sorted_eigenvalues[2]))
}


# find rc function 
findRc = function(nodes, tol = 0.05) {
  mat = as.matrix(dist(nodes))
  range = findRange(mat)
  #  Rmin and Rmax
  Rmin = range[1]
  Rmax = range[2]
  
  R0_bar = (Rmin + Rmax) / 2
  
  #bisection method
  while(abs(Rmin - Rmax) > tol) {
      # transition matrix for R
      P = findTranMat(mat, R0_bar)
      # if R gives connected network
      #this from rspectra??? not sure if correct
      if (abs(getEigen2(P) - 1) > 1e-7 & getEigen2(P) < 1) {
        Rmax = R0_bar
      } else {
        Rmin = R0_bar
      }
      
      R0_bar = (Rmin + Rmax) / 2
  }
  return(Rmax)
}

sample_size = c(50, 100, 150)
network = 1000

Rc_val = lapply(sample_size, function(n) {
  replicate(1000, {
    set.seed(n)
    nodes = genNodes(n)
    findRc(nodes, tol = 0.0001)
  })
})

density plot with all sample sizes

library(tidyr)
library(ggplot2)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following object is masked from 'package:MASS':
## 
##     select

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

sample_size2 = c(50, 100, 150, 200, 250)
networks2 = 50



Rc_val2 = matrix(0, nrow = length(sample_size2), ncol = networks2)
seed_list2 = c(1:networks2)
for (i in 1:length(sample_size2)) {
  n = sample_size2[i]
  for (seed in seed_list2) {
    set.seed(seed)
    nodes = genNodes(n)
    Rc_val2[i, seed] = findRc(nodes)
  }
}

seed_list2 = c(1:networks2)

Rc_df2 = data.frame(Rc = as.vector(t(Rc_val2)), 
                    sample_size2 = rep(sample_size2, each = ncol(Rc_val2)),
                    seed = rep(seed_list2, times = nrow(Rc_val2)))

Rc_df2$sample_size2 = as.factor(Rc_df2$sample_size2)


ggplot(Rc_df2, aes(x = Rc, color = sample_size2)) +
  geom_density() +
  labs(x = "Rc Value", y = "Density", color = "Sample Size", 
       title = "Density plot of Rc values for different sample sizes")

3a The Rc with smaller radius changes with node configurations because they are more spread out than the ones that are higher. When i look at the node network with minimum values corresponding to Rc they are more spread out across the X and Y and have various small concentrations, rather than one conentration in the middle or many at once. 3b For all sample sizes, they are not symmetric mostly and most of htem are right skewed. For the smalles sample size of 50, it is more symmetric than the others, but a little bit has a longer right tail. ALl of them have a longer right tail. The biggest sample size is the most right skewed, and it is also the highest density and heavy-tailed. It is the most heavy tailed. Each one as the sample size decreases becomes a little bit more symmetric and less right-skewed, and also less heavy-tailed. They are not multimodal because they mostly all only have one peak.

seed_list = c(1:network)

Rc_val_list = lapply(seed_list, function(seed) {
  set.seed(seed)
  nodes = genNodes(50)
  findRc(nodes)
})

Rc_val50 = do.call(rbind, Rc_val_list)


mean_Rc = mean(Rc_val50)
median_Rc = median(Rc_val50)
max_Rc = max(Rc_val50)
min_Rc = min(Rc_val50)

median_seed <- which.min(abs(Rc_val50 - median_Rc))
mean_seed <- which.min(abs(Rc_val50 - mean_Rc))

max_seed <- which(Rc_val50 == max_Rc)
min_seed <- which(Rc_val50 == min_Rc)


set.seed(max_seed)

max_network = genNodes(50)

df_max = data.frame()
nodes = 50
for (i in 1:(nrow(max_network)-1)) {
  for (j in (i + 1):nrow(max_network)) {
    if(sqrt(sum((max_network[i, ] - max_network[j, ])^ 2)) <= max_Rc) {
      df1 = data.frame(x = max_network[i, 1], y = max_network[i, 2], xend = max_network[j, 1], yend = max_network[j, 2])
      df_max = rbind(df_max, df1)
      
    }
  }
  
}

plot_max <- ggplot(df_max, aes(x = x, y = y)) +
  geom_segment(aes(xend = xend, yend = yend)) +
  geom_point(color = "blue") +
  labs(x = "X", y = "Y", title = "Node Network Corresponding to maximum values of Rc: 37.80386")

plot_max

set.seed(mean_seed)
mean_network = genNodes(50)


df_mean = data.frame()
nodes = 50
for (i in 1:(nrow(mean_network)-1)) {
  for (j in (i + 1):nrow(mean_network)) {
    if(sqrt(sum((mean_network[i, ] - mean_network[j, ])^ 2)) <= mean_Rc) {
      df2 = data.frame(x = mean_network[i, 1], y = mean_network[i, 2], xend = mean_network[j, 1], yend = mean_network[j, 2])
      df_mean = rbind(df_mean, df2)
      
    }
  }
  
}


plot_mean <- ggplot(df_mean, aes(x = x, y = y)) +
  geom_segment(aes(xend = xend, yend = yend)) +
  geom_point(color = "red") +
  labs(x = "X", y = "Y", title = "Node Network Corresponding to mean values of Rc: 22.8528")


plot_mean

set.seed(median_seed)
median_network = genNodes(50)


df_med = data.frame()
nodes = 50
for (i in 1:(nrow(median_network)-1)) {
  for (j in (i + 1):nrow(median_network)) {
    if(sqrt(sum((median_network[i, ] - median_network[j, ])^ 2)) <= median_Rc) {
      df3 = data.frame(x = median_network[i, 1], y = median_network[i, 2], xend = median_network[j, 1], yend = median_network[j, 2])
      df_med = rbind(df_med, df3)
      
    }
  }
  
}

plot_med <- ggplot(df_med, aes(x = x, y = y)) +
  geom_segment(aes(xend = xend, yend = yend)) +
  geom_point(color = "purple") +
  labs(x = "X", y = "Y", title = "Node Network Corresponding to median values of Rc:22.24529 ")

plot_med

set.seed(min_seed)
min_network = genNodes(50)


df_min = data.frame()
nodes = 50
for (i in 1:(nrow(min_network)-1)) {
  for (j in (i + 1):nrow(min_network)) {
    if(sqrt(sum((min_network[i, ] - min_network[j, ])^ 2)) <= min_Rc) {
      df4 = data.frame(x = min_network[i, 1], y = min_network[i, 2], xend = min_network[j, 1], yend = min_network[j, 2])
      df_min = rbind(df_min, df4)
      
    }
  }
  
}

plot_min <- ggplot(df_min, aes(x = x, y = y)) +
  geom_segment(aes(xend = xend, yend = yend)) +
  geom_point(color = "yellow") +
  labs(x = "X", y = "Y", title = "Node Network Corresponding to minimum values of Rc: 14.75555")

plot_min

df_max$category <- "Max"
df_mean$category <- "Mean"
df_med$category <- "Median"
df_min$category <- "Min"

df_all <- rbind(df_max, df_mean, df_med, df_min)

df_all$category <- as.factor(df_all$category)

ggplot(df_all, aes(x = x, y = y, color = category)) +
  geom_segment(aes(xend = xend, yend = yend)) +
  geom_point() +
  labs(x = "X", y = "Y", title = "Combined Node Network") +
  scale_color_discrete(name = "Category")

Final_Project

Phyllis Chen

2024-03-13