Data simualtion

Simulation

You can also embed plots, for example:

First we set up the data and main function.

lin_model <- function(beta0, beta1, beta2, beta3, n, z, var_i){
  ind = 
    data.frame(
      i = 1:n,
      ei = rnorm(length(1:n), mean=0, sd=var_i),
      female = c(rep(1, z), rep(0, 10-z)) %>% rep(.,100),
      age = rnorm(length(1:n), mean=46, sd=13)
    )
  
  ind <- ind %>% mutate(height = beta0 + beta1 * age + beta2 * female + beta3 *age*female + ei)
  
  model <- lm(height ~ female + age + female*age, data=ind)
  betahat <- model$coefficients[4]
  betahat <- unname(betahat)
  return(list("betahat"=betahat))
}

Then, we define the parameter space

# define fixed parameter grids:
  n_grid<-c(1000)
  var_i_grid<-c(1)
  beta0_grid<-c(10)
  beta2_grid<-c(-6)
  beta3_grid<-c(-0.5)
  gamma_grid<-c(0)
# beta_1 is the parameter that we will let vary to see if changes beta_3 so we specify a grid for beta1 
  beta1_grid<-c(0,5,10)
  z_grid <-c(3,5,8)
  
param_list=list("n"=n_grid, "z"=z_grid, "var_i"=var_i_grid, "beta0"=beta0_grid, "beta1"=beta1_grid, "beta2"=beta2_grid, "beta3"=beta3_grid)

Running simulation

mc_result <- MonteCarlo(func = lin_model, nrep = 500, param_list = param_list)

## Grid of  9  parameter constellations to be evaluated. 
##  
## Progress: 
##  
## 
  |                                                                       
  |                                                                 |   0%

## Warning in searchCommandline(parallel, cpus = cpus, type = type,
## socketHosts = socketHosts, : Unknown option on commandline:
## rmarkdown::render('/Users/yab/Documents/GitHub/Micro/
## Simulations.Rmd',~+~~+~encoding~+~

## 
  |                                                                       
  |=======                                                          |  11%

## Warning in searchCommandline(parallel, cpus = cpus, type = type,
## socketHosts = socketHosts, : Unknown option on commandline:
## rmarkdown::render('/Users/yab/Documents/GitHub/Micro/
## Simulations.Rmd',~+~~+~encoding~+~

## 
  |                                                                       
  |==============                                                   |  22%

## Warning in searchCommandline(parallel, cpus = cpus, type = type,
## socketHosts = socketHosts, : Unknown option on commandline:
## rmarkdown::render('/Users/yab/Documents/GitHub/Micro/
## Simulations.Rmd',~+~~+~encoding~+~

## 
  |                                                                       
  |======================                                           |  33%

## Warning in searchCommandline(parallel, cpus = cpus, type = type,
## socketHosts = socketHosts, : Unknown option on commandline:
## rmarkdown::render('/Users/yab/Documents/GitHub/Micro/
## Simulations.Rmd',~+~~+~encoding~+~

## 
  |                                                                       
  |=============================                                    |  44%

## Warning in searchCommandline(parallel, cpus = cpus, type = type,
## socketHosts = socketHosts, : Unknown option on commandline:
## rmarkdown::render('/Users/yab/Documents/GitHub/Micro/
## Simulations.Rmd',~+~~+~encoding~+~

## 
  |                                                                       
  |====================================                             |  56%

## Warning in searchCommandline(parallel, cpus = cpus, type = type,
## socketHosts = socketHosts, : Unknown option on commandline:
## rmarkdown::render('/Users/yab/Documents/GitHub/Micro/
## Simulations.Rmd',~+~~+~encoding~+~

## 
  |                                                                       
  |===========================================                      |  67%

## Warning in searchCommandline(parallel, cpus = cpus, type = type,
## socketHosts = socketHosts, : Unknown option on commandline:
## rmarkdown::render('/Users/yab/Documents/GitHub/Micro/
## Simulations.Rmd',~+~~+~encoding~+~

## 
  |                                                                       
  |===================================================              |  78%

## Warning in searchCommandline(parallel, cpus = cpus, type = type,
## socketHosts = socketHosts, : Unknown option on commandline:
## rmarkdown::render('/Users/yab/Documents/GitHub/Micro/
## Simulations.Rmd',~+~~+~encoding~+~

## 
  |                                                                       
  |==========================================================       |  89%

## Warning in searchCommandline(parallel, cpus = cpus, type = type,
## socketHosts = socketHosts, : Unknown option on commandline:
## rmarkdown::render('/Users/yab/Documents/GitHub/Micro/
## Simulations.Rmd',~+~~+~encoding~+~

## 
  |                                                                       
  |=================================================================| 100%
##

Getting a summary of results

summary(mc_result)

## Simulation of function: 
## 
## function(beta0, beta1, beta2, beta3, n, z, var_i){
##   ind = 
##     data.frame(
##       i = 1:n,
##       ei = rnorm(length(1:n), mean=0, sd=var_i),
##       female = c(rep(1, z), rep(0, 10-z)) %>% rep(.,100),
##       age = rnorm(length(1:n), mean=46, sd=13)
##     )
##   
##   ind <- ind %>% mutate(height = beta0 + beta1 * age + beta2 * female + beta3 *age*female + ei)
##   
##   model <- lm(height ~ female + age + female*age, data=ind)
##   betahat <- model$coefficients[4]
##   betahat <- unname(betahat)
##   return(list("betahat"=betahat))
## }
## <bytecode: 0x7fce03a88af8>
## 
## Required time: 10.54 secs for nrep = 500  repetitions on 1 CPUs 
## 
## Parameter grid: 
## 
##  beta0 : 10 
##  beta1 : 0 5 10 
##  beta2 : -6 
##  beta3 : -0.5 
##      n : 1000 
##      z : 3 5 8 
##  var_i : 1 
## 
##  
## 1 output arrays of dimensions: 1 3 1 1 1 3 1 500

  df.mc<-MakeFrame(mc_result)
  head(df.mc)

##   beta0 beta1 beta2 beta3    n z var_i    betahat
## 1    10     0    -6  -0.5 1000 3     1 -0.5087816
## 2    10     5    -6  -0.5 1000 3     1 -0.5069446
## 3    10    10    -6  -0.5 1000 3     1 -0.4882312
## 4    10     0    -6  -0.5 1000 5     1 -0.5049646
## 5    10     5    -6  -0.5 1000 5     1 -0.4978548
## 6    10    10    -6  -0.5 1000 5     1 -0.4965313

For the three different levels of beta1, the point estimate of betahat varies only marginally. We must however test the significance of the difference to assert that there is no incidence.

Creating a graph

class(df.mc$z)

## [1] "numeric"

df.mc$z <- as.factor(df.mc$z)
ggplot(df.mc, aes(x=betahat, color=z)) + 
  geom_density() +
  labs(color = TeX("z=")) + 
  labs(title = TeX("Distribution of the $\\beta_3$ estimate for different levels of gender mixity"),
       caption = paste("Source: ",mc_result$meta$nrep," simulated samples of n=",n_grid,", with true coefficient of interest = -0.5")) +
  theme_gw

cbPalette <- c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
  ggplot(df.mc, aes(y=betahat, x=z, color=z)) + geom_boxplot() +
  geom_hline(aes(yintercept=mc_result$param_list$beta3), color="black", linetype="solid", size=.5) +
  labs(color = TeX("$z =$")) + scale_fill_manual(values=cbPalette) +
  labs(title = TeX("Estimates of $\\beta_3$ for different proportions of gender mixity"),
      subtitle = TeX("The estimates appear very comparable in terms of mean, but the precision varies a little"),
       caption = paste("Source: ",mc_result$meta$nrep," simulated samples of n=",n_grid,", with true coefficient of interest = -0.5")) +
  theme_gw

This is not the graph I am trying to get, and I don’t know why.

##Part b: Letting gamma be different than 0.

lin_model_2 <- function(beta0, beta1, beta2, beta3, gamma, n, z, var_i, var_x, mean_x){
  ind2 = 
    data.frame(
      i = 1:n,
      ei = rnorm(length(1:n), mean=0, sd=var_i),
      female = c(rep(1, z), rep(0, 10-z)) %>% rep(.,100),
      age = rnorm(length(1:n), mean=46, sd=13),
      xi = rnorm(length(1:n), mean=mean_x, sd=var_x)
    )
  
  ind2 <- ind2 %>% mutate(height = beta0 + beta1 * age + beta2 * female + beta3 *age*female + gamma*xi + ei)
  
  model.2 <- lm(height ~ age + female + female*age + xi, data=ind2)
  betahat <- model.2$coefficients[5]
  betahat <- unname(betahat)
  return(list("betahat"=betahat))
}

# define fixed parameter grids:
  n_grid<-c(1000)
  var_i_grid<-c(1)
  beta0_grid<-c(10)
  beta2_grid<-c(-6)
  beta3_grid<-c(-0.5)
  gamma_grid<-c(0)
  beta1_grid<-c(10)
  z_grid <-c(5)
  mean_x_grid<-c(0)
# changing properties of x 
  var_x_grid<-c(1,10,20)
  

  
param_list_2=list("n"=n_grid, "z"=z_grid, "var_i"=var_i_grid, "beta0"=beta0_grid, "beta1"=beta1_grid, "beta2"=beta2_grid, "beta3"=beta3_grid, "var_x"=var_x_grid, "gamma"=gamma_grid, "mean_x"=mean_x_grid)

mc_result_2 <- MonteCarlo(func = lin_model_2, nrep = 500, param_list = param_list_2)

## Grid of  3  parameter constellations to be evaluated. 
##  
## Progress: 
##  
## 
  |                                                                       
  |                                                                 |   0%

## Warning in searchCommandline(parallel, cpus = cpus, type = type,
## socketHosts = socketHosts, : Unknown option on commandline:
## rmarkdown::render('/Users/yab/Documents/GitHub/Micro/
## Simulations.Rmd',~+~~+~encoding~+~

## 
  |                                                                       
  |======================                                           |  33%

## Warning in searchCommandline(parallel, cpus = cpus, type = type,
## socketHosts = socketHosts, : Unknown option on commandline:
## rmarkdown::render('/Users/yab/Documents/GitHub/Micro/
## Simulations.Rmd',~+~~+~encoding~+~

## 
  |                                                                       
  |===========================================                      |  67%

## Warning in searchCommandline(parallel, cpus = cpus, type = type,
## socketHosts = socketHosts, : Unknown option on commandline:
## rmarkdown::render('/Users/yab/Documents/GitHub/Micro/
## Simulations.Rmd',~+~~+~encoding~+~

## 
  |                                                                       
  |=================================================================| 100%
##

summary(mc_result_2)

## Simulation of function: 
## 
## function(beta0, beta1, beta2, beta3, gamma, n, z, var_i, var_x, mean_x){
##   ind2 = 
##     data.frame(
##       i = 1:n,
##       ei = rnorm(length(1:n), mean=0, sd=var_i),
##       female = c(rep(1, z), rep(0, 10-z)) %>% rep(.,100),
##       age = rnorm(length(1:n), mean=46, sd=13),
##       xi = rnorm(length(1:n), mean=mean_x, sd=var_x)
##     )
##   
##   ind2 <- ind2 %>% mutate(height = beta0 + beta1 * age + beta2 * female + beta3 *age*female + gamma*xi + ei)
##   
##   model.2 <- lm(height ~ age + female + female*age + xi, data=ind2)
##   betahat <- model.2$coefficients[5]
##   betahat <- unname(betahat)
##   return(list("betahat"=betahat))
## }
## <bytecode: 0x7fce0d102f90>
## 
## Required time: 3.89 secs for nrep = 500  repetitions on 1 CPUs 
## 
## Parameter grid: 
## 
##   beta0 : 10 
##   beta1 : 10 
##   beta2 : -6 
##   beta3 : -0.5 
##   gamma : 0 
##       n : 1000 
##       z : 5 
##   var_i : 1 
##   var_x : 1 10 20 
##  mean_x : 0 
## 
##  
## 1 output arrays of dimensions: 1 1 1 1 1 1 1 1 3 1 500

  df.mc.2<-MakeFrame(mc_result_2)
  head(df.mc.2)

##   beta0 beta1 beta2 beta3 gamma    n z var_i var_x mean_x    betahat
## 1    10    10    -6  -0.5     0 1000 5     1     1      0 -0.4940920
## 2    10    10    -6  -0.5     0 1000 5     1    10      0 -0.5029216
## 3    10    10    -6  -0.5     0 1000 5     1    20      0 -0.4998691
## 4    10    10    -6  -0.5     0 1000 5     1     1      0 -0.5048641
## 5    10    10    -6  -0.5     0 1000 5     1    10      0 -0.5065868
## 6    10    10    -6  -0.5     0 1000 5     1    20      0 -0.5017138

class(df.mc.2$var_x)

## [1] "numeric"

df.mc.2$var_x <- as.factor(df.mc.2$var_x)
  ggplot(df.mc.2, aes(y=betahat, x=var_x, color=var_x)) + geom_boxplot() +
  geom_hline(aes(yintercept=mc_result$param_list$beta3), color="black", linetype="solid", size=.5) +
  labs(color = TeX("$var_x =$")) + scale_fill_manual(values=cbPalette) +
  labs(title = TeX("Estimation of $\\beta_3$ for different variances of X"),
      subtitle = TeX("$\\gamma = 0$ changes in properties of X are not relevant to estimation of $\\beta_3$"),
       caption = paste("Source: ",mc_result$meta$nrep," simulated samples of n=",n_grid,", with true coefficient of interest = -0.5")) +
  theme_gw

This result is expectable because X is here built to be irrelevant to the model, therefore no bias is introduced.

Data simualtion

Y.A.B.

2/26/2019

Preamble

Simulation