Theme Song



1 Setting

1.1 SCSS Setup

# install.packages("remotes")
require('BBmisc')
#remotes::install_github("rstudio/sass")
lib('sass')
## sass 
## TRUE
/* https://stackoverflow.com/a/66029010/3806250 */
h1 { color: #002C54; }
h2 { color: #2F496E; }
h3 { color: #375E97; }

/* ----------------------------------------------------------------- */
/* https://gist.github.com/himynameisdave/c7a7ed14500d29e58149#file-broken-gradient-animation-less */
.hover01 {
  /* color: #FFD64D; */
  background: linear-gradient(155deg, #EDAE01 0%, #FFEB94 100%);
  transition: all 0.45s;
  &:hover{
    background: linear-gradient(155deg, #EDAE01 20%, #FFEB94 80%);
    }
  }

.hover02 {
  color: #FFD64D;
  background: linear-gradient(155deg, #002C54 0%, #4CB5F5 100%);
  transition: all 0.45s;
  &:hover{
    background: linear-gradient(155deg, #002C54 20%, #4CB5F5 80%);
    }
  }

.hover03 {
  color: #FFD64D;
  background: linear-gradient(155deg, #A10115 0%, #FF3C5C 100%);
  transition: all 0.45s;
  &:hover{
    background: linear-gradient(155deg, #A10115 20%, #FF3C5C 80%);
    }
  }
## https://stackoverflow.com/a/36846793/3806250
options(width = 999)
knitr::opts_chunk$set(class.source = 'hover01', class.output = 'hover02', class.error = 'hover03')



1.2 Setup

if(!suppressPackageStartupMessages(require('BBmisc'))) {
  install.packages('BBmisc', dependencies = TRUE, INSTALL_opts = '--no-lock')
}
suppressPackageStartupMessages(require('BBmisc'))
# suppressPackageStartupMessages(require('rmsfuns'))

pkgs <- c('devtools', 'knitr', 'kableExtra', 'tidyr', 
          'readr', 'lubridate', 'data.table', 'reprex', 
          'timetk', 'plyr', 'dplyr', 'stringr', 'magrittr', 
          'tdplyr', 'tidyverse', 'formattable', 
          'echarts4r', 'paletteer')

suppressAll(lib(pkgs))
# load_pkg(pkgs)

## Set the timezone but not change the datetime
Sys.setenv(TZ = 'Asia/Tokyo')
## options(knitr.table.format = 'html') will set all kableExtra tables to be 'html', otherwise need to set the parameter on every single table.
options(warn = -1, knitr.table.format = 'html')#, digits.secs = 6)

## https://stackoverflow.com/questions/39417003/long-vectors-not-supported-yet-abnor-in-rmd-but-not-in-r-script
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
#, 
                      #cache = TRUE, cache.lazy = FALSE)

rm(pkgs)



2 受講生によるテストの実施:Simulating from a Mixture Model

2.1 説明

Consider the code provided in the lesson “Sample code for simulating from a Mixture Model”:

# Sample code for simulating from a Mixture Model
# Generate n observations from a mixture of two Gaussian 
# distributions
n     = 50           # Size of the sample to be generated
w     = c(0.6, 0.4)  # Weights
mu    = c(0, 5)      # Means
sigma = c(1, 2)      # Standard deviations
cc    = sample(1:2, n, replace=T, prob=w)
x     = rnorm(n, mu[cc], sigma[cc])

# Plot f(x) along with the observations 
# just sampled
xx = seq(-5, 12, length=200)
yy = w[1]*dnorm(xx, mu[1], sigma[1]) + 
  w[2]*dnorm(xx, mu[2], sigma[2])
par(mar=c(4,4,1,1)+0.1)
plot(xx, yy, type="l", ylab="Density", xlab="x", las=1, lwd=2)
points(x, y=rep(0,n), pch=1)

Source : Coursera-Bayesian-Statistics-Mixture-Models/Week1 Sample Code for Sim Mix Model.R

Modify the code above to sample 200 random numbers from a mixture of 3 Poisson distributions with means 1, 2 and 6 and weights 0.7, 0.2 and 0.1, respectively, and generate a barplot with the empirical frequencies of all the integers included in the sample.

2.1.1 Review criteria

The response should follow the same template as the sample code provided above. Peer reviewers will be asked to check whether the different pieces of code have been adequately modified to reflect the fact that (1) you are working now with 3 components rather than 2, (2) that the components of the mixture are Poisson distributions rather than Gaussians, (3) that the empirical frequencies associated with the sample generated are correctly computed, (4) that a barplot is used to represent the empirical frequencies.

2.2 自分の提出物

2.2.1 Assignment

n     = 200               # Size of the sample to be generated
w     = c(0.7, 0.2, 0.1)  # Weights
lamba = c(1, 2, 6)        # Means
cc    = sample(1:3, n, replace=TRUE, prob=w)
x     = rpois(n, lamba[cc])
empfreq = table(factor(x, levels=seq(0, max(x))))/n
#barplot(empfreq)

e_common(
  font_family = "Raleway",
  theme = NULL
)

tibble(no = 1:nrow(empfreq), value = empfreq) |>
  e_charts(no) |>
   e_theme('dark-mushroom') |>
  e_bar(value, stack = 'grp') |>
  e_line(value, stack = 'grp2') |>
   e_title('Timeline', 'Plot', sublink = 'https://echarts4r.john-coene.com/reference/e_bar.html')

2.2.2 Marking

Have the parameters of the mixture (both weights and component-specific parameters) been correctly specified?

The lines

n     = 50
w     = c(0.6, 0.4)  # Weights
mu    = c(0, 5)      # Means
sigma = c(1, 2)      # Standard deviations

in the original code should have been replaced by something like

n      = 200
w      = c(0.7, 0.2, 0.1)  # Weights
lambda = c(1, 2, 6)        # Means
  • 0点 No
  • 1点 Yes

Is the sampling of the indicator variables done correctly?

The line

cc    = sample(1:2, n, replace=T, prob=w)
## Error in sample.int(length(x), size, replace, prob): incorrect number of probabilities
cc
##   [1] 1 1 1 1 1 2 1 1 1 3 1 3 1 1 2 1 3 1 1 2 1 1 1 1 1 1 3 1 2 3 1 2 1 1 1 1 3 1 2 1 1 1 1 3 1 1 3 2 1 1 2 1 1 1 3 1 3 1 1 1 2 2 1 1 3 2 3 2 1 2 2 2 1 3 1 1 1 1 2 1 1 2 1 1 1 2 1 2 1 1 3 1 1 1 1 1 2 1 1 1 1 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 3 1 2 1 1 2 1 1 2 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1 2 2 1 1 1 1 1 2 1 2 1 1 1 1 1 1 2 1 1 3 3 2 1 1 1 1 1 1 1 1 1 1 2 1 1 3 2 2 1 1 1 2

in the original code should now be something like

cc    = sample(1:3, n, replace=T, prob=w)
cc
##   [1] 1 2 1 1 1 2 2 1 1 1 1 2 1 1 1 1 1 2 1 1 1 1 1 1 2 1 2 1 3 1 2 2 2 2 1 1 1 2 1 1 2 1 1 1 1 1 1 2 2 1 2 1 1 3 1 2 2 1 3 1 2 1 1 2 1 1 2 2 1 1 1 2 1 1 1 1 1 2 2 2 1 1 2 1 1 1 1 1 1 1 1 2 2 1 3 1 1 1 1 1 1 1 1 1 1 1 2 1 1 3 1 1 3 1 1 1 2 1 1 1 2 1 1 1 2 1 1 2 1 1 1 1 2 1 1 2 3 3 1 1 1 3 1 1 1 1 1 1 1 1 2 1 1 1 1 1 2 1 1 1 1 2 1 1 1 1 1 2 3 1 1 1 1 1 3 1 3 2 1 1 1 1 1 1 1 1 1 1 2 3 1 2 1 1 1 1 1 2 1 2

to reflect the fact that we now have 3 components in the mixture.

  • 0点 No
  • 1点 Yes

Assuming that the indicator variables have been correctly sampled, are the observations being sampled correctly?

The line

x     = rnorm(n, mu[cc], sigma[cc])
x
##   [1]  0.212872872  5.921317560  0.875968855 -0.515876648 -0.162261507  8.250833869  7.524761742 -0.848905082  0.326508711 -3.339498394 -0.386554579  3.140937174 -0.260173822  0.636804314  0.972256069  0.495329621 -1.699295125  3.704314409  0.027854992  0.439442362 -0.626504712 -0.786550630 -0.274770177  0.659614199  6.887070722  0.062848292  3.515230561  0.175651221          NaN  1.771097466  6.257130770  3.496655408  5.773869467  4.533442290 -0.723204657  0.482203584 -0.818526993  5.460387289 -0.225837171 -0.313664797  3.208343573 -1.311446115 -0.832691989 -0.052123823 -0.396055804  0.358422653  1.819256535  6.999404072  5.278373783 -0.271475575  4.518071344 -1.092237020  0.975893816          NaN  0.239038653  5.665186969  4.248484390 -0.454585874          NaN -0.707138970  6.312059767  0.260199670  2.375909640  0.947824223 -0.093198058  0.077352746  3.681068681  3.252274781  0.361114682 -0.959956355 -0.474830362  4.769732198  1.887893736  1.742973671 -0.190696789  0.017186934
##  [77]  1.504653470  2.876315233  5.198555164  7.930265009 -0.519741590 -1.621651031  7.687684224 -0.199643525  0.786132748 -0.856765987  0.005721487 -0.965228161  0.445303869 -0.577097209 -0.938589264  7.705255537  8.200668759 -0.461294395          NaN  1.035659381  0.030832176 -0.670459502  0.301847644  1.329559883  0.251472191 -0.401104470  0.405761005 -1.107333993 -0.717942727 -0.422187248  5.081967162 -1.634156331 -0.391269141          NaN -0.599073914  0.464770080          NaN  0.450614621 -0.186750899 -0.001176947  1.331036356  0.161883601  0.373558445 -1.635928348  4.274374221  0.098839208  1.066055572 -0.168488487  4.504562741  1.026778764 -1.832810083  4.840103949 -0.720912132 -2.505837092 -1.160364202  1.909083133  5.680910376  1.119421580  0.162077291  4.122792845          NaN          NaN  0.129493815 -0.968581775  0.628197574          NaN -0.680969366 -0.102705294 -0.539013999 -0.259712106  1.469267187  0.424869230 -0.947855076  1.474101570  3.610866390  0.120552852
## [153] -0.134067627  1.475719097 -0.781645428 -1.600403010  5.942392036 -0.814667606 -0.099417339  0.145083408 -1.513511110  6.336085025 -0.252212572  0.839267694  0.142092317 -0.660867871 -2.792261133  2.551432989          NaN  0.723469684  0.072188707  1.319216014 -0.478923162 -0.514121786          NaN  0.789791425          NaN  4.241280618 -0.617515787 -0.130167954 -0.141889204 -0.932697127  0.134553432  1.027818580  0.849807157  0.610423935 -0.813874168  1.260186656  5.045383709          NaN  1.000206050  1.647110179  0.550144350  1.649309954 -0.702852393  0.232517932 -0.609529270  2.143966169  0.653449247  3.428412610

should now be something like

x     = rpois(n, lambda[cc])
x
##   [1]  0  2  2  1  1  2  3  0  0  0  2  1  0  5  0  1  3  2  0  1  2  0  1  3  1  0  0  1  3  1  2  4  3  0  1  0  0  3  0  0  1  2  2  0  1  0  1  2  2  2  1  2  0  8  1  1  3  1  4  0  1  0  0  1  0  0  2  3  3  1  0  2  1  0  1  0  1  1  0  1  1  1  3  0  0  1  2  3  0  1  2  0  2  0  8  1  2  1  1  0  0  2  0  1  1  1  0  1  2  6  0  0  8  0  0  0  2  0  1  1  2  2  2  1  2  0  0  1  1  2  0  0  3  1  1  0  4  4  1  0  2 10  1  0  1  0  1  1  0  0  1  1  0  1  0  0  3  1  0  2  0  2  1  0  0  1  1  2  8  0  3  3  1  0  9  1  6  1  1  2  2  2  0  0  1  0  0  1  2  1  0  3  1  1  0  1  1  1  1  2

to reflect the fact that the components of the mixture are all Poisson distributions.

  • 0点 No
  • 1点 Yes

Are the empirical frequencies computed correctly?

The code used to compute the frequencies should look something like

empfreq = table(factor(x, levels=seq(0, max(x))))/n
empfreq
## 
##     0     1     2     3     4     5     6     7     8     9    10 
## 0.335 0.340 0.180 0.080 0.020 0.005 0.010 0.000 0.020 0.005 0.005

The main difficulty here is how to properly compute the frequencies using R, rather than anything that is specific to mixture models. Using the function table() directly on the vector \(x\) will ignore values with zero counts. Similarly, the function tabulate() will ignore the \(x = 0\) case. First converting the vector \(x\) into a factor while ensuring that any integer between 0 and the maximum in the sample are valid factors avoids the issue.

  • 0点 No
  • 1点 Yes for some datasets, but might fail. Use this category if, for example, the function table was used directly on \(x\),
empfreq = table(x)/n
empfreq
## x
##     0     1     2     3     4     5     6     8     9    10 
## 0.335 0.340 0.180 0.080 0.020 0.005 0.010 0.020 0.005 0.005

Partial credit should also be given if counts rather than frequencies are reported (i.e., if the answer does not divide by \(n\)).

  • 2点 Yes

Does the barplot appear correct?

The code should be something like

barplot(empfreq)

or

hist(x, right=F)

When using the hist function, the option right=F must be specified so that the intervals/bins are specified correctly. This ensures that the intervals are [0,1), [1,2),…,[10,11]. Note that the last interval is a closed one. In the default setting (right=T), the first category [0,1] is closed. That is, the left and right endpoints are included.

  • 0点 No
  • 1点 Yes



2.3 ピアレビュー

2.3.1 1st Peer

2.3.1.1 Assignment

Enter code to sample 200 random numbers from a mixture of 3 Poisson distributions with means 1, 2 and 6 and weights 0.7, 0.2 and 0.1, respectively.

n     = 200               # Size of the sample to be generated
w     = c(0.7, 0.2, 0.1)  # Weights
lamba = c(1, 2, 6)        # Means
cc    = sample(1:3, n, replace=T, prob=w)
x     = rpois(n, lamba[cc])

Enter code to generate a barplot with the empirical frequencies of all the integers included in the sample.

empfreq = table(factor(x, levels=seq(0, max(x))))/n
barplot(empfreq)

2.3.1.2 Marking

Have the parameters of the mixture (both weights and component-specific parameters) been correctly specified?

The lines

n     = 50
w     = c(0.6, 0.4)  # Weights
mu    = c(0, 5)      # Means
sigma = c(1, 2)      # Standard deviations

in the original code should have been replaced by something like

n      = 200
w      = c(0.7, 0.2, 0.1)  # Weights
lambda = c(1, 2, 6)        # Means
  • 0点 No
  • 1点 Yes

Is the sampling of the indicator variables done correctly?

The line

cc    = sample(1:2, n, replace=T, prob=w)
## Error in sample.int(length(x), size, replace, prob): incorrect number of probabilities
cc
##   [1] 1 1 1 1 3 1 1 3 2 2 1 1 3 1 2 1 1 1 1 1 1 1 1 3 1 1 1 1 2 2 3 1 1 1 3 1 2 1 1 2 1 2 2 2 1 1 1 2 1 1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 3 1 1 1 2 1 1 1 2 3 1 2 1 1 1 1 1 1 3 1 3 1 1 1 1 1 3 1 2 1 1 1 1 1 1 1 2 1 1 1 1 1 3 2 1 1 1 2 1 1 2 1 1 1 3 1 1 1 2 2 1 1 1 1 1 1 1 3 1 2 1 2 3 1 1 1 1 1 1 1 1 1 1 3 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 2 2 1 3 1 1 1 2 1 1 1 1 1 2 3 2 1 1 3 2 3 2 1

in the original code should now be something like

cc    = sample(1:3, n, replace=T, prob=w)
cc
##   [1] 1 2 1 2 1 1 1 1 1 2 1 1 2 3 1 1 1 1 1 1 2 2 3 1 2 2 1 1 2 1 1 1 2 1 1 1 1 3 1 1 1 1 1 1 1 2 1 1 3 1 3 1 1 1 2 1 2 1 1 1 3 1 1 2 2 2 1 1 1 1 2 1 2 1 1 1 1 1 2 2 1 1 1 3 1 1 1 1 1 3 1 2 2 2 1 3 2 1 1 1 1 1 1 3 1 2 1 1 3 2 2 2 1 2 1 1 2 1 1 1 1 2 1 1 2 1 3 1 1 1 1 1 2 1 2 1 1 1 1 2 1 1 3 2 1 1 1 1 2 1 2 2 3 1 1 1 1 1 1 1 1 2 1 2 1 1 2 2 1 1 1 1 1 1 2 2 2 1 1 1 3 1 1 3 1 1 1 1 1 1 2 2 2 1 2 1 2 3 1 1

to reflect the fact that we now have 3 components in the mixture.

  • 0点 No
  • 1点 Yes

Assuming that the indicator variables have been correctly sampled, are the observations being sampled correctly?

The line

x     = rnorm(n, mu[cc], sigma[cc])
x
##   [1]  1.235743165  7.344719143  1.408751229  7.420378759  1.165751316  0.732260537  0.057361282  0.465841950  0.085082339  1.763988712 -1.467358240 -1.359339287  9.298978295          NaN  0.554890679 -1.112496312 -0.870171108 -1.070596429  0.210014363  0.640968852  5.614533247  7.393165599          NaN  1.044539487  2.408227959  4.321065919  0.180577433 -0.298534589  5.581980501  1.030511132 -0.423474826  0.949315796  3.395425515  0.213908998  1.226942817  1.402285997  0.092693580          NaN  0.045327229  1.732372378  0.221968015  0.639753147  0.426437455  0.954636871  0.111420749  3.400235816 -1.825091379  0.954143379          NaN -0.395080673          NaN  0.916707931 -0.092657480 -0.240094210  5.988156751 -0.891652828  3.809171997  0.804084268  0.768375809 -0.897195732          NaN -0.030040465  0.529797443  4.858709902  5.990224441  4.726760888 -0.753837237  0.383488778 -0.571802901  1.125199924  3.468771471  0.666883256  4.437234680 -0.580328238  2.338695997 -0.304344502
##  [77]  1.673671246  1.451389881 -0.510990103  7.893448873 -1.052111719 -0.827423049  2.497490129          NaN  0.374345928  1.021177923 -0.418952699 -0.862441707  1.961173877          NaN -1.918822745  4.759465522  1.683813088  2.504631973  1.353403849          NaN  5.328241541 -0.097550607  0.354735471 -0.202175083  1.463228331  0.543096905  1.224399361          NaN  0.128796215  6.197565919  1.732282434 -0.564747572          NaN  0.621234120 10.907486146  5.516209921  0.119846604  1.960619273  1.379610505 -0.381231110  5.863775974  0.873815335  0.500165529  0.677588816  0.447122370  6.774697460 -1.069833352 -0.169445155  6.103556537 -0.978123417          NaN -0.503056398 -1.552051937  0.224154334 -0.456912044 -0.251731527  2.318710964 -0.560714936  3.924347288  0.993864418  1.022178891 -1.255358958 -1.300791769  6.292824688 -0.372718793 -2.521348586          NaN  5.886138799 -0.337651415 -0.342132099 -0.700286190 -2.131314185  5.890650926  2.024670925  5.246738037  2.425654948
## [153]          NaN -0.262779298  0.360569112 -0.356880514 -0.132212779 -1.068194466  0.931097845  0.063095835 -0.817426757  3.902853058 -0.552514467  4.320529856 -0.510029625  0.337884100  3.340511021  6.778848874  0.604245192 -1.310844641 -1.725373774  1.681115525 -0.814487821 -1.239038873  4.841883094  2.427211516  3.846529905 -1.493648234  0.081664558 -0.677390192          NaN  0.276189403  0.611143657          NaN  1.399380849 -1.175691205  0.730986390  2.266213947  1.025172903  0.024644246  4.171443624  3.046457539  6.333831390  0.009665193  5.024237712  0.695698887  6.050112594          NaN -0.827851587 -0.149902879

should now be something like

x     = rpois(n, lambda[cc])
x
##   [1] 0 1 1 2 0 1 0 0 0 3 0 1 1 7 2 3 2 0 1 3 2 2 2 1 2 2 1 5 3 0 1 0 1 1 0 1 1 4 1 1 2 2 1 1 3 2 1 2 7 1 9 1 1 1 3 1 2 0 0 0 5 0 0 1 3 1 1 0 2 1 0 0 2 0 2 0 0 2 2 3 1 2 2 4 2 1 0 2 1 4 1 1 2 0 3 7 0 1 2 3 2 0 1 6 0 3 3 2 7 2 2 0 2 1 0 3 1 1 3 0 1 4 1 2 0 1 7 2 1 0 2 2 2 0 0 2 1 0 1 2 1 0 3 3 0 1 1 1 1 0 2 4 4 1 2 0 0 0 0 0 2 2 0 1 1 0 2 5 1 0 2 1 1 1 1 2 0 1 2 0 5 1 3 2 0 1 4 1 2 1 2 0 2 1 2 0 1 8 1 1

to reflect the fact that the components of the mixture are all Poisson distributions.

  • 0点 No
  • 1点 Yes

Are the empirical frequencies computed correctly?

The code used to compute the frequencies should look something like

empfreq = table(factor(x, levels=seq(0, max(x))))/n
empfreq
## 
##     0     1     2     3     4     5     6     7     8     9 
## 0.250 0.325 0.245 0.085 0.035 0.020 0.005 0.025 0.005 0.005

The main difficulty here is how to properly compute the frequencies using R, rather than anything that is specific to mixture models. Using the function table() directly on the vector \(x\) will ignore values with zero counts. Similarly, the function tabulate() will ignore the \(x = 0\) case. First converting the vector \(x\) into a factor while ensuring that any integer between 0 and the maximum in the sample are valid factors avoids the issue.

  • 0点 No
  • 1点 Yes for some datasets, but might fail. Use this category if, for example, the function table was used directly on \(x\),
empfreq = table(x)/n
empfreq
## x
##     0     1     2     3     4     5     6     7     8     9 
## 0.250 0.325 0.245 0.085 0.035 0.020 0.005 0.025 0.005 0.005

Partial credit should also be given if counts rather than frequencies are reported (i.e., if the answer does not divide by \(n\)).

  • 2点 Yes

Does the barplot appear correct?

The code should be something like

barplot(empfreq)

or

hist(x, right=F)

When using the hist function, the option right=F must be specified so that the intervals/bins are specified correctly. This ensures that the intervals are [0,1), [1,2),…,[10,11]. Note that the last interval is a closed one. In the default setting (right=T), the first category [0,1] is closed. That is, the left and right endpoints are included.

  • 0点 No
  • 1点 Yes


2.3.2 2nd Peer

2.3.2.1 Assignment

Enter code to sample 200 random numbers from a mixture of 3 Poisson distributions with means 1, 2 and 6 and weights 0.7, 0.2 and 0.1, respectively.

set.seed(123)

## Generate n observations from a mixture of three Poisson distributions
n     = 200               # Size of the sample to be generated
w     = c(0.7, 0.2, 0.1)  # Weights
lambda = c(1, 2, 6)       # Means (also Variances) of 3 Poisson models
# Each of n Observations randomly selects one out of 3 Poisson models
cc    = sample(1:3, n, replace=T, prob=w)
# Value of each observations depends on selected Poisson model
yy = rpois(n, lambda[cc])

Enter code to generate a barplot with the empirical frequencies of all the integers included in the sample.

# range of Values for X-axis
xx = c(1 : (max(yy) + 5))

# build Frequency list based on occurrences of Values for y-axis
zz = NULL
for (i in xx) {
    zz <- append(zz, length(which(yy==i)))
}

# BarPlot of Frequency vs Values observed
par(mar=c(4,4,1,1)+0.1)
barplot(zz, names.arg=xx, width=1.0, 
        main="Values Generated by Mixture of 3 Poisson Models",
        xlim=c(0, (max(yy) + 6)), ylim=c(0, max(zz)+0.2),
        ylab="Frequency", xlab="Value")

2.3.2.2 Marking

Have the parameters of the mixture (both weights and component-specific parameters) been correctly specified?

The lines

n     = 50
w     = c(0.6, 0.4)  # Weights
mu    = c(0, 5)      # Means
sigma = c(1, 2)      # Standard deviations

in the original code should have been replaced by something like

n      = 200
w      = c(0.7, 0.2, 0.1)  # Weights
lambda = c(1, 2, 6)        # Means
  • 0点 No
  • 1点 Yes

Is the sampling of the indicator variables done correctly?

The line

cc    = sample(1:2, n, replace=T, prob=w)
## Error in sample.int(length(x), size, replace, prob): incorrect number of probabilities
cc
##   [1] 1 2 1 2 3 1 1 2 1 1 3 1 1 1 1 2 1 1 1 3 2 1 1 3 1 2 1 1 1 1 3 3 1 2 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 2 1 1 1 1 2 2 1 1 1 1 1 2 1 2 2 2 1 2 1 2 1 1 1 1 1 1 1 1 1 1 2 1 1 3 2 2 1 1 1 1 1 1 1 2 1 1 1 1 1 1 3 1 2 3 1 1 1 3 1 1 3 2 1 1 3 1 1 1 1 1 1 1 3 1 1 1 1 1 2 1 2 1 1 2 2 3 1 1 1 1 1 2 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 2 2 1 1 1 1 2 1 2 1 2 1 1 1 1 1 3 3 1 1 3 1 3 1 1 1 1 1

in the original code should now be something like

cc    = sample(1:3, n, replace=T, prob=w)
cc
##   [1] 3 1 3 1 1 1 2 1 1 1 1 2 1 1 1 1 3 1 1 1 1 1 1 1 2 1 1 1 1 2 3 1 1 3 1 1 1 1 1 1 1 1 2 1 2 2 2 1 1 1 1 1 1 1 1 2 2 2 1 1 3 1 1 1 1 1 1 1 1 2 1 2 1 2 1 1 1 1 1 2 1 2 1 1 3 1 1 1 1 2 3 1 1 2 1 3 1 1 1 2 1 1 1 1 1 1 1 1 3 1 1 1 2 1 2 1 1 2 1 2 1 1 1 1 1 1 3 1 3 1 3 1 1 2 1 2 1 2 1 1 2 1 2 1 2 3 1 2 2 1 1 1 1 2 1 1 1 1 1 1 1 3 2 1 1 1 1 2 1 2 1 3 1 1 3 2 1 1 1 1 2 2 2 1 2 1 1 1 3 1 1 1 3 1 1 2 1 1 2 1

to reflect the fact that we now have 3 components in the mixture.

  • 0点 No
  • 1点 Yes

Assuming that the indicator variables have been correctly sampled, are the observations being sampled correctly?

The line

x     = rnorm(n, mu[cc], sigma[cc])
x
##   [1]         NaN -0.71524219         NaN -0.75268897 -0.93853870 -1.05251328  4.12568093  0.33117917 -2.01421050  0.21198043  1.23667505  9.07514804  1.30117599  0.75677476 -1.72673040 -0.60150671         NaN -0.35204646  0.70352390 -0.10567133 -1.25864863  1.68443571  0.91139129  0.23743027  7.43621722 -1.33877429  0.66082030 -0.52291238  0.68374552  4.87835609         NaN  0.63296071  1.33551762         NaN  0.00729009  1.01755864 -1.18843404 -0.72160444  1.51921771  0.37738797 -2.05222282 -1.36403745  4.59843797  0.86577940  4.79623349  6.24837494  6.91801076  1.67105483  0.05601673 -0.05198191 -1.75323736  0.09932759 -0.57185006 -0.97400958 -0.17990623  7.02988635  1.01450302  4.14544143  0.11663728 -0.89320757         NaN  0.33390294  0.41142992 -0.03303616 -2.46589819  2.57145815 -0.20529926  0.65119328  0.27376649  7.04934647  0.81765945  4.58041366  0.37816777  3.10918234  0.85692301 -0.46103834  2.41677335 -1.65104890 -0.46398724  6.65075973  0.51013255  3.82103792
##  [83] -0.99678074  0.14447570         NaN -0.01430741 -1.79028124  0.03455107  0.19023032  5.34945279         NaN -1.05501704  0.47613328  7.75714027  0.45623640         NaN -1.13558847 -0.43564547  0.34610362  3.70590874 -2.15764634  0.88425082 -0.82947761 -0.57356027  1.50390061 -0.77414493  0.84573154 -1.26068288         NaN -0.35454240 -0.07355602 -1.16865142  3.73050347 -0.02884155  6.34139194 -1.65054654 -0.34975424  6.51281288 -0.53880916  5.45458384  0.49222857  0.26783502  0.65325768 -0.12270866 -0.41367651 -2.64314895         NaN -0.09294102         NaN  0.43028470         NaN  0.53539884 -0.55527835  8.55900582  0.28642442  5.25263172  1.27226678  3.56306756 -0.45033862  2.39745248  5.02225837  1.63356842  2.12298671 -0.19051680  5.75684781         NaN  0.30003855  2.98872748  5.03851855 -1.07742065  0.71270333  1.08477509 -2.22498770  7.47138692 -1.24104450  0.45476927  0.65990264 -0.19988983 -0.64511396  0.16532102  0.43881870         NaN  6.76660564 -2.05233698
## [165] -1.63637927  1.43040234  1.04662885  5.87057790  0.71517841  6.83434984 -2.66092280         NaN  1.11027710 -0.48498760         NaN  5.46123366 -0.29515780  0.87196495 -0.34847245  0.51850377  4.21863004  2.81442558  7.42002102  0.74090001  8.44852448  0.06515393  1.12500275  1.97541905         NaN -0.28148212 -1.32295111 -0.23935157         NaN -0.21404124  0.15168050  8.42460995 -0.32614389  0.37300466  4.54463187  0.02045071

should now be something like

x     = rpois(n, lambda[cc])
x
##   [1]  7  0  9  1  1  0  3  1  2  0  2  1  0  1  3  0  5  1  1  1  3  1  2  2  1  0  0  1  0  1  6  1  1  8  1  1  1  0  0  1  0  2  3  1  3  1  0  1  1  0  0  1  0  2  1  2  2  1  1  2  6  1  2  0  1  1  0  1  0  2  1  0  1  2  0  1  1  1  2  3  1  2  0  0  5  0  2  4  1  3  9  1  1  1  1  4  0  1  1  1  0  1  0  1  1  0  3  1  8  2  2  2  2  1  1  0  1  1  1  1  0  1  2  0  0  0  8  1  8  0  3  0  2  3  4  2  0  1  2  1  1  2  4  1  2 10  3  2  0  2  1  1  0  1  1  3  0  1  0  0  1  5  1  0  3  1  0  2  1  3  1  6  1  2  5  1  1  0  2  1  0  5  1  1  2  1  0  0  9  1  2  2  6  1  0  3  1  0  1  0

to reflect the fact that the components of the mixture are all Poisson distributions.

  • 0点 No
  • 1点 Yes

Are the empirical frequencies computed correctly?

The code used to compute the frequencies should look something like

empfreq = table(factor(x, levels=seq(0, max(x))))/n
empfreq
## 
##     0     1     2     3     4     5     6     7     8     9    10 
## 0.245 0.405 0.165 0.075 0.020 0.025 0.020 0.005 0.020 0.015 0.005

The main difficulty here is how to properly compute the frequencies using R, rather than anything that is specific to mixture models. Using the function table() directly on the vector \(x\) will ignore values with zero counts. Similarly, the function tabulate() will ignore the \(x = 0\) case. First converting the vector \(x\) into a factor while ensuring that any integer between 0 and the maximum in the sample are valid factors avoids the issue.

  • 0点 No
  • 1点 Yes for some datasets, but might fail. Use this category if, for example, the function table was used directly on \(x\),
empfreq = table(x)/n
empfreq
## x
##     0     1     2     3     4     5     6     7     8     9    10 
## 0.245 0.405 0.165 0.075 0.020 0.025 0.020 0.005 0.020 0.015 0.005

Partial credit should also be given if counts rather than frequencies are reported (i.e., if the answer does not divide by \(n\)).

  • 2点 Yes

Does the barplot appear correct?

The code should be something like

barplot(empfreq)

or

hist(x, right=F)

When using the hist function, the option right=F must be specified so that the intervals/bins are specified correctly. This ensures that the intervals are [0,1), [1,2),…,[10,11]. Note that the last interval is a closed one. In the default setting (right=T), the first category [0,1] is closed. That is, the left and right endpoints are included.

  • 0点 No
  • 1点 Yes


2.3.3 3rd Peer

2.3.3.1 Assignment

Enter code to sample 200 random numbers from a mixture of 3 Poisson distributions with means 1, 2 and 6 and weights 0.7, 0.2 and 0.1, respectively.

# Generate n observations from a mixture of three Poison 
# distributions
n     = 200              # Size of the sample to be generated
w     = c(0.7, 0.2, 0.1) # Weights
mu    = c(1, 2, 6)       # Means

xx = seq(0, 17, by =1)
yy = w[1]*dpois(xx, mu[1]) + w[2]*dpois(xx, mu[2]) + w[3]*dpois(xx, mu[3])

Enter code to generate a barplot with the empirical frequencies of all the integers included in the sample.

# Plot f(x) along with the observations 
# just sampled
xx = seq(0, 17, by = 1)
xx = seq(0, 17, by =1)

yy = w[1]*dpois(xx, mu[1]) + w[2]*dpois(xx, mu[2]) + w[3]*dpois(xx, mu[3])
par(mar=c(4,4,1,1)+0.1)
plot(xx, yy, type="l", ylab="Density", xlab="x", las=1, lwd=2)

2.3.3.2 Marking

Have the parameters of the mixture (both weights and component-specific parameters) been correctly specified?

The lines

n     = 50
w     = c(0.6, 0.4)  # Weights
mu    = c(0, 5)      # Means
sigma = c(1, 2)      # Standard deviations

in the original code should have been replaced by something like

n      = 200
w      = c(0.7, 0.2, 0.1)  # Weights
lambda = c(1, 2, 6)        # Means
  • 0点 No
  • 1点 Yes

Is the sampling of the indicator variables done correctly?

The line

cc    = sample(1:2, n, replace=T, prob=w)
## Error in sample.int(length(x), size, replace, prob): incorrect number of probabilities
cc
##   [1] 3 1 3 1 1 1 2 1 1 1 1 2 1 1 1 1 3 1 1 1 1 1 1 1 2 1 1 1 1 2 3 1 1 3 1 1 1 1 1 1 1 1 2 1 2 2 2 1 1 1 1 1 1 1 1 2 2 2 1 1 3 1 1 1 1 1 1 1 1 2 1 2 1 2 1 1 1 1 1 2 1 2 1 1 3 1 1 1 1 2 3 1 1 2 1 3 1 1 1 2 1 1 1 1 1 1 1 1 3 1 1 1 2 1 2 1 1 2 1 2 1 1 1 1 1 1 3 1 3 1 3 1 1 2 1 2 1 2 1 1 2 1 2 1 2 3 1 2 2 1 1 1 1 2 1 1 1 1 1 1 1 3 2 1 1 1 1 2 1 2 1 3 1 1 3 2 1 1 1 1 2 2 2 1 2 1 1 1 3 1 1 1 3 1 1 2 1 1 2 1

in the original code should now be something like

cc    = sample(1:3, n, replace=T, prob=w)
cc
##   [1] 1 1 1 1 1 1 1 3 1 1 1 3 1 1 2 1 1 1 1 1 1 2 2 1 1 2 1 1 1 1 1 2 1 1 2 1 3 1 2 2 1 2 1 3 1 1 2 1 1 3 1 1 1 2 1 2 1 3 1 1 1 2 3 1 1 1 2 1 1 1 1 1 1 1 1 2 3 1 1 1 1 1 2 1 1 1 1 2 2 1 1 1 1 2 1 1 3 1 1 1 1 1 2 1 1 3 1 3 1 2 1 1 2 1 2 1 1 1 1 2 1 2 1 1 2 1 1 1 2 1 1 2 1 1 2 2 2 1 1 1 1 2 2 1 1 1 1 1 1 1 2 1 2 1 2 1 1 1 2 1 3 1 3 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 1 1 1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1

to reflect the fact that we now have 3 components in the mixture.

  • 0点 No
  • 1点 Yes

Assuming that the indicator variables have been correctly sampled, are the observations being sampled correctly?

The line

x     = rnorm(n, mu[cc], sigma[cc])
x
##   [1]  0.982113003 -0.727383529 -0.996838982 -1.041688859 -0.414588726 -0.239029068  0.483617534          NaN -0.321324842 -2.078489269 -0.091434276          NaN  1.187186811  1.191601269  3.422073564 -1.547776544  2.458060492 -0.162421942 -0.097451250  0.420574189 -1.614039457  3.543561778  1.919115189 -0.693094614  0.118849433  2.270581084  0.589982679  0.289344029 -0.904215026  0.226324942  0.748081162  7.122190507 -0.212848279 -0.093636794  4.826571730  1.441461756          NaN  1.125071892  6.668803136  4.425318400  0.373241434  5.806580662 -1.041673294          NaN -1.728304515  0.641830028  1.941378938  0.001683688  0.250247821          NaN  0.563867390  0.189426238 -0.732853806  6.972731720  1.738633767  6.762357618 -1.943650901          NaN  1.399576185 -0.056055946  0.524914279  6.244066472          NaN -0.096686073 -0.075263198  1.019157069  6.423203845  0.990262246  2.382926695  0.664415864  0.207381157 -2.210633111  2.691714003 -0.482676822  2.374734715  5.749287136
##  [77]          NaN  1.538430199 -0.109710321  0.511470755  0.213957980 -0.186120699  4.759212351  1.012834336 -0.201458147 -2.037682494 -0.195889249  6.079581211  6.232911433  0.616567817 -1.692101521  0.368742058  0.967859210  7.553157363 -0.224961271 -0.321892586          NaN  1.487837832 -1.667928046 -0.436829977  0.457462079 -1.617773765  5.559255725  1.877864021 -0.004060653          NaN -0.278454025          NaN  0.474911714  4.441855658  0.813400374  0.904435464  5.005383322 -1.176692158  2.363558545 -0.592997366  0.797380501 -1.958205175 -1.886325159  3.692440350  0.394394848  3.172867904  0.886749037  0.333369970  4.658720764  0.818828137  0.388365163 -0.445935027  5.462229869  0.647513358  0.356283345  3.683979584  0.855202205  1.152936226  5.552549113  5.288209325  4.848749838  2.161415846  0.276315532 -0.158294029 -2.507917802  1.869436470  4.844653599  0.206294045  0.276872459  0.821506780 -0.194152409  1.214588794 -0.921516043 -1.208442720  2.542027645  0.742297024
## [153]  4.834160115  0.789817922  4.464587157 -0.591892102 -0.368352575 -1.852616824  2.660769479 -1.442034646          NaN  1.054322272          NaN -0.597330091  0.789459853  1.516490602 -0.191774809  0.283878906 -1.751067519 -0.818669778  0.056214849  0.299086903 -0.759398117  2.684858999  4.083219722  5.128487113  6.299583734 -0.026018635 -0.643567387  1.045305664  1.615545323 -0.029693971  0.562267345          NaN -0.097412499  1.016455218 -1.156167394  2.320860224 -0.603531248 -1.458849414 -0.350917827  0.146708484  1.623621208  0.911209677  0.142458432  2.221032952 -0.866037745 -0.163284932  2.553026113 -1.860227574

should now be something like

x     = rpois(n, lambda[cc])
x
##   [1]  2  1  0  2  3  2  0 11  1  0  0  1  2  2  4  1  3  2  2  1  3  2  4  1  1  4  1  0  0  1  1  6  2  0  3  2  7  1  1  2  0  2  0  4  1  1  1  0  1  7  0  0  3  1  1  3  0  2  1  2  0  1  7  3  1  1  1  2  0  1  1  0  3  1  2  2  4  1  2  1  1  1  2  1  1  0  4  2  0  2  1  2  0  4  1  2  9  1  3  0  0  0  2  0  3  8  0  6  1  3  1  0  2  0  3  0  2  2  1  0  1  2  4  0  2  3  1  1  0  2  1  1  1  2  2  2  1  2  0  1  0  4  2  0  2  0  2  0  0  0  0  2  1  1  1  1  1  4  1  1 11  1  5  2  0  1  1  0  0  2  1  1  1  1  2  2  0  1  1  1  1  0  0  7  2  0  2  1  4  1  1  1  1  0  1  5  1  1  0  0

to reflect the fact that the components of the mixture are all Poisson distributions.

  • 0点 No
  • 1点 Yes

Are the empirical frequencies computed correctly?

The code used to compute the frequencies should look something like

empfreq = table(factor(x, levels=seq(0, max(x))))/n
empfreq
## 
##     0     1     2     3     4     5     6     7     8     9    10    11 
## 0.240 0.360 0.220 0.065 0.055 0.010 0.010 0.020 0.005 0.005 0.000 0.010

The main difficulty here is how to properly compute the frequencies using R, rather than anything that is specific to mixture models. Using the function table() directly on the vector \(x\) will ignore values with zero counts. Similarly, the function tabulate() will ignore the \(x = 0\) case. First converting the vector \(x\) into a factor while ensuring that any integer between 0 and the maximum in the sample are valid factors avoids the issue.

  • 0点 No
  • 1点 Yes for some datasets, but might fail. Use this category if, for example, the function table was used directly on \(x\),
empfreq = table(x)/n
empfreq
## x
##     0     1     2     3     4     5     6     7     8     9    11 
## 0.240 0.360 0.220 0.065 0.055 0.010 0.010 0.020 0.005 0.005 0.010

Partial credit should also be given if counts rather than frequencies are reported (i.e., if the answer does not divide by \(n\)).

  • 2点 Yes

Does the barplot appear correct?

The code should be something like

barplot(empfreq)

or

hist(x, right=F)

When using the hist function, the option right=F must be specified so that the intervals/bins are specified correctly. This ensures that the intervals are [0,1), [1,2),…,[10,11]. Note that the last interval is a closed one. In the default setting (right=T), the first category [0,1] is closed. That is, the left and right endpoints are included.

  • 0点 No
  • 1点 Yes



2.4 ディスカッション



3 Appendix

3.1 Blooper

3.2 Documenting File Creation

It’s useful to record some information about how your file was created.

  • File creation date: 2021-05-22
  • File latest updated date: 2021-05-22
  • R version 4.1.0 (2021-05-18)
  • rmarkdown package version: 2.8
  • File version: 1.0.0
  • Author Profile: ®γσ, Eng Lian Hu
  • GitHub: Source Code
  • Additional session information:
suppressMessages(require('dplyr', quietly = TRUE))
suppressMessages(require('magrittr', quietly = TRUE))
suppressMessages(require('formattable', quietly = TRUE))
suppressMessages(require('knitr', quietly = TRUE))
suppressMessages(require('kableExtra', quietly = TRUE))

sys1 <- devtools::session_info()$platform %>% 
  unlist %>% data.frame(Category = names(.), session_info = .)
rownames(sys1) <- NULL

sys2 <- data.frame(Sys.info()) %>% 
  dplyr::mutate(Category = rownames(.)) %>% .[2:1]
names(sys2)[2] <- c('Sys.info')
rownames(sys2) <- NULL

if (nrow(sys1) == 9 & nrow(sys2) == 8) {
  sys2 %<>% rbind(., data.frame(
  Category = 'Current time', 
  Sys.info = paste(as.character(lubridate::now('Asia/Tokyo')), 'JST🗾')))
} else {
  sys1 %<>% rbind(., data.frame(
  Category = 'Current time', 
  session_info = paste(as.character(lubridate::now('Asia/Tokyo')), 'JST🗾')))
}

sys <- cbind(sys1, sys2) %>% 
  kbl(caption = 'Additional session information:') %>% 
  kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>% 
  row_spec(0, background = 'DimGrey', color = 'yellow') %>% 
  column_spec(1, background = 'CornflowerBlue', color = 'red') %>% 
  column_spec(2, background = 'grey', color = 'black') %>% 
  column_spec(3, background = 'CornflowerBlue', color = 'blue') %>% 
  column_spec(4, background = 'grey', color = 'white') %>% 
  row_spec(9, bold = T, color = 'yellow', background = '#D7261E')

rm(sys1, sys2)
sys
Additional session information:
Category session_info Category Sys.info
version R version 4.1.0 (2021-05-18) sysname Linux
os Ubuntu 20.04.2 LTS release 5.8.0-54-generic
system x86_64, linux-gnu version #61~20.04.1-Ubuntu SMP Thu May 13 00:05:49 UTC 2021
ui X11 nodename Scibrokes-Trading
language en machine x86_64
collate en_US.UTF-8 login englianhu
ctype en_US.UTF-8 user englianhu
tz Asia/Tokyo effective_user englianhu
date 2021-05-22 Current time 2021-05-22 19:50:18 JST🗾

3.3 Reference