Data generation

In this report, I build functions that generate normal cloud model data from user-supplied parameters, and I provide plots of the generated data.

\[X \sim N(25,\ \hat{\sigma})\]

\[\hat{\sigma} \sim N(3,\ 0.3)\]

#' Generate random normal cloud model (NCM) data
#'
#' Draws 100 standard deviations from N(mean_sigma, sig_sigma) and simulates
#' an (almost) equal share of the `n` observations from N(mean, sigma_i) for
#' each of them. Every observation is paired with its membership degree
#' `exp(-(x - mean)^2 / (2 * sigma_i^2))`, computed with the sigma it was
#' drawn under.
#'
#' @param n Total number of observations (a single non-negative number;
#'   fractional values are truncated).
#' @param mean Centre of the cloud.
#' @param mean_sigma Mean of the distribution the sigmas are drawn from.
#' @param sig_sigma Standard deviation of the distribution the sigmas are
#'   drawn from.
#' @return A data frame with `n` rows and two columns: `x` (observations)
#'   and `mu` (membership degrees).
rncm <- function(n, mean = 25, mean_sigma = 3, sig_sigma = 0.3) {
  stopifnot(is.numeric(n), length(n) == 1L, !is.na(n), n >= 0)
  n <- floor(n)
  n_sigma <- 100L
  sigma <- rnorm(n_sigma, mean_sigma, sig_sigma)
  # Spread n over the sigmas as evenly as possible: every sigma gets
  # n %/% 100 draws and the first n %% 100 sigmas get one extra, so the
  # result always has exactly n rows (n / 100 used to be truncated).
  per_sigma <- rep(n %/% n_sigma, n_sigma) +
    (seq_len(n_sigma) <= n %% n_sigma)
  sd_per_obs <- rep(sigma, times = per_sigma)
  # rnorm() is vectorised over sd, so one call replaces the growing loop.
  x <- rnorm(n, mean, sd_per_obs)
  membership <- exp(-((x - mean)^2) / (2 * sd_per_obs^2))
  data.frame(x = x, mu = membership)
}
# Simulate 10,000 observations and display 10 randomly chosen rows as a
# formatted table.
data_rcm <- rncm(10000)
random_num <- sample(nrow(data_rcm), 10)
show_data <- data_rcm[random_num, ]
show_data %>%
  kbl(booktabs = TRUE) %>%
  # Spell out `font_size`; `font = 15` only worked via partial matching.
  kable_styling(font_size = 15) %>%
  kable_classic_2(full_width = FALSE)
x mu
1833 24.64090 0.9933561
7400 25.51015 0.9860268
3179 23.52918 0.9163025
7337 27.16856 0.7754811
7305 25.88469 0.9585633
3785 20.48263 0.3689801
7545 27.53547 0.7057928
223 20.20419 0.2464025
4758 20.66841 0.2432040
5750 26.80445 0.8308940

Plot

# Four-panel figure: the whole cloud, followed by the density of mu inside
# three narrow x-windows (+/- 0.1 around 20, 23 and 25).
par(mfrow = c(2, 2))

# Panel 1: every observation, with the three windows highlighted in colour.
plot(data_rcm, ylab = expression(mu))
x_20 <- subset(data_rcm, x >= 19.9 & x <= 20.1)
x_23 <- subset(data_rcm, x >= 22.9 & x <= 23.1)
x_25 <- subset(data_rcm, x >= 24.9 & x <= 25.1)
invisible(Map(
  function(drops, colour) points(drops, col = colour),
  list(x_20, x_23, x_25),
  2:4
))
legend("topright", legend = c(20, 23, 25), col = c(2, 3, 4), pch = 1)

# Panels 2-4: density of mu within each window, drawn in the window's
# colour and annotated with the number of observations it contains.
invisible(Map(
  function(drops, colour) {
    plot(density(drops$mu), main = "", xlab = expression(mu),
         type = "l", xlim = c(0, 1), col = colour)
    legend("top", legend = paste0("N = ", nrow(drops)))
  },
  list(x_20, x_23, x_25),
  2:4
))

Comparison with and without uncertainty

To me, an important feature of the cloud model is that it takes into account not only randomness but also fuzziness, that is, uncertainty.

The plots below show the difference between the two conditions: ignoring uncertainty and considering uncertainty, respectively.

#' Membership degree under a fixed sigma
#'
#' Evaluates `exp(-(x - mean)^2 / (2 * sigma^2))`, i.e. mu without
#' uncertainty: sigma is a fixed value rather than a draw from N(3, 0.3).
#'
#' @param x Numeric vector of observations.
#' @param mean Centre of the curve.
#' @param sigma Fixed spread of the curve.
#' @return Numeric vector of membership degrees, one per element of `x`.
mu <- function(x, mean = 25, sigma = 3) {
  squared_dev <- (x - mean)^2
  two_var <- 2 * sigma^2
  exp(-squared_dev / two_var)
}
#############################################################
## execution
# Draw a fresh sample, then recompute mu for the same x values with the
# default fixed sigma so both versions can be compared side by side.
r_d <- rncm(10000)
mu_set <- mu(r_d[["x"]])
par(mfrow = c(1, 2))
# Left panel: mu from the fixed sigma (no uncertainty).
plot(
  x = r_d[["x"]], y = mu_set,
  xlab = "x", ylab = expression(mu),
  main = "Without uncertainty (fuzziness)"
)
# Right panel: mu as generated together with the sample (random sigma).
plot(
  x = r_d[["x"]], y = r_d[["mu"]],
  xlab = "x", ylab = expression(mu),
  main = "Considering uncertainty (fuzziness)"
)

alternative plot function

In this section, I build a function that visualizes the cloud model. The plotting function accepts two types of input: a data frame such as the one generated in the first section, or the number of desired observations.

#' Plot a normal cloud model together with its 99% envelope
#'
#' Scatter-plots the observations against their membership degree and
#' overlays the curves `mu(x, mean, sigma)` for sigma at the 0.5% and 99.5%
#' quantiles of N(mean_sigma, sig_sigma).
#'
#' @param n Either the number of observations to simulate (numeric; the data
#'   are then generated with `rncm()`), or a two-column data frame of
#'   already generated data (observations first, membership degrees second).
#' @param mean Centre of the cloud; used for simulation, for the envelope
#'   curves and to centre the x-axis window (`mean - 10` to `mean + 10`).
#' @param mean_sigma Mean of the distribution of sigma.
#' @param sig_sigma Standard deviation of the distribution of sigma.
#' @return The string `"false input"` when `n` is neither numeric nor a
#'   two-column data frame; otherwise the value of the final `legend()`
#'   call (the function is used for its plotting side effect).
ncm_plot <- function(n = 10000, mean = 25, mean_sigma = 3, sig_sigma = 0.3) {
  if (is.numeric(n)) {
    # Delegate to rncm() so the simulated cloud honours mean_sigma and
    # sig_sigma (sigma used to be hard-coded as N(3, 0.3) here).
    cloud <- rncm(n, mean, mean_sigma, sig_sigma)
  } else if (is.data.frame(n) && ncol(n) == 2) {
    cloud <- n
  } else {
    return("false input")
  }

  # Equals c(15, 35) for the default mean and follows `mean` otherwise.
  x_window <- mean + c(-10, 10)
  plot(cloud, ylab = expression(mu), main = "normal cloud model",
       xlim = x_window, ylim = c(0, 1), las = 1)

  # Envelope: membership curves for the extreme 0.5% / 99.5% sigmas,
  # centred on `mean` like the data.
  sigma_lower <- qnorm(0.005, mean_sigma, sig_sigma)
  sigma_upper <- qnorm(0.995, mean_sigma, sig_sigma)
  x_grid <- seq(x_window[1], x_window[2], length.out = 101)
  lines(x_grid, mu(x_grid, mean = mean, sigma = sigma_lower),
        col = 2, lwd = 2)
  lines(x_grid, mu(x_grid, mean = mean, sigma = sigma_upper),
        col = 3, lwd = 2)
  legend("topright", legend = c("99% upper bound", "99% lower bound"),
         lwd = 2, lty = 1, col = c(3, 2), cex = 0.85)
}
#############################################################
# Demo of ncm_plot(): stack the plots in three rows of one column.
par(mfrow = c(3, 1))
# Unsupported input (a string): only the message below is returned.
ncm_plot("data")
## [1] "false input"
# All arguments left at their defaults.
ncm_plot()
# n given as the number of observations to simulate.
ncm_plot(n = 1e4)
# n given as an already generated data frame.
ncm_plot(data_rcm)