Problem Set 1

(a) Please write a function to compute the expected value and standard deviation of an array of values.

# Compute the expected value (mean) and sample standard deviation of a
# numeric vector.
#
# Argument:
#   a  numeric vector of values
# Returns:
#   numeric vector of length 2: c(mean, standard deviation), each rounded to
#   2 decimal places. The standard deviation uses the n - 1 denominator
#   (sample standard deviation), the same convention as R's sd().
#   With fewer than two values the standard deviation is undefined and is
#   returned as NA (as sd() does); for an empty vector the mean is NaN.
xstd <- function(a) {
  n <- length(a)

  # The expected value or mean is the sum of the values divided by the count
  xpect <- sum(a) / n

  # A sample standard deviation needs at least two values: with n < 2 the
  # n - 1 denominator is zero or negative, so report NA instead of dividing.
  if (n < 2) {
    return(round(c(xpect, NA_real_), 2))
  }

  # Variance: squared deviations from the mean, summed, divided by n - 1.
  # Vectorized, so no index loop (and no 1:length(a) pitfall) is needed.
  xvar <- sum((a - xpect)^2) / (n - 1)

  # The standard deviation is the square root of the variance
  round(c(xpect, sqrt(xvar)), 2)
}
# Sample data used to check xstd() against R's built-in functions
test <- c(3, 6, 7, 1, 7, 9, 2, 8, 1, 5)
# Hand-written result: mean and standard deviation, rounded to 2 decimals
xstd(test)
## [1] 4.90 2.96
# Built-in reference values for comparison
mean(test)
## [1] 4.9
sd(test)
## [1] 2.960856

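I got the same answer as sd() for the standard deviation by dividing the sum of squared deviations by n - 1 instead of n. At first I didn’t know why, but the reason is that sd() computes the sample standard deviation: because the deviations are measured from the sample mean rather than the true mean, dividing by n would underestimate the variance, and dividing by n - 1 (Bessel’s correction) removes that bias.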

(b) Now write a function to estimate the mean and standard deviation of an infinite stream of numbers coming by.

# Update a running mean and sample standard deviation with one new number
# from a stream, without storing the numbers seen so far.
#
# Argument:
#   x  the next number in the stream (intended to be a single numeric value)
# Returns:
#   numeric vector c(mean, standard deviation) of all numbers seen so far.
#   For the very first number the standard deviation is reported as 0.
#   From the second number on it is the sample standard deviation (n - 1
#   denominator), i.e. the same value sd() gives for the numbers seen so far.
# Side effects:
#   Keeps its running totals in three global variables:
#     gsum    sum of the numbers seen so far
#     gvar    sample variance of the numbers seen so far
#     gcount  position the NEXT number will have (numbers seen so far + 1)
#   Remove these three variables from the global environment to start over.
stream <- function(x) {
  # Look for the state only where it is written (the global environment), and
  # require all three pieces so a stray variable cannot fake a running state.
  have_state <- exists("gcount", envir = .GlobalEnv, inherits = FALSE) &&
    exists("gsum", envir = .GlobalEnv, inherits = FALSE) &&
    exists("gvar", envir = .GlobalEnv, inherits = FALSE)

  if (!have_state) {
    # On 1st number, the sum & mean are the number itself and the variance is
    # zero
    lcount <- 1
    lsum <- x
    lpect <- x
    lvar <- 0
  } else {
    # After 1st number, get running totals from the global environment.
    # gcount was stored as "numbers seen + 1", so it is already the count
    # that includes the new number x.
    lcount <- get("gcount", envir = .GlobalEnv)
    prev_sum <- get("gsum", envir = .GlobalEnv)
    prev_var <- get("gvar", envir = .GlobalEnv)

    # Mean of the numbers seen before x
    prev_mean <- prev_sum / (lcount - 1)

    lsum <- prev_sum + x
    lpect <- lsum / lcount

    # Exact update of the sample variance (Welford-style recurrence):
    #   s_n^2 = s_(n-1)^2 * (n - 2) / (n - 1) + (x - mean_(n-1))^2 / n
    # The deviation must be taken from the PREVIOUS mean, and the old variance
    # must be rescaled; simply adding (x - mean)^2 / n over-counts.
    lvar <- prev_var * (lcount - 2) / (lcount - 1) +
      (x - prev_mean)^2 / lcount
  }
  lsd <- sqrt(lvar)

  # Set global variables for running totals; the stored count is advanced so
  # it points at the next number
  assign("gsum", lsum, envir = .GlobalEnv)
  assign("gvar", lvar, envir = .GlobalEnv)
  assign("gcount", lcount + 1, envir = .GlobalEnv)

  c(lpect, lsd)
}
# Start from a clean state so the transcript below is reproducible
rm(
  list = intersect(c("gsum", "gvar", "gcount"), ls(envir = .GlobalEnv)),
  envir = .GlobalEnv
)
# Make an array (or vector) of values for the test & feed them 1-at-a-time to
# stream() to compare. From the 2nd value on, each result equals mean() and
# sd() of the values fed in so far.
test <- c(3, 6, 7, 1, 7, 9, 2, 8, 1, 5)
stream(3)
## [1] 3 0
stream(6)
## [1] 4.50000 2.12132
stream(7)
## [1] 5.333333 2.081666
stream(1)
## [1] 4.250000 2.753785
stream(7)
## [1] 4.800000 2.683282
stream(9)
## [1] 5.500000 2.949576
stream(2)
## [1] 5 3
stream(8)
## [1] 5.375000 2.973094
stream(1)
## [1] 4.888889 3.140241
stream(5)
## [1] 4.900000 2.960856
mean(test)
## [1] 4.9
sd(test)
## [1] 2.960856

I got closer estimating the standard deviation by dividing the variance by n instead of n - 1.