PROBLEM SET 1

Write functions to compute the expected value and standard deviation of an array of values. Compare the results with those of R’s built-in mean and sd functions.

# Expected value (arithmetic mean) of a vector.
#
# Args:
#   x: Vector of values to average.
#
# Returns:
#   sum(x) divided by the number of elements, or NA when x is empty.
my_expected_value <- function(x) {
  n <- length(x)

  # Nothing to average: signal with NA rather than dividing by zero.
  if (n == 0) {
    return(NA)
  }

  sum(x) / n
}

# Sample standard deviation of a vector (divides by n - 1).
#
# Args:
#   x: Vector of values.
#
# Returns:
#   The square root of the sum of squared deviations from the mean divided
#   by (length(x) - 1), or NA when x has fewer than two elements.
my_std_deviation <- function(x) {
  n <- length(x)

  # With fewer than two values the n - 1 denominator is not usable.
  if (n < 2) {
    return(NA)
  }

  centre <- my_expected_value(x)
  squared_dev <- (x - centre)^2
  sqrt(sum(squared_dev) / (n - 1))
}

Testing my functions with an array

# Sample data: 20 values used to exercise both hand-written functions.
an_array <- c(9, 2, 5, 4, 12, 7, 8, 11, 9, 3, 7, 4, 12, 5, 4, 10, 9, 6, 9, 4)
 
# Mean of the sample computed by the hand-written function.
my_expected_value(an_array)
## [1] 7
# Sample standard deviation computed by the hand-written function.
my_std_deviation(an_array)
## [1] 3.060788

Comparing the results with R’s built-in mean and sd functions (they should be the same)

# Reference value: built-in mean of the same sample.
mean(an_array)
## [1] 7
# Reference value: built-in sample standard deviation of the same sample.
sd(an_array)
## [1] 3.060788

Now create a function to estimate the mean and standard deviation of an array that is fed by an infinite stream of numbers.

# Global buffer that accumulates the values of the stream as they come in.
# It starts out empty (NULL); rolling_est() appends to it and can clear it.
inf_stream <- NULL

# Append new values to the global `inf_stream` buffer and report the mean and
# standard deviation of everything collected so far.
#
# Args:
#   x:     Vector of new values to append (default: nothing to append).
#   reset: If TRUE, clear the global buffer instead of appending. Any `x`
#          passed together with reset = TRUE is ignored.
#
# Returns:
#   A list with three elements:
#     "inf_array" - the buffer contents after this call,
#     "mean"      - mean of the buffer,
#     "std"       - sample standard deviation of the buffer (NA when the
#                   buffer holds fewer than two values).
#   After a reset all three elements are NULL.
#
# Side effects:
#   Reads and overwrites the variable `inf_stream` via `<<-`.
#
# Note: the complete history is kept in memory and both statistics are
# recomputed from it on every call, so memory use grows with the stream.
rolling_est <- function(x = c(), reset = FALSE) {

  # Clearing takes priority over appending, so `x` is dropped on reset.
  if (isTRUE(reset)) {
    inf_stream <<- NULL
    return(list("inf_array" = NULL, "mean" = NULL, "std" = NULL))
  }

  # Start from an empty buffer if the global has not been created yet,
  # instead of failing with "object 'inf_stream' not found".
  previous <- if (exists("inf_stream")) inf_stream else NULL

  updated <- c(previous, x)
  inf_stream <<- updated

  if (is.null(updated)) {
    # mean(NULL) would return NA with a warning; return the same value quietly.
    m <- NA_real_
    s <- NA_real_
  } else {
    m <- mean(updated)
    s <- sd(updated)
  }

  list("inf_array" = updated, "mean" = m, "std" = s)
}

Test the new rolling estimate function with a stream of new numbers coming in

# Feed the stream ten batches of random integers drawn (with replacement) from
# 1 to 100; batch i contains i values. After each batch, print the new values
# and the updated estimates. No seed is set here, so the numbers differ from
# run to run.
for (i in seq_len(10)) {
  rv <- sample(1:100, i, replace = TRUE)
  print("Add new numbers:")
  print(rv)
  print(rolling_est(rv))
}
## [1] "Add new numbers:"
## [1] 28
## $inf_array
## [1] 28
## 
## $mean
## [1] 28
## 
## $std
## [1] NA
## 
## [1] "Add new numbers:"
## [1] 46 39
## $inf_array
## [1] 28 46 39
## 
## $mean
## [1] 37.66667
## 
## $std
## [1] 9.073772
## 
## [1] "Add new numbers:"
## [1] 86 79 65
## $inf_array
## [1] 28 46 39 86 79 65
## 
## $mean
## [1] 57.16667
## 
## $std
## [1] 23.12935
## 
## [1] "Add new numbers:"
## [1]  9 24 44 43
## $inf_array
##  [1] 28 46 39 86 79 65  9 24 44 43
## 
## $mean
## [1] 46.3
## 
## $std
## [1] 24.23977
## 
## [1] "Add new numbers:"
## [1]  80  14  69 100  85
## $inf_array
##  [1]  28  46  39  86  79  65   9  24  44  43  80  14  69 100  85
## 
## $mean
## [1] 54.06667
## 
## $std
## [1] 28.60936
## 
## [1] "Add new numbers:"
## [1] 50 86 40 91 62 12
## $inf_array
##  [1]  28  46  39  86  79  65   9  24  44  43  80  14  69 100  85  50  86
## [18]  40  91  62  12
## 
## $mean
## [1] 54.85714
## 
## $std
## [1] 28.17496
## 
## [1] "Add new numbers:"
## [1] 98 22 56 82 33 18 74
## $inf_array
##  [1]  28  46  39  86  79  65   9  24  44  43  80  14  69 100  85  50  86
## [18]  40  91  62  12  98  22  56  82  33  18  74
## 
## $mean
## [1] 54.82143
## 
## $std
## [1] 28.3876
## 
## [1] "Add new numbers:"
## [1] 84 39 86 99  7 89 81 96
## $inf_array
##  [1]  28  46  39  86  79  65   9  24  44  43  80  14  69 100  85  50  86
## [18]  40  91  62  12  98  22  56  82  33  18  74  84  39  86  99   7  89
## [35]  81  96
## 
## $mean
## [1] 58.77778
## 
## $std
## [1] 29.78793
## 
## [1] "Add new numbers:"
## [1] 63 71 66 25  6 18 91 28 76
## $inf_array
##  [1]  28  46  39  86  79  65   9  24  44  43  80  14  69 100  85  50  86
## [18]  40  91  62  12  98  22  56  82  33  18  74  84  39  86  99   7  89
## [35]  81  96  63  71  66  25   6  18  91  28  76
## 
## $mean
## [1] 56.88889
## 
## $std
## [1] 29.76669
## 
## [1] "Add new numbers:"
##  [1] 25 83 50 53 54  1  9 11 38 46
## $inf_array
##  [1]  28  46  39  86  79  65   9  24  44  43  80  14  69 100  85  50  86
## [18]  40  91  62  12  98  22  56  82  33  18  74  84  39  86  99   7  89
## [35]  81  96  63  71  66  25   6  18  91  28  76  25  83  50  53  54   1
## [52]   9  11  38  46
## 
## $mean
## [1] 53.27273
## 
## $std
## [1] 29.82508
# Reset the stream: clears the global buffer, so the returned array, mean and
# std are all NULL.
rolling_est(reset=TRUE)
## $inf_array
## NULL
## 
## $mean
## NULL
## 
## $std
## NULL