#########################################   Q1   ##############################################
#One of the key observation after reading the R handout is try to use vectorized version of a function
#rather than using lapply/sapply or looping through (using while/repeat/for) Here is the experiment and data for it

#Example finding sin of values in an vector.

#get 10 Million numbers beweteen 0, 2* pi randomly
nums = runif(10000000, 0,2*pi)

# use the vector implementation to find sine of 10M points above and time it
system.time(sin(nums))
##    user  system elapsed 
##    0.64    0.05    0.77
# use sapply to iterate and find the sin of the vector of 10 Million numbers
system.time(sapply(nums,sin))
##    user  system elapsed 
##   13.28    0.22   14.50
# Write a custom iterator to iterate over the 10 Million numbers and find the sum
sin_cal = function(nums){
  sum = 0 
  for (elem in nums){
    sum = sum + sin(elem)
  }
  return(sum)
}

# time the handwritten iterator
system.time(sin_cal(nums))
##    user  system elapsed 
##    8.15    0.04    9.02
#it is clear from the above data that the vectorize verision is 11X faster than the non vectorized version. sappply and the iteration (loop) are having the similar runtime. 

#Another observation is, since R being a interpreted language it is slower in task where looping (nested looping) is required. If the task can't be vectorized and is a bottleneck you should think of implementing it in C/C++ and calling it thorugh R, you will see dramatic improvement in speed.





# bumpup the expressions limit to avoid the early stack overflow
options(expressions=500000)

#########################################   Q2   ##############################################


# gamma function implementation (iterative version)
# log_gamma = log(!(n-1)) for n > 1
# log_gamma(1) = 0 for n = 1
 
log_gamma_loop = function (n){
  sum = 0 
  val = n-1
  while(val > 0){
    sum = sum + log(val)
    val = val -1
  }
  return(sum)
}
test = 0
if(test){
  print(log_gamma_loop(5))
  print(log_gamma_loop(1))
}


#########################################   Q3   ##############################################


# gamma function implementation (recursive version)
# log_gamma = log(!(n-1)) for n > 1
# log_gamma(1) = 0 for n = 1

log_gamma_recursive = function (n){
  if(n <= 1){ #base case log_gamma(1) == 0
    return(0)
  }else{
    return(log_gamma_recursive(n-1) + log(n-1))
  }
}


#print(log_gamma_recursive(5))

#########################################   Q4   ##############################################


sum_log_gamma_loop = function(n){
  sum = 0
  for(i in seq(1,n,by=1)){
    sum = sum + log_gamma_loop(i)
  }
  return(sum)    
}


sum_log_gamma_recursive = function(n){
  if(n > 1){
    return (sum_log_gamma_recursive(n-1) + log_gamma_recursive(n))
  }else{
    return(log_gamma_recursive(n))
  }
}



sum_log_gamma_recursive(5)
## [1] 5.66296
#system.time(sum_log_gamma_loop(500000))
#system.time(sum_log_gamma_recursive())
#system.time(lgamma(10^8))
val_loop = sum_log_gamma_loop(10^2);val_loop
## [1] 15617.2
#system.time()
#seq = 1:10^5;

#val = sum(sapply(1:10^2,FUN=lgamma));val

#########################################   Q5   ##############################################


# generate a geometric series for the test input max upto 10^8

geomSeries <- function(base, max) {
    base^(0:floor(log(max, base)))
}
test_pattern_larger = geomSeries(base=2, max=2^16)

test_pattern_small = geomSeries(base=2, max=2048)

# generate the runtime comparision of the three version of sum_log_gamma
# we pass option skip argument which skips the recursive version of the  



  
  
#sum_lgamma = as.numeric(0) # holds the vector of running time for sum_lgamma / library implementation

  
sum_log_gamma_lib = function(n){
  l = (sum(sapply(1:n,FUN=lgamma)))
  return(l)
}

run_test_vector = function(test_pattern,Func){
  sum_lgamma = vector(mode="numeric", length=0); # holds the runtime of the results
  for (elem in test_pattern){
    l = system.time(Func(elem))[[1]]
    print(l)
    sum_lgamma = c(sum_lgamma,l)
  }
  return(sum_lgamma) 
}

sum_log_gamma_lib_time = 0.0

# Run small set of test case where the recursive example does not overflow

sum_log_gamma_lib_time = run_test_vector(test_pattern_small,sum_log_gamma_lib)
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
sum_log_gamma_loop_time = run_test_vector(test_pattern_small,sum_log_gamma_loop)
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0.05
## [1] 0.19
## [1] 0.78
## [1] 3.05
sum_log_gamma_recursive_time = run_test_vector(test_pattern_small,sum_log_gamma_recursive)
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0.02
## [1] 0.03
## [1] 0.16
## [1] 0.66
## [1] 2.68
## [1] 10
df = data.frame(sum_log_gamma_lib_time,sum_log_gamma_loop_time,sum_log_gamma_recursive_time)
df =  cbind(df,size=test_pattern_small)
df
##    sum_log_gamma_lib_time sum_log_gamma_loop_time
## 1                       0                    0.00
## 2                       0                    0.00
## 3                       0                    0.00
## 4                       0                    0.00
## 5                       0                    0.00
## 6                       0                    0.00
## 7                       0                    0.00
## 8                       0                    0.00
## 9                       0                    0.05
## 10                      0                    0.19
## 11                      0                    0.78
## 12                      0                    3.05
##    sum_log_gamma_recursive_time size
## 1                          0.00    1
## 2                          0.00    2
## 3                          0.00    4
## 4                          0.00    8
## 5                          0.00   16
## 6                          0.00   32
## 7                          0.02   64
## 8                          0.03  128
## 9                          0.16  256
## 10                         0.66  512
## 11                         2.68 1024
## 12                        10.00 2048
# Run larger set of test case with sum_log_gamma_lib() and sum_log_gamma_loop()

sum_log_gamma_lib_time = run_test_vector(test_pattern_larger,sum_log_gamma_lib)
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0.01
## [1] 0
## [1] 0.03
## [1] 0.01
## [1] 0.03
## [1] 0.05
sum_log_gamma_loop_time = run_test_vector(test_pattern_larger,sum_log_gamma_loop)
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0.02
## [1] 0.04
## [1] 0.28
## [1] 0.93
## [1] 3.66
## [1] 12.19
## [1] 43.26
## [1] 170.82
## [1] 674.97
## [1] 2663.28
df_large = data.frame(sum_log_gamma_lib_time,sum_log_gamma_loop_time)
df_large =  cbind(df_large,size=test_pattern_larger)

df_large
##    sum_log_gamma_lib_time sum_log_gamma_loop_time  size
## 1                    0.00                    0.00     1
## 2                    0.00                    0.00     2
## 3                    0.00                    0.00     4
## 4                    0.00                    0.00     8
## 5                    0.00                    0.00    16
## 6                    0.00                    0.00    32
## 7                    0.00                    0.00    64
## 8                    0.00                    0.02   128
## 9                    0.00                    0.04   256
## 10                   0.00                    0.28   512
## 11                   0.00                    0.93  1024
## 12                   0.01                    3.66  2048
## 13                   0.00                   12.19  4096
## 14                   0.03                   43.26  8192
## 15                   0.01                  170.82 16384
## 16                   0.03                  674.97 32768
## 17                   0.05                 2663.28 65536
plotfunc = function(df){
  require(ggplot2)
  if("sum_log_gamma_recursive_time" %in% colnames(df)){
    ggplot(df, aes(size)) + 
      geom_line(aes(y = sum_log_gamma_loop_time, colour = "sum_log_gamma_loop_time")) + 
      geom_line(aes(y = sum_log_gamma_recursive_time, colour = "sum_log_gamma_recursive_time"))+
      geom_line(aes(y = sum_log_gamma_lib_time, colour = "sum_log_gamma_lib_time"))+
      labs(x="Size",y="time(s)") 
    
  }else{
    ggplot(df, aes(size)) + 
      geom_line(aes(y = sum_log_gamma_loop_time, colour = "sum_log_gamma_loop_time")) + 
      geom_line(aes(y = sum_log_gamma_lib_time, colour = "sum_log_gamma_lib_time"))+
      labs(x="Size",y="time(s)") 
  }
  
}
plotfunc(df)
## Loading required package: ggplot2

plotfunc(df_large)

#Error: C stack usage is too close to the limit