My notes on using R

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

library(ggplot2)
library(tidyverse)
library(reshape2)

Simultaneously merge multiple data.frames in a list

This is useful when merging many stock price series, held as data frames in a list, into a single data frame.

https://stackoverflow.com/questions/8091303/simultaneously-merge-multiple-data-frames-in-a-list.

# Three small tibbles that share the key column `i`; each one contributes a
# single value column (j, k and l respectively).
x <- tibble(i = c("a", "b", "c"), j = 1:3)
y <- tibble(i = c("b", "c", "d"), k = 4:6)
z <- tibble(i = c("c", "d", "a"), l = 7:9)

# Fold the list into one tibble with successive full joins on `i`.
# A full join keeps every key that appears in any input and fills the gaps
# with NA. Unlike the superseded gather()/spread() round trip, the value
# columns are never pushed through one shared column, so each keeps its type.
list(x, y, z) %>%
  reduce(full_join, by = "i")

## # A tibble: 4 x 4
##   i         j     k     l
##   <chr> <int> <int> <int>
## 1 a         1    NA     9
## 2 b         2     4    NA
## 3 c         3     5     7
## 4 d        NA     6     8

Financial analysis with R

https://fderyckel.github.io/financialanalysiswithR/

Unit root test in R

https://www.r-bloggers.com/unit-root-tests/ https://freakonometrics.hypotheses.org/12729

Applied econometrics with R

https://eeecon.uibk.ac.at/~zeileis/teaching/AER/

Tsay, An introduction to analysis of financial data with R

http://faculty.chicagobooth.edu/ruey.tsay/teaching/introTS/

Simulation in R – Stanford U

https://web.stanford.edu/class/bios221/labs/simulation/lab_3_simulation.html

Generating Random Mixtures of Normal Data

# Two normal samples of unequal size: 1,000,000 draws from N(0, 1) and
# 1,500,000 draws from N(3, 1).
sampa <- rnorm(n = 1000000, mean = 0, sd = 1)
sampb <- rnorm(n = 1500000, mean = 3, sd = 1)

# Pool both samples into one vector and bin it in steps of 0.25.
combined <- c(sampa, sampb)
plt <- ggplot(data.frame(combined = combined), aes(x = combined)) +
  stat_bin(binwidth = 0.25, position = "identity")
plt

# Draw 1000 observations from each of four normal components.
# Each component has its own name so that nothing shadows base::c() and `d`
# is assigned exactly once, as the pooled sample.
comp_1 <- rnorm(1000, mean = 0, sd = 1)
comp_2 <- rnorm(1000, mean = 5, sd = 2)
comp_3 <- rnorm(1000, mean = 3, sd = 2)
comp_4 <- rnorm(1000, mean = -2, sd = 1)
d <- c(comp_1, comp_2, comp_3, comp_4)

# `id` is a factor (levels 1-4) recording which component each value came from;
# rep(..., each = 1000) lines it up with the order used to build `d`.
df <- data.frame(d, id = as.factor(rep(c(1, 2, 3, 4), each = 1000)))
dim(df)

## [1] 4000    2

# Show the first six rows of the pooled sample and its component labels.
head(df, n = 6L)

##             d id
## 1 -0.42302901  1
## 2 -0.38196684  1
## 3 -1.85975094  1
## 4  1.23994382  1
## 5  0.73957802  1
## 6 -0.01873475  1

# Two density layers over the pooled sample:
#   1. red lines, one per component (`group = id`), placed with
#      position = "stack"; this layer is kept out of the legend
#   2. one line per component, distinguished by linetype, drawn unstacked
# show.legend uses FALSE rather than F, because F is an ordinary variable
# that can be reassigned.
ggplot(df) +
  stat_density(
    aes(x = d, group = id),
    position = "stack", geom = "line", show.legend = FALSE, color = "red"
  ) +
  stat_density(aes(x = d, linetype = id), position = "identity", geom = "line")

Generating samples from a two-Gaussian mixture in r (code given in MATLAB)

https://stackoverflow.com/questions/12450007/generating-samples-from-a-two-gaussian-mixture-in-r-code-given-in-matlab

I’m trying to create (in r) the equivalent to the following MATLAB function that will generate n samples from a mixture of N(m1,(s1)^2) and N(m2, (s2)^2) with a fraction, alpha, from the first Gaussian.

#' Sample from a two-component Gaussian mixture
#'
#' Each of the `nsim` draws comes from the first normal when an independent
#' uniform variate falls below `alpha`, and from the second normal otherwise.
#'
#' @param nsim Number of draws to generate.
#' @param mean_1,mean_2 Means of the first and second component.
#' @param std_1,std_2 Standard deviations of the first and second component.
#' @param alpha Threshold compared against uniform(0, 1) variates; draws whose
#'   uniform is below it take the first component.
#' @return A numeric vector of length `nsim`.
gaussmix <- function(nsim, mean_1, mean_2, std_1, std_2, alpha) {
  # 1 where the first component is selected, 0 where the second one is.
  pick_first <- as.numeric(runif(nsim) < alpha)

  # A full set of candidates is drawn from both components (first, then
  # second); the 0/1 indicator keeps exactly one of them per position.
  draws_1 <- rnorm(nsim, mean = mean_1, sd = std_1)
  draws_2 <- rnorm(nsim, mean = mean_2, sd = std_2)

  pick_first * draws_1 + (1 - pick_first) * draws_2
}

# 1000 draws: uniforms below 0.95 select N(0, 1), the rest select a normal
# with sd 4, i.e. N(0, 16).
z1 <- gaussmix(1000, 0, 0, 1, 4, 0.95)

# Density-scaled histogram with a red kernel density line on top.
# after_stat(density) replaces the deprecated `..density..` notation, and
# FALSE replaces the reassignable shorthand F.
# The title is built from two pieces so that it contains exactly one line
# break and no indentation copied from the source layout.
ggplot(data.frame(z1), aes(z1)) +
  geom_histogram(aes(y = after_stat(density)), binwidth = 0.1) +
  stat_density(
    aes(z1),
    position = "stack", geom = "line", show.legend = FALSE, color = "red"
  ) +
  ggtitle(paste0(
    "Plot of mixture normal distribution:\n",
    "(1-X)*N (0, 1)+X*N (0, 16) with X being Bernoulli such that P (X=1)=0.05"
  )) +
  xlab("x") +
  ylab("f(x)")

# Standardize the mixture sample: subtract its mean, divide by its standard
# deviation (sd(z1) is the same quantity as sqrt(var(z1))).
z1_standardized <- (z1 - mean(z1)) / sd(z1)

# Base-graphics histogram of the standardized sample.
# The title names N(0,16) because the second component is generated with
# standard deviation 4.
hist(z1_standardized, xlim = c(-10, 10), ylim = c(0, 500),
   main = "Histogram of 95% of N(0,1) and 5% of N(0,16)",
   col = "blue", xlab = " ")

# Same view with ggplot2: density-scaled histogram of the standardized sample
# with a red kernel density line on top.
# after_stat(density) replaces the deprecated `..density..` notation, and
# FALSE replaces the reassignable shorthand F.
ggplot(data.frame(z1_standardized), aes(z1_standardized)) +
  geom_histogram(aes(y = after_stat(density)), binwidth = 0.1) +
  stat_density(
    aes(z1_standardized),
    position = "stack", geom = "line", show.legend = FALSE, color = "red"
  )

# Line plot of the standardized draws in the order they were generated.
# The title names N(0,16) because the second component is generated with
# standard deviation 4.
plot(z1_standardized, type = "l",
   main = "1000 samples from a mixture N(0,1) and N(0,16)",
   col = "blue", xlab = "Samples", ylab = "Mean", ylim = c(-10, 10))

Generating random variables from a mixture of Normal distributions in r

https://stats.stackexchange.com/questions/70855/generating-random-variables-from-a-mixture-of-normal-distributions

How can I sample from a mixture distribution, and in particular a mixture of Normal distributions in R? For example, if I wanted to sample from: 0.3×N(0,1)+0.5×N(10,1)+0.2×N(3,.1)

set.seed(8)               # this makes the example reproducible
N     <- 1000             # this is how many data you want
probs <- c(.3, .8)        # these are *cumulative* probabilities; since they
                          #   necessarily sum to 1, the last would be redundant
dists <- runif(N)         # here I'm generating random variates from a uniform
                          #   to select the relevant distribution

# Parameters of the three mixture components, listed in the order in which
# the cumulative probabilities select them.
means <- c(0, 10, 3)
sds   <- c(1, 1, .1)

# Component index for every draw:
#   1 when dists <  probs[1]
#   2 when probs[1] <= dists < probs[2]
#   3 otherwise
component <- 1L + (dists >= probs[1]) + (dists >= probs[2])

# this is where the actual data are generated: a single vectorized call draws
#   observation i from the normal distribution picked for it. rnorm() produces
#   its values one after another, so under the default RNG settings the same
#   seed gives the same sample as drawing them one at a time in a loop, and
#   N = 0 simply yields an empty vector.
data <- rnorm(N, mean = means[component], sd = sds[component])

# First way of inspecting the result: a numeric summary of the sample.
summary(object = data)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -3.2819  0.8443  3.1912  5.5346 10.0727 13.1630

# Second way: estimate the density of the sample, then draw the estimate.
mixture_density <- density(data)
plot(mixture_density)