df<-data.frame(a=cumsum(rnorm(n = 100,mean = 0,sd = 1)),b=cumsum(rnorm(n = 100,mean = 0,sd = 1)))
plot(df$a,type="l")
plot(df$b,type="l")
df$b <- ifelse(df$b<0,0,df$b)
res_1 <- lm(formula = b~a,data = df)
summary(res_1)
##
## Call:
## lm(formula = b ~ a, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.5352 -0.8150 -0.2839 0.8532 2.6118
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.36341 0.14268 9.556 1.12e-15 ***
## a -0.11078 0.02719 -4.074 9.39e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.104 on 98 degrees of freedom
## Multiple R-squared: 0.1448, Adjusted R-squared: 0.1361
## F-statistic: 16.6 on 1 and 98 DF, p-value: 9.392e-05
rpois_r<- function(x){
return(rpois(1,x))
}
df$c <- sapply(df$b, rpois_r)
あたりまえだけどcはaに強い相関がある。
res_2 <- glm(formula = c~a,data = df,family=poisson(link="identity"))
summary(res_2)
##
## Call:
## glm(formula = c ~ a, family = poisson(link = "identity"), data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8317 -1.5062 -0.9487 0.4556 3.5778
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.32752 0.14724 9.016 < 2e-16 ***
## a -0.10450 0.02111 -4.951 7.38e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 238.22 on 99 degrees of freedom
## Residual deviance: 215.67 on 98 degrees of freedom
## AIC: 317.51
##
## Number of Fisher Scoring iterations: 4
library(broom)
## Warning: package 'broom' was built under R version 3.5.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.5.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
fun_sim <- function(){
df<-data.frame(a=cumsum(rnorm(n = 100,mean = 0,sd = 1)),b=cumsum(rnorm(n = 100,mean = 0,sd = 1)))
# df$b <- ifelse(df$b<0,0,df$b)
res_1 <- lm(formula = b~a,data = df)
res_1 <- broom::tidy(res_1) %>%filter(term=="a")
return(res_1)
}
res_tab <- fun_sim()
for(i in 1:10000){
res_tab <- dplyr::bind_rows(res_tab,fun_sim())
}
res_tab %>% pull(estimate) %>% hist()
res_tab %>% pull(p.value) %>% hist()
res_tab %>% mutate(significant = p.value <= 0.05) %>% group_by(significant) %>% summarise(n = n())
## # A tibble: 2 x 2
## significant n
## <lgl> <int>
## 1 FALSE 2431
## 2 TRUE 7570