library(faux)

## 
## ************
## Welcome to faux. For support and examples visit:
## https://debruine.github.io/faux/
## - Get and set global package options with: faux_options()
## ************

library(plyr)
library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --

## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.1.1     v forcats 0.5.1

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x purrr::%||%()      masks faux::%||%()
## x dplyr::arrange()   masks plyr::arrange()
## x purrr::compact()   masks plyr::compact()
## x dplyr::count()     masks plyr::count()
## x dplyr::failwith()  masks plyr::failwith()
## x dplyr::filter()    masks stats::filter()
## x dplyr::id()        masks plyr::id()
## x dplyr::lag()       masks stats::lag()
## x dplyr::mutate()    masks plyr::mutate()
## x dplyr::rename()    masks plyr::rename()
## x dplyr::summarise() masks plyr::summarise()
## x dplyr::summarize() masks plyr::summarize()

library(broom)
library(ggfortify)

set.seed(2021)
df <- rnorm_multi(n=120, vars= 2, mu = c(3, 6), sd= 1, 
                r = 0.75,
                varnames = c("rto","DPM"))

modelo1 <- lm(df$rto ~ df$DPM)
summary(modelo1)

## 
## Call:
## lm(formula = df$rto ~ df$DPM)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.47993 -0.43431 -0.01394  0.39889  1.72347 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -1.4961     0.3431  -4.361 2.78e-05 ***
## df$DPM        0.7360     0.0578  12.735  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6609 on 118 degrees of freedom
## Multiple R-squared:  0.5788, Adjusted R-squared:  0.5753 
## F-statistic: 162.2 on 1 and 118 DF,  p-value: < 2.2e-16

sqrt(1 - summary(modelo1)$adj.r.squared) * sd(df$rto)

## [1] 0.6608723

modelo1.diag <- augment(modelo1)
modelo1.diag

## # A tibble: 120 x 8
##    `df$rto` `df$DPM` .fitted .resid    .hat .sigma  .cooksd .std.resid
##       <dbl>    <dbl>   <dbl>  <dbl>   <dbl>  <dbl>    <dbl>      <dbl>
##  1     2.62     6.15    3.03 -0.413 0.00906  0.663 0.00180      -0.628
##  2     3.63     6.41    3.22  0.408 0.0108   0.663 0.00209       0.620
##  3     3.94     5.71    2.71  1.23  0.00846  0.654 0.0149        1.87 
##  4     4.09     5.58    2.61  1.48  0.00886  0.649 0.0227        2.25 
##  5     3.00     7.68    4.15 -1.15  0.0341   0.655 0.0553       -1.77 
##  6     1.03     4.37    1.72 -0.692 0.0249   0.661 0.0144       -1.06 
##  7     2.86     6.63    3.38 -0.527 0.0131   0.662 0.00427      -0.802
##  8     3.75     6.96    3.63  0.120 0.0179   0.664 0.000306      0.183
##  9     2.65     6.37    3.19 -0.540 0.0105   0.662 0.00357      -0.821
## 10     3.75     8.49    4.75 -0.999 0.0618   0.657 0.0802       -1.56 
## # ... with 110 more rows

Supuestos del modelo

ggplot(modelo1.diag, aes(df$rto, df$DPM)) +
  geom_point() +
  stat_smooth(method = lm, se = FALSE) +
  geom_segment(aes(xend = df$rto, yend = .fitted), color = "red", size = 0.3)

## `geom_smooth()` using formula 'y ~ x'

autoplot(modelo1)

Error estandar por simulacion montecarlo

df_cor <- data.frame(Correlacion = numeric())

for(i in (1:20)){
    sim   <- as.data.frame(rnorm_multi(n=120, vars= 2, mu = c(3, 6), sd= 1,r = 0.75))
    df_cor[i,] <- cor(sim[1], sim[2])
}
sd(df_cor$Correlacion)/sqrt(length(df_cor$Correlacion))

## [1] 0.007479929