## Warning: package 'ggplot2' was built under R version 3.4.2
## Warning: package 'dplyr' was built under R version 3.4.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Warning: package 'mosaic' was built under R version 3.4.2
## Loading required package: lattice
## Loading required package: ggformula
## Warning: package 'ggformula' was built under R version 3.4.2
## 
## New to ggformula?  Try the tutorials: 
##  learnr::run_tutorial("introduction", package = "ggformula")
##  learnr::run_tutorial("refining", package = "ggformula")
## Loading required package: mosaicData
## Warning: package 'mosaicData' was built under R version 3.4.2
## Loading required package: Matrix
## 
## The 'mosaic' package masks several functions from core packages in order to add 
## additional features.  The original behavior of these functions should not be affected by this.
## 
## Note: If you use the Matrix package, be sure to load it BEFORE loading mosaic.
## 
## Attaching package: 'mosaic'
## The following object is masked from 'package:Matrix':
## 
##     mean
## The following objects are masked from 'package:dplyr':
## 
##     count, do, tally
## The following objects are masked from 'package:stats':
## 
##     binom.test, cor, cor.test, cov, fivenum, IQR, median,
##     prop.test, quantile, sd, t.test, var
## The following objects are masked from 'package:base':
## 
##     max, mean, min, prod, range, sample, sum
##                   X  mpg cyl disp  hp drat    wt  qsec vs am gear carb
## 1         Mazda RX4 21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## 2     Mazda RX4 Wag 21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## 3        Datsun 710 22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## 4    Hornet 4 Drive 21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## 5 Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## 6           Valiant 18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
# Scatter plot of car weight against fuel economy, with a red least-squares
# line (and its confidence band) layered over the blue points.
p <- ggplot(data = mtcars, mapping = aes(x = mpg, y = wt))
p <- p + geom_point(colour = "blue")
p <- p + stat_smooth(method = "lm", colour = "red")

print(p)

# Regress weight on miles per gallon and print the coefficient table,
# residual summary and goodness-of-fit statistics.
fit <- lm(formula = wt ~ mpg, data = mtcars)
print(summary(fit))
## 
## Call:
## lm(formula = wt ~ mpg, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.6516 -0.3490 -0.1381  0.3190  1.3684 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.04726    0.30869  19.590  < 2e-16 ***
## mpg         -0.14086    0.01474  -9.559 1.29e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4945 on 30 degrees of freedom
## Multiple R-squared:  0.7528, Adjusted R-squared:  0.7446 
## F-statistic: 91.38 on 1 and 30 DF,  p-value: 1.294e-10
# Draw the diagnostic plots produced by plot(fit) together on a 2 x 2 page.
# par() returns the settings it replaces, so keep them and hand them back
# afterwards: this restores whatever layout was active before, rather than
# assuming it was a single panel.
old_par <- par(mfrow = c(2, 2))
plot(fit)

par(old_par)
# Observed correlation between mpg and wt; kept as the baseline that the
# shuffled correlations are compared against later in the script.
cor_base <- cor(x = mtcars[["mpg"]], y = mtcars[["wt"]])
print(cor_base)
## [1] -0.8676594
# Ordinary least squares fit: weight explained by miles per gallon.
fit <- lm(wt ~ mpg, mtcars)
# Show coefficients, residual quantiles and R-squared for the fit.
summary(object = fit)
## 
## Call:
## lm(formula = wt ~ mpg, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.6516 -0.3490 -0.1381  0.3190  1.3684 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.04726    0.30869  19.590  < 2e-16 ***
## mpg         -0.14086    0.01474  -9.559 1.29e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4945 on 30 degrees of freedom
## Multiple R-squared:  0.7528, Adjusted R-squared:  0.7446 
## F-statistic: 91.38 on 1 and 30 DF,  p-value: 1.294e-10
# Split the plotting panel into a 2 x 2 grid for the model diagnostics.
# The previous settings returned by par() are saved so the layout can be put
# back exactly as it was, instead of being forced to a single panel.
old_par <- par(mfrow = c(2, 2))
plot(fit)  # Plot the model information

par(old_par)  # Restore the layout that was active before
# Baseline (unshuffled) correlation between fuel economy and weight.
cor_base <- cor(x = mtcars[, "mpg"], y = mtcars[, "wt"])
print(cor_base)
## [1] -0.8676594
# Work on a copy of mtcars so the extra columns do not touch the original.
d <- mtcars
fit <- lm(formula = wt ~ mpg, data = d)

# Attach the model's fitted values and residuals as new columns.
d[["predicted"]] <- predict(fit)
d[["residuals"]] <- residuals(fit)

# Preview the first rows of the predictor alongside the new columns.
head(select(d, mpg, predicted, residuals))
##                    mpg predicted   residuals
## Mazda RX4         21.0  3.089154 -0.46915365
## Mazda RX4 Wag     21.0  3.089154 -0.21415365
## Datsun 710        22.8  2.835602 -0.51560210
## Hornet 4 Drive    21.4  3.032809  0.18219114
## Hornet Sportabout 18.7  3.413136  0.02686382
## Valiant           18.1  3.497653 -0.03765336
# Residuals plotted against fitted values, with a red reference line at zero.
# `color` is the parameter geom_point() recognises; the earlier `colors`
# spelling was ignored with a warning, so the points were never drawn in blue.
ggplot(d, aes(x = predicted, y = residuals)) +
  geom_point(color = "blue") +
  geom_hline(yintercept = 0, color = "red")

# Store one random permutation of the weights beside the originals, then
# preview both columns to see that the pairing with each car is broken.
d[["shuffle_wt"]] <- shuffle(d[["wt"]])
head(select(d, wt, shuffle_wt))
##                      wt shuffle_wt
## Mazda RX4         2.620      1.835
## Mazda RX4 Wag     2.875      2.875
## Datsun 710        2.320      1.935
## Hornet 4 Drive    3.215      3.730
## Hornet Sportabout 3.440      2.465
## Valiant           3.460      3.840
# Refit the regression using the permuted weights as the response; the model
# is stored in `fit` but not printed in this step.
fit <- lm(formula = shuffle_wt ~ mpg, data = d)
# Correlation between mpg and the permuted weights.
print(cor(x = d[["mpg"]], y = d[["shuffle_wt"]]))
## [1] -0.3575765
# Permutation loop: shuffle wt afresh on every trial and record its
# correlation with mpg.
ntrials <- 500
# Allocate the result vector once at its final length instead of starting
# from an empty `rep()` and growing it one element per iteration.
cor_i <- numeric(ntrials)
# seq_len() yields an empty sequence when ntrials is 0, whereas 1:ntrials
# would iterate over c(1, 0).
for (i in seq_len(ntrials)) {
  cor_i[i] <- cor(x = d$mpg, y = shuffle(d$wt))
}

# Tally the shuffled correlations whose magnitude exceeds the observed one.
table(abs(cor_base) < abs(cor_i))
## 
## FALSE 
##   500
# Tally the shuffled correlations that are larger than 0.1 in absolute value.
table(0.1 < abs(cor_i))
## 
## FALSE  TRUE 
##   194   306
# Scatter plot of carb against gear, with a red least-squares line layered
# over the blue points.
p2 <- ggplot(data = mtcars, mapping = aes(x = gear, y = carb)) +
  geom_point(colour = "blue") +
  stat_smooth(method = "lm", colour = "red")

print(p2)

# Fit weight on miles per gallon and report the results.
# NOTE(review): the plot just above (p2) and the shuffling loop further down
# both use gear and carb, while this model is still wt ~ mpg. Presumably the
# intended model is carb ~ gear; confirm, then re-run so the printed summary
# is regenerated to match.
fit <- lm(formula = wt ~ mpg, data = mtcars)
print(summary(fit))
## 
## Call:
## lm(formula = wt ~ mpg, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.6516 -0.3490 -0.1381  0.3190  1.3684 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.04726    0.30869  19.590  < 2e-16 ***
## mpg         -0.14086    0.01474  -9.559 1.29e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4945 on 30 degrees of freedom
## Multiple R-squared:  0.7528, Adjusted R-squared:  0.7446 
## F-statistic: 91.38 on 1 and 30 DF,  p-value: 1.294e-10
# Split the plotting panel into a 2 x 2 grid for the model diagnostics.
# The settings that par() replaces are kept and restored afterwards, so the
# layout returns to whatever it was rather than to a hard-coded single panel.
old_par <- par(mfrow = c(2, 2))
plot(fit)  # Plot the model information

par(old_par)  # Restore the layout that was active before
# Observed correlation for the pair of variables examined in this section.
# The scatter plot (p2) and the shuffled correlations computed below both use
# gear and carb, so the baseline is taken from those two columns as well; it
# was previously computed from mpg and wt.
cor_base <- cor(x = mtcars$gear, y = mtcars$carb)
cor_base
## [1] 0.2740728
# Permutation loop for gear and carb: shuffle carb afresh on every trial and
# record its correlation with gear.
ntrials <- 1000
# Allocate the result vector once at its final length instead of starting
# from an empty `rep()` and growing it one element per iteration.
cor_i <- numeric(ntrials)
# seq_len() yields an empty sequence when ntrials is 0, whereas 1:ntrials
# would iterate over c(1, 0).
for (i in seq_len(ntrials)) {
  cor_i[i] <- cor(x = mtcars$gear, y = shuffle(mtcars$carb))
}