library(readxl)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.6     v purrr   0.3.4
## v tibble  3.1.3     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(moderndive)
library(skimr)

# **EJERCICIO CIRCULOS CONCENTRICOS**
# **4 sensores torres ubicados en lugares diferentes**

# Load the measurements workbook into `df`.
# NOTE(review): this is an absolute, machine-specific path; consider a
# project-relative path so the script runs on other machines.
df <- read_excel("C:/Mediciones/Circulos_Concentricos.xlsx")

# View() opens a GUI data viewer, so it is only useful in an interactive
# session; skip it when the script is run or knitted non-interactively.
if (interactive()) {
  View(df)
}

# Compact overview of the column names, types and leading values.
glimpse(df)
## Rows: 92
## Columns: 6
## $ num    <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, ~
## $ fecha  <dttm> 2021-02-25 16:00:00, 2021-02-25 17:00:00, 2021-02-25 18:00:00,~
## $ estfer <dbl> 2.76, 2.61, 2.92, 3.12, 3.05, 4.35, 4.85, 4.93, 3.42, 5.38, 4.4~
## $ torres <dbl> 2.00, 2.35, 2.79, 3.43, 3.42, 5.00, 4.79, 4.66, 3.89, 6.13, 4.9~
## $ ponte  <dbl> 3.14, 3.21, 3.30, 3.98, 3.51, 5.02, 4.61, 4.80, 4.15, 5.93, 5.1~
## $ sml    <dbl> 3.72, 3.26, 4.67, 4.66, 7.75, 8.66, 9.63, 7.97, 6.89, 5.57, 5.4~
# Print 10 randomly chosen rows as a quick sanity check of the data.
# slice_sample() is the current replacement for the superseded sample_n().
df %>%
  slice_sample(n = 10)
## # A tibble: 10 x 6
##      num fecha               estfer torres ponte   sml
##    <dbl> <dttm>               <dbl>  <dbl> <dbl> <dbl>
##  1    57 2021-02-28 00:00:00  10.0   12.2  11.4  11.1 
##  2    58 2021-02-28 01:00:00   4.31   5.4   8.42  9.19
##  3    54 2021-02-27 21:00:00  11.0   13.6  11.2  14.0 
##  4    73 2021-02-28 16:00:00   7.14   8.85  6.65  5.77
##  5    35 2021-02-27 02:00:00   5.35   5.59  5.72  7.94
##  6    10 2021-02-26 01:00:00   5.38   6.13  5.93  5.57
##  7    69 2021-02-28 12:00:00   1.84   1.98  2.34  2.81
##  8     3 2021-02-25 18:00:00   2.92   2.79  3.3   4.67
##  9    87 2021-03-01 06:00:00  11.3   14.5  12.7  10.7 
## 10    66 2021-02-28 09:00:00   1.93   1.69  1.42  3.46
# Interactive chart with one lines+markers trace per station, all plotted
# against the `num` column.
fig <- plot_ly(
  data = df,
  x = ~num,
  y = ~estfer,
  name = "PM2.5 Estacion Ferias",
  type = "scatter",
  mode = "lines+markers"
) %>%
  add_trace(
    y = ~torres,
    name = "PM2.5 Torres del Sol",
    mode = "lines+markers"
  ) %>%
  add_trace(
    y = ~ponte,
    name = "PM2.5 Pontenovo",
    mode = "lines+markers"
  ) %>%
  add_trace(
    y = ~sml,
    name = "PM2.5 Santa Maria del Lago",
    mode = "lines+markers"
  )
fig
#Caso 1: ferias VS torres

# Summary statistics for the two columns compared in this case.
df %>%
  select(estfer, torres) %>%
  skim()
Data summary
Name Piped data
Number of rows 92
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
estfer 0 1 7.00 4.91 0.42 3.10 6.17 10.48 22.62 ▇▅▅▁▁
torres 0 1 7.78 5.65 0.06 3.43 6.12 12.31 25.16 ▇▅▅▁▁
# Pearson correlation coefficient between estfer and torres.
# The formula is written as response ~ explanatory (torres ~ estfer) so it
# matches the scatterplot axes and the regression fitted for this case; the
# coefficient is symmetric, so the reported value does not change.
df %>%
  get_correlation(formula = torres ~ estfer)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.982
# Scatterplot of torres against estfer with a fitted straight line overlaid
# (standard-error band disabled).
ggplot(data = df, mapping = aes(x = estfer, y = torres)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between ferias and torres",
    x = "PM25 estfer",
    y = "PM25 torres"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression of torres on estfer.
score_model <- lm(formula = torres ~ estfer, data = df)
# Coefficient table for the fitted model (estimates, standard errors,
# test statistics, p-values and confidence-interval bounds).
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept   -0.115     0.195    -0.593   0.554   -0.502    0.271
## 2 estfer       1.13      0.023    49.5     0        1.08     1.17
# Per-observation table of observed value, fitted value and residual for the
# model currently stored in `score_model`.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 92 x 5
##       ID torres estfer torres_hat residual
##    <int>  <dbl>  <dbl>      <dbl>    <dbl>
##  1     1   2      2.76       3.00   -0.999
##  2     2   2.35   2.61       2.83   -0.479
##  3     3   2.79   2.92       3.18   -0.389
##  4     4   3.43   3.12       3.40    0.025
##  5     5   3.42   3.05       3.33    0.094
##  6     6   5      4.35       4.79    0.208
##  7     7   4.79   4.85       5.36   -0.567
##  8     8   4.66   4.93       5.45   -0.787
##  9     9   3.89   3.42       3.74    0.147
## 10    10   6.13   5.38       5.96    0.175
## # ... with 82 more rows
#Caso 2: ferias VS ponte

# Summary statistics for the two columns compared in this case.
df %>%
  select(estfer, ponte) %>%
  skim()
Data summary
Name Piped data
Number of rows 92
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
estfer 0 1 7.00 4.91 0.42 3.10 6.17 10.48 22.62 ▇▅▅▁▁
ponte 0 1 7.56 5.16 0.40 3.63 6.64 11.25 25.25 ▇▇▃▁▁
# Pearson correlation coefficient between estfer and ponte.
# The formula is written as response ~ explanatory (ponte ~ estfer) so it
# matches the scatterplot axes and the regression fitted for this case; the
# coefficient is symmetric, so the reported value does not change.
df %>%
  get_correlation(formula = ponte ~ estfer)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.964
# Scatterplot of ponte against estfer with a fitted straight line overlaid
# (standard-error band disabled).
ggplot(data = df, mapping = aes(x = estfer, y = ponte)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between ferias and ponte",
    x = "PM25 estfer",
    y = "PM25 ponte"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression of ponte on estfer.
score_model <- lm(formula = ponte ~ estfer, data = df)
# Coefficient table for the fitted model (estimates, standard errors,
# test statistics, p-values and confidence-interval bounds).
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept     0.48     0.253      1.90   0.061   -0.023    0.982
## 2 estfer        1.01     0.03      34.2    0        0.953    1.07
# Per-observation table of observed value, fitted value and residual for the
# model currently stored in `score_model`.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 92 x 5
##       ID ponte estfer ponte_hat residual
##    <int> <dbl>  <dbl>     <dbl>    <dbl>
##  1     1  3.14   2.76      3.27   -0.133
##  2     2  3.21   2.61      3.12    0.089
##  3     3  3.3    2.92      3.43   -0.134
##  4     4  3.98   3.12      3.64    0.343
##  5     5  3.51   3.05      3.57   -0.056
##  6     6  5.02   4.35      4.88    0.138
##  7     7  4.61   4.85      5.39   -0.778
##  8     8  4.8    4.93      5.47   -0.669
##  9     9  4.15   3.42      3.94    0.21 
## 10    10  5.93   5.38      5.92    0.006
## # ... with 82 more rows
#Caso 3: estacion ferias VS sml

# Summary statistics for the two columns compared in this case.
df %>%
  select(estfer, sml) %>%
  skim()
Data summary
Name Piped data
Number of rows 92
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
estfer 0 1 7.00 4.91 0.42 3.10 6.17 10.48 22.62 ▇▅▅▁▁
sml 0 1 7.68 4.66 1.16 3.98 6.99 10.52 23.53 ▇▆▃▂▁
# Pearson correlation coefficient between estfer and sml.
# The formula is written as response ~ explanatory (sml ~ estfer) so it
# matches the scatterplot axes and the regression fitted for this case; the
# coefficient is symmetric, so the reported value does not change.
df %>%
  get_correlation(formula = sml ~ estfer)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.895
# Scatterplot of sml against estfer with a fitted straight line overlaid
# (standard-error band disabled).
ggplot(data = df, mapping = aes(x = estfer, y = sml)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between ferias and sml",
    x = "PM25 estfer",
    y = "PM25 sml"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression of sml on estfer.
score_model <- lm(formula = sml ~ estfer, data = df)
# Coefficient table for the fitted model (estimates, standard errors,
# test statistics, p-values and confidence-interval bounds).
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    1.74      0.38       4.59       0     0.99    2.50 
## 2 estfer       0.848     0.044     19.1        0     0.76    0.937
# Per-observation table of observed value, fitted value and residual for the
# model currently stored in `score_model`.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 92 x 5
##       ID   sml estfer sml_hat residual
##    <int> <dbl>  <dbl>   <dbl>    <dbl>
##  1     1  3.72   2.76    4.08   -0.365
##  2     2  3.26   2.61    3.96   -0.698
##  3     3  4.67   2.92    4.22    0.449
##  4     4  4.66   3.12    4.39    0.269
##  5     5  7.75   3.05    4.33    3.42 
##  6     6  8.66   4.35    5.43    3.23 
##  7     7  9.63   4.85    5.86    3.77 
##  8     8  7.97   4.93    5.93    2.04 
##  9     9  6.89   3.42    4.64    2.24 
## 10    10  5.57   5.38    6.31   -0.738
## # ... with 82 more rows
#Caso 4: torres VS pontenovo

# Summary statistics for the two columns compared in this case.
df %>%
  select(torres, ponte) %>%
  skim()
Data summary
Name Piped data
Number of rows 92
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
torres 0 1 7.78 5.65 0.06 3.43 6.12 12.31 25.16 ▇▅▅▁▁
ponte 0 1 7.56 5.16 0.40 3.63 6.64 11.25 25.25 ▇▇▃▁▁
# Pearson correlation coefficient between torres and ponte.
# The formula is written as response ~ explanatory (ponte ~ torres) so it
# matches the scatterplot axes and the regression fitted for this case; the
# coefficient is symmetric, so the reported value does not change.
df %>%
  get_correlation(formula = ponte ~ torres)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.968
# Scatterplot of ponte against torres with a fitted straight line overlaid
# (standard-error band disabled).
ggplot(data = df, mapping = aes(x = torres, y = ponte)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between torres and pontenovo",
    x = "PM25 torres",
    y = "PM25 ponte"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression of ponte on torres.
score_model <- lm(formula = ponte ~ torres, data = df)
# Coefficient table for the fitted model (estimates, standard errors,
# test statistics, p-values and confidence-interval bounds).
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    0.676     0.232      2.92   0.004    0.215    1.14 
## 2 torres       0.885     0.024     36.6    0        0.837    0.933
# Per-observation table of observed value, fitted value and residual for the
# model currently stored in `score_model`.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 92 x 5
##       ID ponte torres ponte_hat residual
##    <int> <dbl>  <dbl>     <dbl>    <dbl>
##  1     1  3.14   2         2.45    0.694
##  2     2  3.21   2.35      2.76    0.454
##  3     3  3.3    2.79      3.14    0.155
##  4     4  3.98   3.43      3.71    0.268
##  5     5  3.51   3.42      3.70   -0.193
##  6     6  5.02   5         5.10   -0.081
##  7     7  4.61   4.79      4.92   -0.305
##  8     8  4.8    4.66      4.8     0    
##  9     9  4.15   3.89      4.12    0.031
## 10    10  5.93   6.13      6.10   -0.171
## # ... with 82 more rows
#Caso 5: torres VS sml

# Summary statistics for the two columns compared in this case.
df %>%
  select(torres, sml) %>%
  skim()
Data summary
Name Piped data
Number of rows 92
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
torres 0 1 7.78 5.65 0.06 3.43 6.12 12.31 25.16 ▇▅▅▁▁
sml 0 1 7.68 4.66 1.16 3.98 6.99 10.52 23.53 ▇▆▃▂▁
# Pearson correlation coefficient between torres and sml.
# The formula is written as response ~ explanatory (sml ~ torres) so it
# matches the scatterplot axes and the regression fitted for this case; the
# coefficient is symmetric, so the reported value does not change.
df %>%
  get_correlation(formula = sml ~ torres)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.890
# Scatterplot of sml against torres with a fitted straight line overlaid
# (standard-error band disabled).
ggplot(data = df, mapping = aes(x = torres, y = sml)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between torres and sml",
    x = "PM25 torres",
    y = "PM25 sml"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression of sml on torres.
score_model <- lm(formula = sml ~ torres, data = df)
# Coefficient table for the fitted model (estimates, standard errors,
# test statistics, p-values and confidence-interval bounds).
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    1.97      0.381      5.18       0    1.22     2.73 
## 2 torres       0.734     0.04      18.5        0    0.655    0.813
# Per-observation table of observed value, fitted value and residual for the
# model currently stored in `score_model`.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 92 x 5
##       ID   sml torres sml_hat residual
##    <int> <dbl>  <dbl>   <dbl>    <dbl>
##  1     1  3.72   2       3.44    0.281
##  2     2  3.26   2.35    3.70   -0.435
##  3     3  4.67   2.79    4.02    0.652
##  4     4  4.66   3.43    4.49    0.172
##  5     5  7.75   3.42    4.48    3.27 
##  6     6  8.66   5       5.64    3.02 
##  7     7  9.63   4.79    5.49    4.14 
##  8     8  7.97   4.66    5.39    2.58 
##  9     9  6.89   3.89    4.82    2.06 
## 10    10  5.57   6.13    6.47   -0.899
## # ... with 82 more rows
#Caso 6: pontenovo VS sml

# Summary statistics for the two columns compared in this case.
df %>%
  select(ponte, sml) %>%
  skim()
Data summary
Name Piped data
Number of rows 92
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
ponte 0 1 7.56 5.16 0.40 3.63 6.64 11.25 25.25 ▇▇▃▁▁
sml 0 1 7.68 4.66 1.16 3.98 6.99 10.52 23.53 ▇▆▃▂▁
# Pearson correlation coefficient between ponte and sml.
# The formula is written as response ~ explanatory (sml ~ ponte) so it
# matches the scatterplot axes and the regression fitted for this case; the
# coefficient is symmetric, so the reported value does not change.
df %>%
  get_correlation(formula = sml ~ ponte)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.924
# Scatterplot of sml against ponte with a fitted straight line overlaid
# (standard-error band disabled).
ggplot(data = df, mapping = aes(x = ponte, y = sml)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between pontenovo and sml",
    x = "PM25 ponte",
    y = "PM25 sml"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression of sml on ponte.
score_model <- lm(formula = sml ~ ponte, data = df)
# Coefficient table for the fitted model (estimates, standard errors,
# test statistics, p-values and confidence-interval bounds).
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    1.38      0.332      4.15       0    0.717    2.04 
## 2 ponte        0.834     0.036     23.0        0    0.762    0.906
# Per-observation table of observed value, fitted value and residual for the
# model currently stored in `score_model`.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 92 x 5
##       ID   sml ponte sml_hat residual
##    <int> <dbl> <dbl>   <dbl>    <dbl>
##  1     1  3.72  3.14    3.99   -0.274
##  2     2  3.26  3.21    4.05   -0.792
##  3     3  4.67  3.3     4.13    0.543
##  4     4  4.66  3.98    4.69   -0.034
##  5     5  7.75  3.51    4.30    3.45 
##  6     6  8.66  5.02    5.56    3.10 
##  7     7  9.63  4.61    5.22    4.41 
##  8     8  7.97  4.8     5.38    2.59 
##  9     9  6.89  4.15    4.84    2.05 
## 10    10  5.57  5.93    6.32   -0.75 
## # ... with 82 more rows