library(readxl)
library(tidyverse)
## -- Attaching packages --------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.3     v dplyr   1.0.2
## v tidyr   1.1.1     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(moderndive)
library(skimr)

# **ESTACION PAIBA VS CANAIRIOS**
# **5 sensores diferentes: PMS7003 & PMSA003 & HPMA115S0 & SPS30 & SNGCJA5**
# **Comparaciones entre sensores de bajo costo y la estación de referencia Paiba*

df <- read_excel("C:/Mediciones/PAIBA_CANAIRIOS.xlsx")
View(df)

glimpse(df)
## Rows: 1,241
## Columns: 8
## $ Num       <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17...
## $ Fecha     <dttm> 2020-11-07 00:00:00, 2020-11-07 01:00:00, 2020-11-07 02:...
## $ Oficial   <dbl> 15.99, 12.09, 10.31, 13.64, 16.51, 18.62, 18.97, 22.07, 2...
## $ PMS7003   <dbl> 33.574074, 27.057692, 22.096154, 37.192308, 46.180000, 47...
## $ PMSA003   <dbl> 31.666667, 25.134615, 20.115385, 35.923077, 44.300000, 47...
## $ HPMA115S0 <dbl> 19.333333, 14.730769, 12.230769, 19.750000, 24.360000, 25...
## $ SPS30     <dbl> 17.185185, 14.115385, 11.634615, 19.442308, 23.260000, 24...
## $ SNGCJA5   <dbl> 13.259259, 10.634615, 8.673077, 14.846154, 17.900000, 19....
df %>%
  sample_n(size = 10)
## # A tibble: 10 x 8
##      Num Fecha               Oficial PMS7003 PMSA003 HPMA115S0 SPS30 SNGCJA5
##    <dbl> <dttm>                <dbl>   <dbl>   <dbl>     <dbl> <dbl>   <dbl>
##  1   118 2020-11-11 21:00:00    7.69   12.6    11.8       6.68  6.75    5.04
##  2   870 2020-12-14 13:00:00   20.2     6.82    5.4       4.65  3.73    2.16
##  3   666 2020-12-06 01:00:00   14.4    10.2     9.7       7.07  6.67    4.47
##  4   220 2020-11-16 03:00:00    5.11    8.32    6.94      5.26  5.53    3.49
##  5  1064 2020-12-22 15:00:00   27.4    46.7    43.4      23.2  22.7    16.2 
##  6   256 2020-11-17 15:00:00    8.72   15.6    10.7       8.61  7.20    4.83
##  7    24 2020-11-07 23:00:00    8.65   12.9    12.7       8.41  8.06    5.70
##  8   186 2020-11-14 17:00:00   15.9    37.3    41.7      20.2  20.3    16.5 
##  9  1018 2020-12-20 17:00:00    8.71    3.71    2.20      3.11  2.64    1.14
## 10   167 2020-11-13 22:00:00   14.7    28.3    31.6      15.3  15      12.5
fig <- plot_ly(df, x = ~Num, y = ~PMS7003, name = 'PM2.5 PMS7003', type = 'scatter', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~PMSA003, name = 'PM2.5 PMSA003', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~HPMA115S0, name = 'PM2.5 HPMA115S0', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~SPS30, name = 'PM2.5 SPS30', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~SNGCJA5, name = 'PM2.5 SNGCJA5', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~Oficial, name = 'PM2.5 Oficial', mode = 'lines+markers')
fig
#Caso 1: SNGCJA5 VS SPS30

df %>% select(SNGCJA5, SPS30) %>% skim()
Data summary
Name Piped data
Number of rows 1241
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SNGCJA5 0 1 8.87 6.48 0.05 3.80 7.47 12.32 40.80 ▇▅▂▁▁
SPS30 0 1 12.25 8.36 1.03 5.78 10.41 16.59 51.67 ▇▅▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SNGCJA5 ~ SPS30)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.995
ggplot(df, aes(x = SNGCJA5, y = SPS30)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SNGCJA5", y = "PM25 SPS30",
       title = "Relationship between SNGCJA5 and SPS30") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(SPS30 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    0.875     0.042      20.9       0    0.793    0.957
## 2 SNGCJA5      1.28      0.004     336.        0    1.27     1.29
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
##       ID SPS30 SNGCJA5 SPS30_hat residual
##    <int> <dbl>   <dbl>     <dbl>    <dbl>
##  1     1  17.2   13.3       17.9   -0.688
##  2     2  14.1   10.6       14.5   -0.393
##  3     3  11.6    8.67      12.0   -0.359
##  4     4  19.4   14.8       19.9   -0.465
##  5     5  23.3   17.9       23.8   -0.563
##  6     6  25.0   19.3       25.7   -0.679
##  7     7  23.9   18.4       24.5   -0.56 
##  8     8  27.5   22.1       29.2   -1.76 
##  9     9  28.4   22.9       30.2   -1.75 
## 10    10  39.4   30.7       40.3   -0.885
## # ... with 1,231 more rows
#Caso 2: SNGCJA5 VS HPMA115S0

df %>% select(SNGCJA5, HPMA115S0) %>% skim()
Data summary
Name Piped data
Number of rows 1241
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SNGCJA5 0 1 8.87 6.48 0.05 3.80 7.47 12.32 40.80 ▇▅▂▁▁
HPMA115S0 0 1 12.88 8.60 1.35 6.15 11.05 17.25 51.89 ▇▅▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SNGCJA5 ~ HPMA115S0)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.990
ggplot(df, aes(x = SNGCJA5, y = HPMA115S0)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SNGCJA5", y = "PM25 HPMA115S0",
       title = "Relationship between SNGCJA5 and HPMA115S0") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(HPMA115S0 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept     1.24     0.059      20.9       0     1.12     1.36
## 2 SNGCJA5       1.31     0.005     243.        0     1.30     1.32
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
##       ID HPMA115S0 SNGCJA5 HPMA115S0_hat residual
##    <int>     <dbl>   <dbl>         <dbl>    <dbl>
##  1     1      19.3   13.3           18.6    0.688
##  2     2      14.7   10.6           15.2   -0.469
##  3     3      12.2    8.67          12.6   -0.394
##  4     4      19.8   14.8           20.7   -0.978
##  5     5      24.4   17.9           24.7   -0.377
##  6     6      25     19.3           26.6   -1.62 
##  7     7      24.5   18.4           25.4   -0.926
##  8     8      29.3   22.1           30.3   -0.987
##  9     9      30.8   22.9           31.3   -0.447
## 10    10      42.3   30.7           41.6    0.698
## # ... with 1,231 more rows
#Caso 3: SNGCJA5 VS PMSA003

df %>% select(SNGCJA5, PMSA003) %>% skim()
Data summary
Name Piped data
Number of rows 1241
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SNGCJA5 0 1 8.87 6.48 0.05 3.80 7.47 12.32 40.80 ▇▅▂▁▁
PMSA003 0 1 22.22 17.04 0.04 8.44 18.74 31.61 108.45 ▇▅▁▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SNGCJA5 ~ PMSA003)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.996
ggplot(df, aes(x = SNGCJA5, y = PMSA003)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SNGCJA5", y = "PM25 PMSA003",
       title = "Relationship between SNGCJA5 and PMSA003") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept   -0.991     0.074     -13.3       0    -1.14   -0.845
## 2 SNGCJA5      2.62      0.007     387.        0     2.60    2.63
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
##       ID PMSA003 SNGCJA5 PMSA003_hat residual
##    <int>   <dbl>   <dbl>       <dbl>    <dbl>
##  1     1    31.7   13.3         33.7    -2.04
##  2     2    25.1   10.6         26.8    -1.70
##  3     3    20.1    8.67        21.7    -1.59
##  4     4    35.9   14.8         37.9    -1.93
##  5     5    44.3   17.9         45.8    -1.55
##  6     6    47.1   19.3         49.6    -2.49
##  7     7    42.2   18.4         47.2    -5.00
##  8     8    52.0   22.1         56.9    -4.90
##  9     9    55.0   22.9         58.9    -3.87
## 10    10    75.5   30.7         79.4    -3.94
## # ... with 1,231 more rows
#Caso 4: SNGCJA5 VS PMS7003

df %>% select(SNGCJA5, PMS7003) %>% skim()
Data summary
Name Piped data
Number of rows 1241
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SNGCJA5 0 1 8.87 6.48 0.05 3.80 7.47 12.32 40.80 ▇▅▂▁▁
PMS7003 0 1 22.17 15.69 0.70 9.64 19.25 30.73 93.25 ▇▆▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SNGCJA5 ~ PMS7003)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.987
ggplot(df, aes(x = SNGCJA5, y = PMS7003)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SNGCJA5", y = "PM25 PMS7003",
       title = "Relationship between SNGCJA5 and PMS7003") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    0.984     0.122      8.08       0    0.745     1.22
## 2 SNGCJA5      2.39      0.011    215.         0    2.37      2.41
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
##       ID PMS7003 SNGCJA5 PMS7003_hat residual
##    <int>   <dbl>   <dbl>       <dbl>    <dbl>
##  1     1    33.6   13.3         32.7    0.92 
##  2     2    27.1   10.6         26.4    0.673
##  3     3    22.1    8.67        21.7    0.396
##  4     4    37.2   14.8         36.4    0.748
##  5     5    46.2   17.9         43.7    2.44 
##  6     6    47.7   19.3         47.2    0.561
##  7     7    44.4   18.4         45.0   -0.576
##  8     8    52.9   22.1         53.8   -0.889
##  9     9    54.0   22.9         55.6   -1.65 
## 10    10    73.7   30.7         74.4   -0.667
## # ... with 1,231 more rows
#Caso 5: SNGCJA5 VS Oficial

df %>% select(SNGCJA5, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 1241
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SNGCJA5 0 1 8.87 6.48 0.05 3.80 7.47 12.32 40.80 ▇▅▂▁▁
Oficial 0 1 15.38 8.41 2.06 9.51 13.39 19.63 62.22 ▇▅▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SNGCJA5 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.564
ggplot(df, aes(x = SNGCJA5, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SNGCJA5", y = "PM25 Oficial",
       title = "Relationship between SNGCJA5 and Oficial") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    8.88      0.334      26.6       0    8.22     9.54 
## 2 SNGCJA5      0.732     0.03       24.1       0    0.673    0.792
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
##       ID Oficial SNGCJA5 Oficial_hat residual
##    <int>   <dbl>   <dbl>       <dbl>    <dbl>
##  1     1    16.0   13.3         18.6    -2.6 
##  2     2    12.1   10.6         16.7    -4.58
##  3     3    10.3    8.67        15.2    -4.92
##  4     4    13.6   14.8         19.8    -6.11
##  5     5    16.5   17.9         22.0    -5.48
##  6     6    18.6   19.3         23.0    -4.42
##  7     7    19.0   18.4         22.4    -3.40
##  8     8    22.1   22.1         25.1    -3.00
##  9     9    22.6   22.9         25.6    -3.04
## 10    10    28.7   30.7         31.4    -2.67
## # ... with 1,231 more rows
#Caso 6: SPS30 VS HPMA115S0

df %>% select(SPS30, HPMA115S0) %>% skim()
Data summary
Name Piped data
Number of rows 1241
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SPS30 0 1 12.25 8.36 1.03 5.78 10.41 16.59 51.67 ▇▅▂▁▁
HPMA115S0 0 1 12.88 8.60 1.35 6.15 11.05 17.25 51.89 ▇▅▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SPS30 ~ HPMA115S0)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.994
ggplot(df, aes(x = SPS30, y = HPMA115S0)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SPS30", y = "PM25 HPMA115S0",
       title = "Relationship between SPS30 and HPMA115S0") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(HPMA115S0 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    0.357     0.047      7.56       0    0.265     0.45
## 2 SPS30        1.02      0.003    321.         0    1.02      1.03
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
##       ID HPMA115S0 SPS30 HPMA115S0_hat residual
##    <int>     <dbl> <dbl>         <dbl>    <dbl>
##  1     1      19.3  17.2          17.9    1.40 
##  2     2      14.7  14.1          14.8   -0.065
##  3     3      12.2  11.6          12.3   -0.027
##  4     4      19.8  19.4          20.2   -0.494
##  5     5      24.4  23.3          24.1    0.211
##  6     6      25    25.0          25.9   -0.91 
##  7     7      24.5  23.9          24.8   -0.341
##  8     8      29.3  27.5          28.4    0.83 
##  9     9      30.8  28.4          29.5    1.36 
## 10    10      42.3  39.4          40.6    1.63 
## # ... with 1,231 more rows
#Caso 7: SPS30 VS PMSA003

df %>% select(SPS30, PMSA003) %>% skim()
Data summary
Name Piped data
Number of rows 1241
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SPS30 0 1 12.25 8.36 1.03 5.78 10.41 16.59 51.67 ▇▅▂▁▁
PMSA003 0 1 22.22 17.04 0.04 8.44 18.74 31.61 108.45 ▇▅▁▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SPS30 ~ PMSA003)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.993
ggplot(df, aes(x = SPS30, y = PMSA003)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SPS30", y = "PM25 PMSA003",
       title = "Relationship between SPS30 and PMSA003") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    -2.57     0.1       -25.7       0    -2.77    -2.38
## 2 SPS30         2.02     0.007     300.        0     2.01     2.04
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
##       ID PMSA003 SPS30 PMSA003_hat residual
##    <int>   <dbl> <dbl>       <dbl>    <dbl>
##  1     1    31.7  17.2        32.2   -0.552
##  2     2    25.1  14.1        26.0   -0.869
##  3     3    20.1  11.6        21.0   -0.866
##  4     4    35.9  19.4        36.8   -0.865
##  5     5    44.3  23.3        44.5   -0.217
##  6     6    47.1  25.0        48.0   -0.891
##  7     7    42.2  23.9        45.9   -3.67 
##  8     8    52.0  27.5        53.0   -1.06 
##  9     9    55.0  28.4        55.0   -0.031
## 10    10    75.5  39.4        77.2   -1.68 
## # ... with 1,231 more rows
#Caso 8: SPS30 VS PMS7003

df %>% select(SPS30, PMS7003) %>% skim()
Data summary
Name Piped data
Number of rows 1241
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SPS30 0 1 12.25 8.36 1.03 5.78 10.41 16.59 51.67 ▇▅▂▁▁
PMS7003 0 1 22.17 15.69 0.70 9.64 19.25 30.73 93.25 ▇▆▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SPS30 ~ PMS7003)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.991
ggplot(df, aes(x = SPS30, y = PMS7003)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SPS30", y = "PM25 PMS7003",
       title = "Relationship between SPS30 and PMS7003") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    -0.62     0.105     -5.91       0   -0.825   -0.414
## 2 SPS30         1.86     0.007    263.         0    1.85     1.88
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
##       ID PMS7003 SPS30 PMS7003_hat residual
##    <int>   <dbl> <dbl>       <dbl>    <dbl>
##  1     1    33.6  17.2        31.4    2.21 
##  2     2    27.1  14.1        25.6    1.41 
##  3     3    22.1  11.6        21.0    1.06 
##  4     4    37.2  19.4        35.6    1.63 
##  5     5    46.2  23.3        42.7    3.51 
##  6     6    47.7  25.0        45.9    1.85 
##  7     7    44.4  23.9        43.9    0.492
##  8     8    52.9  27.5        50.5    2.42 
##  9     9    54.0  28.4        52.3    1.65 
## 10    10    73.7  39.4        72.7    1.04 
## # ... with 1,231 more rows
#Caso 9: SPS30 VS Oficial

df %>% select(SPS30, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 1241
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SPS30 0 1 12.25 8.36 1.03 5.78 10.41 16.59 51.67 ▇▅▂▁▁
Oficial 0 1 15.38 8.41 2.06 9.51 13.39 19.63 62.22 ▇▅▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SPS30 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.559
ggplot(df, aes(x = SPS30, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SPS30", y = "PM25 Oficial",
       title = "Relationship between SPS30 and Oficial") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    8.49      0.352      24.1       0    7.80     9.18 
## 2 SPS30        0.562     0.024      23.7       0    0.516    0.609
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
##       ID Oficial SPS30 Oficial_hat residual
##    <int>   <dbl> <dbl>       <dbl>    <dbl>
##  1     1    16.0  17.2        18.2    -2.16
##  2     2    12.1  14.1        16.4    -4.34
##  3     3    10.3  11.6        15.0    -4.72
##  4     4    13.6  19.4        19.4    -5.78
##  5     5    16.5  23.3        21.6    -5.06
##  6     6    18.6  25.0        22.5    -3.92
##  7     7    19.0  23.9        21.9    -2.97
##  8     8    22.1  27.5        23.9    -1.86
##  9     9    22.6  28.4        24.5    -1.89
## 10    10    28.7  39.4        30.6    -1.92
## # ... with 1,231 more rows
#Caso 10: HPMA115S0 VS PMSA003

df %>% select(HPMA115S0, PMSA003) %>% skim()
Data summary
Name Piped data
Number of rows 1241
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
HPMA115S0 0 1 12.88 8.60 1.35 6.15 11.05 17.25 51.89 ▇▅▂▁▁
PMSA003 0 1 22.22 17.04 0.04 8.44 18.74 31.61 108.45 ▇▅▁▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = HPMA115S0 ~ PMSA003)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.984
ggplot(df, aes(x = HPMA115S0, y = PMSA003)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 HPMA115S0", y = "PM25 PMSA003",
       title = "Relationship between HPMA115S0 and PMSA003") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    -2.89     0.156     -18.6       0    -3.20    -2.59
## 2 HPMA115S0     1.95     0.01      194.        0     1.93     1.97
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
##       ID PMSA003 HPMA115S0 PMSA003_hat residual
##    <int>   <dbl>     <dbl>       <dbl>    <dbl>
##  1     1    31.7      19.3        34.8   -3.12 
##  2     2    25.1      14.7        25.8   -0.685
##  3     3    20.1      12.2        20.9   -0.832
##  4     4    35.9      19.8        35.6    0.32 
##  5     5    44.3      24.4        44.6   -0.288
##  6     6    47.1      25          45.8    1.27 
##  7     7    42.2      24.5        44.8   -2.64 
##  8     8    52.0      29.3        54.2   -2.21 
##  9     9    55.0      30.8        57.2   -2.19 
## 10    10    75.5      42.3        79.5   -4.03 
## # ... with 1,231 more rows
#Caso 11: HPMA115S0 VS PMS7003

df %>% select(HPMA115S0, PMS7003) %>% skim()
Data summary
Name Piped data
Number of rows 1241
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
HPMA115S0 0 1 12.88 8.60 1.35 6.15 11.05 17.25 51.89 ▇▅▂▁▁
PMS7003 0 1 22.17 15.69 0.70 9.64 19.25 30.73 93.25 ▇▆▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = HPMA115S0 ~ PMS7003)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.990
ggplot(df, aes(x = HPMA115S0, y = PMS7003)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 HPMA115S0", y = "PM25 PMS7003",
       title = "Relationship between HPMA115S0 and PMS7003") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    -1.11     0.111     -10.0       0    -1.33   -0.895
## 2 HPMA115S0     1.81     0.007     253.        0     1.79    1.82
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
##       ID PMS7003 HPMA115S0 PMS7003_hat residual
##    <int>   <dbl>     <dbl>       <dbl>    <dbl>
##  1     1    33.6      19.3        33.8   -0.252
##  2     2    27.1      14.7        25.5    1.55 
##  3     3    22.1      12.2        21.0    1.11 
##  4     4    37.2      19.8        34.6    2.61 
##  5     5    46.2      24.4        42.9    3.27 
##  6     6    47.7      25          44.1    3.66 
##  7     7    44.4      24.5        43.1    1.25 
##  8     8    52.9      29.3        51.8    1.11 
##  9     9    54.0      30.8        54.6   -0.612
## 10    10    73.7      42.3        75.3   -1.57 
## # ... with 1,231 more rows
#Caso 10: HPMA115S0 VS Oficial

df %>% select(HPMA115S0, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 1241
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
HPMA115S0 0 1 12.88 8.60 1.35 6.15 11.05 17.25 51.89 ▇▅▂▁▁
Oficial 0 1 15.38 8.41 2.06 9.51 13.39 19.63 62.22 ▇▅▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = HPMA115S0 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.577
ggplot(df, aes(x = HPMA115S0, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 HPMA115S0", y = "PM25 Oficial",
       title = "Relationship between HPMA115S0 and Oficial") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    8.11      0.352      23.0       0     7.42    8.80 
## 2 HPMA115S0    0.564     0.023      24.8       0     0.52    0.609
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
##       ID Oficial HPMA115S0 Oficial_hat residual
##    <int>   <dbl>     <dbl>       <dbl>    <dbl>
##  1     1    16.0      19.3        19.0    -3.02
##  2     2    12.1      14.7        16.4    -4.33
##  3     3    10.3      12.2        15.0    -4.70
##  4     4    13.6      19.8        19.2    -5.61
##  5     5    16.5      24.4        21.8    -5.34
##  6     6    18.6      25          22.2    -3.59
##  7     7    19.0      24.5        21.9    -2.95
##  8     8    22.1      29.3        24.6    -2.55
##  9     9    22.6      30.8        25.5    -2.9 
## 10    10    28.7      42.3        32.0    -3.24
## # ... with 1,231 more rows
#Caso 11: PMSA003 VS PMS7003

df %>% select(PMSA003, PMS7003) %>% skim()
Data summary
Name Piped data
Number of rows 1241
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
PMSA003 0 1 22.22 17.04 0.04 8.44 18.74 31.61 108.45 ▇▅▁▁▁
PMS7003 0 1 22.17 15.69 0.70 9.64 19.25 30.73 93.25 ▇▆▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = PMSA003 ~ PMS7003)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.988
ggplot(df, aes(x = PMSA003, y = PMS7003)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 PMSA003", y = "PM25 PMS7003",
       title = "Relationship between PMSA003 and PMS7003") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ PMSA003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept     1.96     0.115      17.1       0    1.73     2.18 
## 2 PMSA003       0.91     0.004     222.        0    0.902    0.918
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
##       ID PMS7003 PMSA003 PMS7003_hat residual
##    <int>   <dbl>   <dbl>       <dbl>    <dbl>
##  1     1    33.6    31.7        30.8     2.81
##  2     2    27.1    25.1        24.8     2.24
##  3     3    22.1    20.1        20.3     1.84
##  4     4    37.2    35.9        34.6     2.56
##  5     5    46.2    44.3        42.3     3.92
##  6     6    47.7    47.1        44.8     2.91
##  7     7    44.4    42.2        40.3     4.05
##  8     8    52.9    52.0        49.2     3.68
##  9     9    54.0    55.0        52.0     1.99
## 10    10    73.7    75.5        70.6     3.09
## # ... with 1,231 more rows
#Caso 12: PMSA003 VS Oficial

df %>% select(PMSA003, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 1241
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
PMSA003 0 1 22.22 17.04 0.04 8.44 18.74 31.61 108.45 ▇▅▁▁▁
Oficial 0 1 15.38 8.41 2.06 9.51 13.39 19.63 62.22 ▇▅▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = PMSA003 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.580
ggplot(df, aes(x = PMSA003, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 PMSA003", y = "PM25 Oficial",
       title = "Relationship between PMSA003 and Oficial") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMSA003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    9.00      0.32       28.1       0    8.38     9.63 
## 2 PMSA003      0.287     0.011      25.1       0    0.264    0.309
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
##       ID Oficial PMSA003 Oficial_hat residual
##    <int>   <dbl>   <dbl>       <dbl>    <dbl>
##  1     1    16.0    31.7        18.1    -2.09
##  2     2    12.1    25.1        16.2    -4.12
##  3     3    10.3    20.1        14.8    -4.46
##  4     4    13.6    35.9        19.3    -5.66
##  5     5    16.5    44.3        21.7    -5.20
##  6     6    18.6    47.1        22.5    -3.89
##  7     7    19.0    42.2        21.1    -2.13
##  8     8    22.1    52.0        23.9    -1.83
##  9     9    22.6    55.0        24.8    -2.18
## 10    10    28.7    75.5        30.6    -1.93
## # ... with 1,231 more rows
#Caso 13: PMS7003 VS Oficial

df %>% select(PMS7003, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 1241
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
PMS7003 0 1 22.17 15.69 0.70 9.64 19.25 30.73 93.25 ▇▆▂▁▁
Oficial 0 1 15.38 8.41 2.06 9.51 13.39 19.63 62.22 ▇▅▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = PMS7003 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.574
ggplot(df, aes(x = PMS7003, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 PMS7003", y = "PM25 Oficial",
       title = "Relationship between PMS7003 and Oficial") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMS7003, data = df)
#**Get regression table original:**
get_regression_table(score_model, digits = 11)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    8.55     0.339       25.2       0    7.89     9.22 
## 2 PMS7003      0.308    0.0125      24.7       0    0.283    0.332
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
##       ID Oficial PMS7003 Oficial_hat residual
##    <int>   <dbl>   <dbl>       <dbl>    <dbl>
##  1     1    16.0    33.6        18.9    -2.89
##  2     2    12.1    27.1        16.9    -4.79
##  3     3    10.3    22.1        15.4    -5.04
##  4     4    13.6    37.2        20.0    -6.36
##  5     5    16.5    46.2        22.8    -6.25
##  6     6    18.6    47.7        23.2    -4.62
##  7     7    19.0    44.4        22.2    -3.24
##  8     8    22.1    52.9        24.8    -2.76
##  9     9    22.6    54.0        25.2    -2.57
## 10    10    28.7    73.7        31.2    -2.52
## # ... with 1,231 more rows
# **ESTACION PAIBA NOVIEMBRE VS CANAIRIOS**
# **Prueba con los valores de la estacion Paiba solo de Noviembre
# **5 sensores diferentes: PMS7003 & PMSA003 & HPMA115S0 & SPS30 & SNGCJA5**
# **Comparaciones con la estación referencia*


df <- read_excel("C:/Mediciones/PAIBA_CANAIRIOS_NOV.xlsx")
View(df)

glimpse(df)
## Rows: 544
## Columns: 8
## $ Num       <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17...
## $ Fecha     <dttm> 2020-11-07 00:00:00, 2020-11-07 01:00:00, 2020-11-07 02:...
## $ Oficial   <dbl> 15.99, 12.09, 10.31, 13.64, 16.51, 18.62, 18.97, 22.07, 2...
## $ PMS7003   <dbl> 33.574074, 27.057692, 22.096154, 37.192308, 46.180000, 47...
## $ PMSA003   <dbl> 31.666667, 25.134615, 20.115385, 35.923077, 44.300000, 47...
## $ HPMA115S0 <dbl> 19.333333, 14.730769, 12.230769, 19.750000, 24.360000, 25...
## $ SPS30     <dbl> 17.185185, 14.115385, 11.634615, 19.442308, 23.260000, 24...
## $ SNGCJA5   <dbl> 13.259259, 10.634615, 8.673077, 14.846154, 17.900000, 19....
df %>%
  sample_n(size = 10)
## # A tibble: 10 x 8
##      Num Fecha               Oficial PMS7003 PMSA003 HPMA115S0 SPS30 SNGCJA5
##    <dbl> <dttm>                <dbl>   <dbl>   <dbl>     <dbl> <dbl>   <dbl>
##  1   519 2020-11-29 22:00:00    4.21    3.33    1.65      3.98  2.31    1.02
##  2   349 2020-11-22 20:00:00   11.5    21.9    23.8      13.4  12.8     9.07
##  3    31 2020-11-08 06:00:00   11.4    22.1    24.6      13.9  13.2    10.2 
##  4   544 2020-11-30 23:00:00    7.74   11.0    10.1       7.06  6.44    4.33
##  5    62 2020-11-09 13:00:00   12.6    23.2    18.7      12.9  11.3     8.15
##  6    58 2020-11-09 09:00:00   14.0    37.5    39.1      17.8  18.9    15.3 
##  7   437 2020-11-26 12:00:00    7.66   13.0    12.2       7.91  7.58    4.74
##  8   273 2020-11-19 16:00:00   10.9    25.5    24.5      12    12.7     9.5 
##  9   253 2020-11-17 12:00:00   12.3    28.0    23.5      14.4  13.1     9.23
## 10    24 2020-11-07 23:00:00    8.65   12.9    12.7       8.41  8.06    5.70
fig <- plot_ly(df, x = ~Num, y = ~PMS7003, name = 'PM2.5 PMS7003', type = 'scatter', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~PMSA003, name = 'PM2.5 PMSA003', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~HPMA115S0, name = 'PM2.5 HPMA115S0', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~SPS30, name = 'PM2.5 SPS30', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~SNGCJA5, name = 'PM2.5 SNGCJA5', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~Oficial, name = 'PM2.5 Oficial', mode = 'lines+markers')
fig
#Caso 14: SNGCJA5 VS Oficial-NOV

df %>% select(SNGCJA5, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 544
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SNGCJA5 0 1 10.07 6.36 0.05 5.29 9.15 13.71 36.96 ▇▇▃▁▁
Oficial 0 1 11.63 5.26 2.06 7.79 11.01 14.70 33.11 ▅▇▃▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SNGCJA5 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.962
ggplot(df, aes(x = SNGCJA5, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SNGCJA5", y = "PM25 Oficial",
       title = "Relationship between SNGCJA5 and PaibaNov") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    3.61      0.115      31.4       0    3.38     3.84 
## 2 SNGCJA5      0.797     0.01       82.5       0    0.778    0.816
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 544 x 5
##       ID Oficial SNGCJA5 Oficial_hat residual
##    <int>   <dbl>   <dbl>       <dbl>    <dbl>
##  1     1    16.0   13.3         14.2    1.81 
##  2     2    12.1   10.6         12.1    0.003
##  3     3    10.3    8.67        10.5   -0.213
##  4     4    13.6   14.8         15.4   -1.80 
##  5     5    16.5   17.9         17.9   -1.37 
##  6     6    18.6   19.3         19.0   -0.4  
##  7     7    19.0   18.4         18.3    0.679
##  8     8    22.1   22.1         21.2    0.836
##  9     9    22.6   22.9         21.8    0.751
## 10    10    28.7   30.7         28.1    0.609
## # ... with 534 more rows
#Caso 15: SPS30 VS Oficial-NOV

df %>% select(SPS30, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 544
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SPS30 0 1 13.47 8.07 1.03 7.38 12.33 18.29 44.85 ▇▇▃▁▁
Oficial 0 1 11.63 5.26 2.06 7.79 11.01 14.70 33.11 ▅▇▃▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SPS30 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.955
ggplot(df, aes(x = SPS30, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SPS30", y = "PM25 Oficial",
       title = "Relationship between SPS30 and PaibaNov") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    3.24      0.13       24.9       0    2.98     3.50 
## 2 SPS30        0.623     0.008      75.1       0    0.607    0.639
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 544 x 5
##       ID Oficial SPS30 Oficial_hat residual
##    <int>   <dbl> <dbl>       <dbl>    <dbl>
##  1     1    16.0  17.2        13.9    2.04 
##  2     2    12.1  14.1        12.0    0.056
##  3     3    10.3  11.6        10.5   -0.179
##  4     4    13.6  19.4        15.4   -1.71 
##  5     5    16.5  23.3        17.7   -1.22 
##  6     6    18.6  25.0        18.8   -0.183
##  7     7    19.0  23.9        18.1    0.823
##  8     8    22.1  27.5        20.3    1.72 
##  9     9    22.6  28.4        21.0    1.63 
## 10    10    28.7  39.4        27.8    0.936
## # ... with 534 more rows
#Caso 16: HPMA115S0 VS Oficial-NOV

df %>% select(HPMA115S0, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 544
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
HPMA115S0 0 1 14.32 8.53 1.42 8.05 13.04 18.8 51.08 ▇▇▂▁▁
Oficial 0 1 11.63 5.26 2.06 7.79 11.01 14.7 33.11 ▅▇▃▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = HPMA115S0 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.959
ggplot(df, aes(x = HPMA115S0, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 HPMA115S0", y = "PM25 Oficial",
       title = "Relationship between HPMA115S0 and PaibaNov") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    3.16      0.126      25.2       0    2.91     3.41 
## 2 HPMA115S0    0.592     0.008      78.5       0    0.577    0.606
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 544 x 5
##       ID Oficial HPMA115S0 Oficial_hat residual
##    <int>   <dbl>     <dbl>       <dbl>    <dbl>
##  1     1    16.0      19.3        14.6    1.39 
##  2     2    12.1      14.7        11.9    0.215
##  3     3    10.3      12.2        10.4   -0.086
##  4     4    13.6      19.8        14.8   -1.20 
##  5     5    16.5      24.4        17.6   -1.06 
##  6     6    18.6      25          18.0    0.67 
##  7     7    19.0      24.5        17.6    1.32 
##  8     8    22.1      29.3        20.5    1.59 
##  9     9    22.6      30.8        21.4    1.2  
## 10    10    28.7      42.3        28.2    0.542
## # ... with 534 more rows
#Caso 17: PMSA003 VS Oficial-NOV

df %>% select(PMSA003, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 544
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
PMSA003 0 1 24.69 16.22 0.04 12.43 22.70 34.49 95.43 ▇▇▃▁▁
Oficial 0 1 11.63 5.26 2.06 7.79 11.01 14.70 33.11 ▅▇▃▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = PMSA003 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.954
ggplot(df, aes(x = PMSA003, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 PMSA003", y = "PM25 Oficial",
       title = "Relationship between PMSA003 and PaibaNov") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMSA003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept     3.99     0.123      32.4       0    3.75     4.23 
## 2 PMSA003       0.31     0.004      74.3       0    0.301    0.318
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 544 x 5
##       ID Oficial PMSA003 Oficial_hat residual
##    <int>   <dbl>   <dbl>       <dbl>    <dbl>
##  1     1    16.0    31.7        13.8    2.19 
##  2     2    12.1    25.1        11.8    0.318
##  3     3    10.3    20.1        10.2    0.092
##  4     4    13.6    35.9        15.1   -1.47 
##  5     5    16.5    44.3        17.7   -1.20 
##  6     6    18.6    47.1        18.6    0.043
##  7     7    19.0    42.2        17.1    1.91 
##  8     8    22.1    52.0        20.1    1.99 
##  9     9    22.6    55.0        21.0    1.58 
## 10    10    28.7    75.5        27.4    1.35 
## # ... with 534 more rows
#Caso 18: PMS7003 VS Oficial-NOV

df %>% select(PMS7003, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 544
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
PMS7003 0 1 24.80 15.38 0.70 13.55 22.36 33.59 87.28 ▇▇▃▁▁
Oficial 0 1 11.63 5.26 2.06 7.79 11.01 14.70 33.11 ▅▇▃▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = PMS7003 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.952
ggplot(df, aes(x = PMS7003, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 PMS7003", y = "PM25 Oficial",
       title = "Relationship between PMS7003 and PaibaNov") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMS7003, data = df)
#**Get regression table original:**
get_regression_table(score_model, digits = 11)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    3.56    0.131        27.1       0    3.30     3.81 
## 2 PMS7003      0.326   0.00450      72.4       0    0.317    0.335
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 544 x 5
##       ID Oficial PMS7003 Oficial_hat residual
##    <int>   <dbl>   <dbl>       <dbl>    <dbl>
##  1     1    16.0    33.6        14.5    1.50 
##  2     2    12.1    27.1        12.4   -0.281
##  3     3    10.3    22.1        10.8   -0.444
##  4     4    13.6    37.2        15.7   -2.03 
##  5     5    16.5    46.2        18.6   -2.09 
##  6     6    18.6    47.7        19.1   -0.482
##  7     7    19.0    44.4        18.0    0.95 
##  8     8    22.1    52.9        20.8    1.28 
##  9     9    22.6    54.0        21.1    1.46 
## 10    10    28.7    73.7        27.6    1.14 
## # ... with 534 more rows
# **ESTACION PAIBA DICIEMBRE VS CANAIRIOS**
# **Prueba con los valores de la estacion Paiba solo de Diciembre
# **5 sensores diferentes: PMS7003 & PMSA003 & HPMA115S0 & SPS30 & SNGCJA5**
# **Comparaciones con la estación referencia*


df <- read_excel("C:/Mediciones/PAIBA_CANAIRIOS_DIC.xlsx")
View(df)

glimpse(df)
## Rows: 697
## Columns: 8
## $ Num       <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17...
## $ Fecha     <dttm> 2020-12-01 00:00:00, 2020-12-01 01:00:00, 2020-12-01 02:...
## $ Oficial   <dbl> 8.12, 7.27, 10.32, 9.91, 12.52, 13.31, 14.70, 21.35, 23.8...
## $ PMS7003   <dbl> 7.218182, 8.224138, 11.259259, 14.290909, 15.464286, 13.2...
## $ PMSA003   <dbl> 5.363636, 6.724138, 10.407407, 14.545455, 15.285714, 12.4...
## $ HPMA115S0 <dbl> 5.345455, 5.413793, 7.425926, 9.490909, 9.982143, 8.19642...
## $ SPS30     <dbl> 4.363636, 4.862069, 6.740741, 8.654545, 9.178571, 7.28571...
## $ SNGCJA5   <dbl> 2.600000, 3.068966, 4.666667, 6.236364, 6.642857, 5.30357...
df %>%
  sample_n(size = 10)
## # A tibble: 10 x 8
##      Num Fecha               Oficial PMS7003 PMSA003 HPMA115S0 SPS30 SNGCJA5
##    <dbl> <dttm>                <dbl>   <dbl>   <dbl>     <dbl> <dbl>   <dbl>
##  1   348 2020-12-15 11:00:00   29.5     4.29    3.12      3.67  2.59    1.29
##  2   237 2020-12-10 20:00:00   17.8    54.1    58        30.8  30.6    21.2 
##  3   439 2020-12-19 06:00:00   19.0    13.2    12.4       7.29  7       5.22
##  4   555 2020-12-24 02:00:00   10.2    25.6    27.7      14.5  15.1    10.6 
##  5   631 2020-12-27 06:00:00   15.4    17.2    17.3      10.5  10.1     7.38
##  6   576 2020-12-24 23:00:00    9.69   34.7    35        20.2  19      13.4 
##  7   139 2020-12-06 18:00:00    8.57    3.68    2.23      2.77  2.58    1.11
##  8   660 2020-12-28 11:00:00   11.0     3.98    2.54      3     2.80    1.26
##  9   277 2020-12-12 12:00:00   33.6    15.6    12.4       9.91  8.15    5.22
## 10   337 2020-12-15 00:00:00   26.8    25.1    26.6      15.6  14.0    10.3
fig <- plot_ly(df, x = ~Num, y = ~PMS7003, name = 'PM2.5 PMS7003', type = 'scatter', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~PMSA003, name = 'PM2.5 PMSA003', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~HPMA115S0, name = 'PM2.5 HPMA115S0', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~SPS30, name = 'PM2.5 SPS30', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~SNGCJA5, name = 'PM2.5 SNGCJA5', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~Oficial, name = 'PM2.5 Oficial', mode = 'lines+markers')
fig
#Caso 19: SNGCJA5 VS Oficial-DIC

df %>% select(SNGCJA5, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 697
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SNGCJA5 0 1 7.94 6.43 0.10 2.96 6.56 10.75 40.80 ▇▃▁▁▁
Oficial 0 1 18.30 9.22 3.47 11.12 16.86 23.87 62.22 ▇▇▃▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SNGCJA5 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.606
ggplot(df, aes(x = SNGCJA5, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SNGCJA5", y = "PM25 Oficial",
       title = "Relationship between SNGCJA5 and PaibaDic") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept   11.4       0.442      25.8       0   10.5     12.3  
## 2 SNGCJA5      0.869     0.043      20.1       0    0.784    0.954
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 697 x 5
##       ID Oficial SNGCJA5 Oficial_hat residual
##    <int>   <dbl>   <dbl>       <dbl>    <dbl>
##  1     1    8.12    2.6         13.7   -5.54 
##  2     2    7.27    3.07        14.1   -6.80 
##  3     3   10.3     4.67        15.5   -5.13 
##  4     4    9.91    6.24        16.8   -6.91 
##  5     5   12.5     6.64        17.2   -4.65 
##  6     6   13.3     5.30        16.0   -2.70 
##  7     7   14.7     4.72        15.5   -0.805
##  8     8   21.4     8.52        18.8    2.55 
##  9     9   23.9    12.5         22.3    1.59 
## 10    10   22.9    12.4         22.2    0.745
## # ... with 687 more rows
#Caso 20: SPS30 VS Oficial-DIC

df %>% select(SPS30, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 697
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SPS30 0 1 11.29 8.46 1.17 4.85 9.30 15.11 51.67 ▇▃▁▁▁
Oficial 0 1 18.30 9.22 3.47 11.12 16.86 23.87 62.22 ▇▇▃▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SPS30 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.573
ggplot(df, aes(x = SPS30, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SPS30", y = "PM25 Oficial",
       title = "Relationship between SPS30 and PaibaDic") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept   11.2       0.478      23.5       0   10.3     12.2  
## 2 SPS30        0.625     0.034      18.4       0    0.559    0.692
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 697 x 5
##       ID Oficial SPS30 Oficial_hat residual
##    <int>   <dbl> <dbl>       <dbl>    <dbl>
##  1     1    8.12  4.36        14.0   -5.85 
##  2     2    7.27  4.86        14.3   -7.01 
##  3     3   10.3   6.74        15.5   -5.13 
##  4     4    9.91  8.65        16.6   -6.74 
##  5     5   12.5   9.18        17.0   -4.46 
##  6     6   13.3   7.29        15.8   -2.48 
##  7     7   14.7   6.33        15.2   -0.498
##  8     8   21.4  10.8         18.0    3.35 
##  9     9   23.9  16.7         21.7    2.20 
## 10    10   22.9  16.9         21.8    1.11 
## # ... with 687 more rows
#Caso 21: HPMA115S0 VS Oficial-DIC

df %>% select(HPMA115S0, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 697
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
HPMA115S0 0 1 11.76 8.49 1.35 5.31 9.60 15.41 51.89 ▇▃▁▁▁
Oficial 0 1 18.30 9.22 3.47 11.12 16.86 23.87 62.22 ▇▇▃▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = HPMA115S0 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.616
ggplot(df, aes(x = HPMA115S0, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 HPMA115S0", y = "PM25 Oficial",
       title = "Relationship between HPMA115S0 and PaibaDic") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept   10.4       0.471      22.2       0    9.50    11.4  
## 2 HPMA115S0    0.669     0.032      20.6       0    0.605    0.733
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 697 x 5
##       ID Oficial HPMA115S0 Oficial_hat residual
##    <int>   <dbl>     <dbl>       <dbl>    <dbl>
##  1     1    8.12      5.34        14.0   -5.88 
##  2     2    7.27      5.41        14.0   -6.78 
##  3     3   10.3       7.43        15.4   -5.08 
##  4     4    9.91      9.49        16.8   -6.87 
##  5     5   12.5       9.98        17.1   -4.59 
##  6     6   13.3       8.20        15.9   -2.60 
##  7     7   14.7       7.31        15.3   -0.621
##  8     8   21.4      11.7         18.3    3.07 
##  9     9   23.9      17           21.8    2.07 
## 10    10   22.9      17.8         22.3    0.576
## # ... with 687 more rows
#Caso 22: PMSA003 VS Oficial-DIC

df %>% select(PMSA003, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 697
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
PMSA003 0 1 20.29 17.42 0.18 6.54 16.23 28.48 108.45 ▇▃▁▁▁
Oficial 0 1 18.30 9.22 3.47 11.12 16.86 23.87 62.22 ▇▇▃▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = PMSA003 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.607
ggplot(df, aes(x = PMSA003, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 PMSA003", y = "PM25 Oficial",
       title = "Relationship between PMSA003 and PaibaDic") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMSA003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept   11.8       0.427      27.6       0   10.9     12.6  
## 2 PMSA003      0.322     0.016      20.2       0    0.290    0.353
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 697 x 5
##       ID Oficial PMSA003 Oficial_hat residual
##    <int>   <dbl>   <dbl>       <dbl>    <dbl>
##  1     1    8.12    5.36        13.5   -5.37 
##  2     2    7.27    6.72        13.9   -6.66 
##  3     3   10.3    10.4         15.1   -4.80 
##  4     4    9.91   14.5         16.4   -6.54 
##  5     5   12.5    15.3         16.7   -4.16 
##  6     6   13.3    12.5         15.8   -2.47 
##  7     7   14.7    11.5         15.5   -0.758
##  8     8   21.4    22.8         19.1    2.26 
##  9     9   23.9    34.1         22.7    1.12 
## 10    10   22.9    32.8         22.3    0.605
## # ... with 687 more rows
#Caso 23: PMS7003 VS Oficial-DIC

df %>% select(PMS7003, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 697
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
PMS7003 0 1 20.12 15.64 0.96 7.84 16.61 27.35 93.25 ▇▃▂▁▁
Oficial 0 1 18.30 9.22 3.47 11.12 16.86 23.87 62.22 ▇▇▃▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = PMS7003 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.614
ggplot(df, aes(x = PMS7003, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 PMS7003", y = "PM25 Oficial",
       title = "Relationship between PMS7003 and PaibaDic") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMS7003, data = df)
#**Get regression table original:**
get_regression_table(score_model, digits = 11)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept   11.0      0.450       24.5       0   10.1     11.9  
## 2 PMS7003      0.362    0.0177      20.5       0    0.327    0.397
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 697 x 5
##       ID Oficial PMS7003 Oficial_hat residual
##    <int>   <dbl>   <dbl>       <dbl>    <dbl>
##  1     1    8.12    7.22        13.6   -5.50 
##  2     2    7.27    8.22        14.0   -6.72 
##  3     3   10.3    11.3         15.1   -4.77 
##  4     4    9.91   14.3         16.2   -6.28 
##  5     5   12.5    15.5         16.6   -4.09 
##  6     6   13.3    13.3         15.8   -2.51 
##  7     7   14.7    11.9         15.3   -0.634
##  8     8   21.4    22.1         19.0    2.34 
##  9     9   23.9    32.0         22.6    1.27 
## 10    10   22.9    30.2         21.9    0.964
## # ... with 687 more rows