library(readxl)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.6     v purrr   0.3.4
## v tibble  3.1.3     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(moderndive)
library(skimr)

# **ESTACION FERIAS VS SENSORES BAJO COSTO**
# **5 sensores diferentes: PMS7003 & PMSA003 & HPMA115S0 & SPS30 & SNGCJA5**
# **Comparaciones entre sensores de bajo costo y con la estación oficial**

df <- read_excel("C:/Mediciones/FERIAS.xlsx")
View(df)

glimpse(df)
## Rows: 1,115
## Columns: 8
## $ Num       <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 1~
## $ Fecha     <chr> "12-11-2020 24:00", "13-11-2020 01:00", "13-11-2020 02:00", ~
## $ Oficial   <dbl> 3.7, 0.3, 5.9, 8.1, 3.7, 4.0, 4.8, 6.0, 8.3, 9.8, 16.5, 6.2,~
## $ PMS7003   <dbl> 0.00, 0.29, 0.71, 1.40, 3.12, 4.16, 6.13, 7.61, 8.40, 20.20,~
## $ PMSA003   <dbl> 0.0169, 0.0000, 1.6100, 0.4100, 1.7200, 2.6500, 4.0400, 5.35~
## $ HPMA115S0 <dbl> 18.6, 19.1, 19.4, 19.7, 20.4, 20.6, 22.0, 22.8, 23.7, 28.6, ~
## $ SPS30     <dbl> 0.90, 1.02, 1.25, 1.78, 2.60, 3.00, 4.11, 4.94, 5.48, 11.70,~
## $ SNGCJA5   <dbl> 0.00, 0.18, 0.45, 0.90, 1.51, 1.81, 2.86, 3.39, 3.48, 8.23, ~
df %>%
  sample_n(size = 10)
## # A tibble: 10 x 8
##      Num Fecha            Oficial PMS7003 PMSA003 HPMA115S0 SPS30 SNGCJA5
##    <dbl> <chr>              <dbl>   <dbl>   <dbl>     <dbl> <dbl>   <dbl>
##  1   277 24-11-2020 14:00    18.9  31.3    30.3        39   17.8   14.5  
##  2    81 16-11-2020 09:00     6.9   2.15    0.94       21.7  2.24   1.06 
##  3   517 03-12-2020 22:00    18    18.7    16.6        31.5 11.1    9.16 
##  4  1118 08-01-2021 19:00     7     0.932   0.508      23    1.51   0.458
##  5    32 14-11-2020 07:00    26.3  31.6    29.9        43   19.5   16.4  
##  6   904 22-12-2020 22:00    26     8.98    7.08       27.5  5.37   4.33 
##  7   859 20-12-2020 22:00     8     3.65    1.96       23.9  2.88   1.92 
##  8   329 26-11-2020 18:00    12    11.7     9.36       27.8  6.96   5.87 
##  9    97 17-11-2020 01:00    10.5   7.92    6.23       25.1  5.67   4.38 
## 10   961 25-12-2020 15:00    17     6.67    5.06       27    5.06   3.43
fig <- plot_ly(df, x = ~Num, y = ~PMS7003, name = 'PM2.5 PMS7003', type = 'scatter', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~PMSA003, name = 'PM2.5 PMSA003', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~HPMA115S0, name = 'PM2.5 HPMA115S0', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~SPS30, name = 'PM2.5 SPS30', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~SNGCJA5, name = 'PM2.5 SNGCJA5', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~Oficial, name = 'PM2.5 Oficial', mode = 'lines+markers')
fig
#Caso 1: SNGCJA5 VS SPS30

df %>% select(SNGCJA5, SPS30) %>% skim()
Data summary
Name Piped data
Number of rows 1115
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SNGCJA5 0 1 6.69 9.75 0.00 1.53 4.81 9.76 236.87 ▇▁▁▁▁
SPS30 0 1 7.32 6.66 0.08 1.97 5.52 10.65 51.50 ▇▂▁▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SNGCJA5 ~ SPS30)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.496
ggplot(df, aes(x = SNGCJA5, y = SPS30)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SNGCJA5", y = "PM25 SPS30",
       title = "Relationship between SNGCJA5 and SPS30") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(SPS30 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    5.05      0.21       24.0       0    4.64     5.46 
## 2 SNGCJA5      0.339     0.018      19.1       0    0.304    0.374
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
##       ID SPS30 SNGCJA5 SPS30_hat residual
##    <int> <dbl>   <dbl>     <dbl>    <dbl>
##  1     1  0.9     0         5.05   -4.15 
##  2     2  1.02    0.18      5.11   -4.09 
##  3     3  1.25    0.45      5.20   -3.96 
##  4     4  1.78    0.9       5.36   -3.58 
##  5     5  2.6     1.51      5.56   -2.96 
##  6     6  3       1.81      5.66   -2.66 
##  7     7  4.11    2.86      6.02   -1.91 
##  8     8  4.94    3.39      6.20   -1.26 
##  9     9  5.48    3.48      6.23   -0.751
## 10    10 11.7     8.23      7.84    3.86 
## # ... with 1,105 more rows
#Caso 2: SNGCJA5 VS HPMA115S0

df %>% select(SNGCJA5, HPMA115S0) %>% skim()
Data summary
Name Piped data
Number of rows 1115
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SNGCJA5 0 1 6.69 9.75 0.0 1.53 4.81 9.76 236.87 ▇▁▁▁▁
HPMA115S0 0 1 28.48 7.27 18.6 23.10 26.40 31.05 77.70 ▇▂▁▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SNGCJA5 ~ HPMA115S0)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.500
ggplot(df, aes(x = SNGCJA5, y = HPMA115S0)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SNGCJA5", y = "PM25 HPMA115S0",
       title = "Relationship between SNGCJA5 and HPMA115S0") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(HPMA115S0 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept   26.0       0.229     114.        0   25.5     26.4  
## 2 SNGCJA5      0.373     0.019      19.2       0    0.335    0.411
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
##       ID HPMA115S0 SNGCJA5 HPMA115S0_hat residual
##    <int>     <dbl>   <dbl>         <dbl>    <dbl>
##  1     1      18.6    0             26.0   -7.39 
##  2     2      19.1    0.18          26.1   -6.96 
##  3     3      19.4    0.45          26.2   -6.76 
##  4     4      19.7    0.9           26.3   -6.63 
##  5     5      20.4    1.51          26.6   -6.16 
##  6     6      20.6    1.81          26.7   -6.07 
##  7     7      22      2.86          27.1   -5.06 
##  8     8      22.8    3.39          27.3   -4.46 
##  9     9      23.7    3.48          27.3   -3.59 
## 10    10      28.6    8.23          29.1   -0.459
## # ... with 1,105 more rows
#Caso 3: SNGCJA5 VS PMSA003

df %>% select(SNGCJA5, PMSA003) %>% skim()
Data summary
Name Piped data
Number of rows 1115
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SNGCJA5 0 1 6.69 9.75 0 1.53 4.81 9.76 236.87 ▇▁▁▁▁
PMSA003 0 1 10.46 11.99 0 0.85 6.48 16.20 89.00 ▇▂▁▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SNGCJA5 ~ PMSA003)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.503
ggplot(df, aes(x = SNGCJA5, y = PMSA003)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SNGCJA5", y = "PM25 PMSA003",
       title = "Relationship between SNGCJA5 and PMSA003") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    6.33      0.376      16.8       0    5.59     7.07 
## 2 SNGCJA5      0.618     0.032      19.4       0    0.556    0.681
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
##       ID PMSA003 SNGCJA5 PMSA003_hat residual
##    <int>   <dbl>   <dbl>       <dbl>    <dbl>
##  1     1   0.017    0           6.33    -6.31
##  2     2   0        0.18        6.44    -6.44
##  3     3   1.61     0.45        6.61    -5.00
##  4     4   0.41     0.9         6.88    -6.47
##  5     5   1.72     1.51        7.26    -5.54
##  6     6   2.65     1.81        7.45    -4.80
##  7     7   4.04     2.86        8.10    -4.06
##  8     8   5.35     3.39        8.42    -3.07
##  9     9   6.25     3.48        8.48    -2.23
## 10    10  18.5      8.23       11.4      7.08
## # ... with 1,105 more rows
#Caso 4: SNGCJA5 VS PMS7003

df %>% select(SNGCJA5, PMS7003) %>% skim()
Data summary
Name Piped data
Number of rows 1115
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SNGCJA5 0 1 6.69 9.75 0 1.53 4.81 9.76 236.87 ▇▁▁▁▁
PMS7003 0 1 11.60 11.87 0 1.94 8.34 17.90 91.80 ▇▂▁▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SNGCJA5 ~ PMS7003)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.496
ggplot(df, aes(x = SNGCJA5, y = PMS7003)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SNGCJA5", y = "PM25 PMS7003",
       title = "Relationship between SNGCJA5 and PMS7003") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    7.56      0.374      20.2       0    6.83     8.30 
## 2 SNGCJA5      0.604     0.032      19.1       0    0.542    0.666
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
##       ID PMS7003 SNGCJA5 PMS7003_hat residual
##    <int>   <dbl>   <dbl>       <dbl>    <dbl>
##  1     1    0       0           7.56    -7.56
##  2     2    0.29    0.18        7.67    -7.38
##  3     3    0.71    0.45        7.84    -7.13
##  4     4    1.4     0.9         8.11    -6.71
##  5     5    3.12    1.51        8.48    -5.36
##  6     6    4.16    1.81        8.66    -4.50
##  7     7    6.13    2.86        9.29    -3.16
##  8     8    7.61    3.39        9.61    -2.00
##  9     9    8.4     3.48        9.67    -1.27
## 10    10   20.2     8.23       12.5      7.66
## # ... with 1,105 more rows
#Caso 5: SNGCJA5 VS Oficial

df %>% select(SNGCJA5, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 1115
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SNGCJA5 0 1 6.69 9.75 0 1.53 4.81 9.76 236.87 ▇▁▁▁▁
Oficial 0 1 12.80 8.32 0 6.50 11.20 17.70 55.40 ▇▆▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SNGCJA5 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.275
ggplot(df, aes(x = SNGCJA5, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SNGCJA5", y = "PM25 Oficial",
       title = "Relationship between SNGCJA5 and Oficial") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept   11.2       0.291     38.7        0   10.7     11.8  
## 2 SNGCJA5      0.234     0.025      9.53       0    0.186    0.283
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
##       ID Oficial SNGCJA5 Oficial_hat residual
##    <int>   <dbl>   <dbl>       <dbl>    <dbl>
##  1     1     3.7    0           11.2    -7.53
##  2     2     0.3    0.18        11.3   -11.0 
##  3     3     5.9    0.45        11.3    -5.44
##  4     4     8.1    0.9         11.4    -3.34
##  5     5     3.7    1.51        11.6    -7.89
##  6     6     4      1.81        11.7    -7.66
##  7     7     4.8    2.86        11.9    -7.10
##  8     8     6      3.39        12.0    -6.03
##  9     9     8.3    3.48        12.0    -3.75
## 10    10     9.8    8.23        13.2    -3.36
## # ... with 1,105 more rows
#Caso 6: SPS30 VS HPMA115S0

df %>% select(SPS30, HPMA115S0) %>% skim()
Data summary
Name Piped data
Number of rows 1115
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SPS30 0 1 7.32 6.66 0.08 1.97 5.52 10.65 51.5 ▇▂▁▁▁
HPMA115S0 0 1 28.48 7.27 18.60 23.10 26.40 31.05 77.7 ▇▂▁▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SPS30 ~ HPMA115S0)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.983
ggplot(df, aes(x = SPS30, y = HPMA115S0)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SPS30", y = "PM25 HPMA115S0",
       title = "Relationship between SPS30 and HPMA115S0") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(HPMA115S0 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    20.6      0.059      348.       0    20.5     20.7 
## 2 SPS30         1.07     0.006      179.       0     1.06     1.08
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
##       ID HPMA115S0 SPS30 HPMA115S0_hat residual
##    <int>     <dbl> <dbl>         <dbl>    <dbl>
##  1     1      18.6  0.9           21.6    -3.00
##  2     2      19.1  1.02          21.7    -2.62
##  3     3      19.4  1.25          22.0    -2.57
##  4     4      19.7  1.78          22.5    -2.84
##  5     5      20.4  2.6           23.4    -3.02
##  6     6      20.6  3             23.8    -3.25
##  7     7      22    4.11          25.0    -3.04
##  8     8      22.8  4.94          25.9    -3.13
##  9     9      23.7  5.48          26.5    -2.81
## 10    10      28.6 11.7           33.2    -4.59
## # ... with 1,105 more rows
#Caso 7: SPS30 VS PMSA003

df %>% select(SPS30, PMSA003) %>% skim()
Data summary
Name Piped data
Number of rows 1115
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SPS30 0 1 7.32 6.66 0.08 1.97 5.52 10.65 51.5 ▇▂▁▁▁
PMSA003 0 1 10.46 11.99 0.00 0.85 6.48 16.20 89.0 ▇▂▁▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SPS30 ~ PMSA003)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.996
ggplot(df, aes(x = SPS30, y = PMSA003)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SPS30", y = "PM25 PMSA003",
       title = "Relationship between SPS30 and PMSA003") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    -2.65     0.048     -55.2       0    -2.75    -2.56
## 2 SPS30         1.79     0.005     369.        0     1.78     1.80
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
##       ID PMSA003 SPS30 PMSA003_hat residual
##    <int>   <dbl> <dbl>       <dbl>    <dbl>
##  1     1   0.017  0.9       -1.04     1.06 
##  2     2   0      1.02      -0.826    0.826
##  3     3   1.61   1.25      -0.414    2.02 
##  4     4   0.41   1.78       0.536   -0.126
##  5     5   1.72   2.6        2.01    -0.286
##  6     6   2.65   3          2.72    -0.073
##  7     7   4.04   4.11       4.71    -0.672
##  8     8   5.35   4.94       6.2     -0.85 
##  9     9   6.25   5.48       7.17    -0.918
## 10    10  18.5   11.7       18.3      0.183
## # ... with 1,105 more rows
#Caso 8: SPS30 VS PMS7003

df %>% select(SPS30, PMS7003) %>% skim()
Data summary
Name Piped data
Number of rows 1115
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SPS30 0 1 7.32 6.66 0.08 1.97 5.52 10.65 51.5 ▇▂▁▁▁
PMS7003 0 1 11.60 11.87 0.00 1.94 8.34 17.90 91.8 ▇▂▁▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SPS30 ~ PMS7003)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.996
ggplot(df, aes(x = SPS30, y = PMS7003)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SPS30", y = "PM25 PMS7003",
       title = "Relationship between SPS30 and PMS7003") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    -1.38     0.049     -28.1       0    -1.48    -1.29
## 2 SPS30         1.78     0.005     357.        0     1.76     1.78
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
##       ID PMS7003 SPS30 PMS7003_hat residual
##    <int>   <dbl> <dbl>       <dbl>    <dbl>
##  1     1    0     0.9        0.213   -0.213
##  2     2    0.29  1.02       0.426   -0.136
##  3     3    0.71  1.25       0.834   -0.124
##  4     4    1.4   1.78       1.78    -0.375
##  5     5    3.12  2.6        3.23    -0.11 
##  6     6    4.16  3          3.94     0.219
##  7     7    6.13  4.11       5.91     0.219
##  8     8    7.61  4.94       7.38     0.226
##  9     9    8.4   5.48       8.34     0.057
## 10    10   20.2  11.7       19.4      0.817
## # ... with 1,105 more rows
#Caso 9: SPS30 VS Oficial

df %>% select(SPS30, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 1115
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SPS30 0 1 7.32 6.66 0.08 1.97 5.52 10.65 51.5 ▇▂▁▁▁
Oficial 0 1 12.80 8.32 0.00 6.50 11.20 17.70 55.4 ▇▆▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SPS30 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.565
ggplot(df, aes(x = SPS30, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SPS30", y = "PM25 Oficial",
       title = "Relationship between SPS30 and Oficial") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    7.64      0.306      25.0       0    7.04     8.24 
## 2 SPS30        0.705     0.031      22.8       0    0.645    0.766
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
##       ID Oficial SPS30 Oficial_hat residual
##    <int>   <dbl> <dbl>       <dbl>    <dbl>
##  1     1     3.7  0.9         8.27   -4.57 
##  2     2     0.3  1.02        8.36   -8.06 
##  3     3     5.9  1.25        8.52   -2.62 
##  4     4     8.1  1.78        8.89   -0.793
##  5     5     3.7  2.6         9.47   -5.77 
##  6     6     4    3           9.75   -5.75 
##  7     7     4.8  4.11       10.5    -5.74 
##  8     8     6    4.94       11.1    -5.12 
##  9     9     8.3  5.48       11.5    -3.20 
## 10    10     9.8 11.7        15.9    -6.09 
## # ... with 1,105 more rows
#Caso 10: HPMA115S0 VS PMSA003

df %>% select(HPMA115S0, PMSA003) %>% skim()
Data summary
Name Piped data
Number of rows 1115
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
HPMA115S0 0 1 28.48 7.27 18.6 23.10 26.40 31.05 77.7 ▇▂▁▁▁
PMSA003 0 1 10.46 11.99 0.0 0.85 6.48 16.20 89.0 ▇▂▁▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = HPMA115S0 ~ PMSA003)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.984
ggplot(df, aes(x = HPMA115S0, y = PMSA003)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 HPMA115S0", y = "PM25 PMSA003",
       title = "Relationship between HPMA115S0 and PMSA003") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept   -35.8      0.256     -140.       0   -36.3    -35.2 
## 2 HPMA115S0     1.62     0.009      186.       0     1.60     1.64
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
##       ID PMSA003 HPMA115S0 PMSA003_hat residual
##    <int>   <dbl>     <dbl>       <dbl>    <dbl>
##  1     1   0.017      18.6      -5.58      5.59
##  2     2   0          19.1      -4.76      4.76
##  3     3   1.61       19.4      -4.28      5.89
##  4     4   0.41       19.7      -3.79      4.2 
##  5     5   1.72       20.4      -2.65      4.37
##  6     6   2.65       20.6      -2.33      4.98
##  7     7   4.04       22        -0.058     4.10
##  8     8   5.35       22.8       1.24      4.11
##  9     9   6.25       23.7       2.7       3.55
## 10    10  18.5        28.6      10.6       7.85
## # ... with 1,105 more rows
#Caso 11: HPMA115S0 VS PMS7003

df %>% select(HPMA115S0, PMS7003) %>% skim()
Data summary
Name Piped data
Number of rows 1115
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
HPMA115S0 0 1 28.48 7.27 18.6 23.10 26.40 31.05 77.7 ▇▂▁▁▁
PMS7003 0 1 11.60 11.87 0.0 1.94 8.34 17.90 91.8 ▇▂▁▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = HPMA115S0 ~ PMS7003)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.978
ggplot(df, aes(x = HPMA115S0, y = PMS7003)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 HPMA115S0", y = "PM25 PMS7003",
       title = "Relationship between HPMA115S0 and PMS7003") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept   -33.9      0.298     -114.       0   -34.5    -33.3 
## 2 HPMA115S0     1.60     0.01       158.       0     1.58     1.62
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
##       ID PMS7003 HPMA115S0 PMS7003_hat residual
##    <int>   <dbl>     <dbl>       <dbl>    <dbl>
##  1     1    0         18.6       -4.18     4.18
##  2     2    0.29      19.1       -3.39     3.68
##  3     3    0.71      19.4       -2.91     3.62
##  4     4    1.4       19.7       -2.43     3.83
##  5     5    3.12      20.4       -1.31     4.43
##  6     6    4.16      20.6       -0.99     5.15
##  7     7    6.13      22          1.25     4.88
##  8     8    7.61      22.8        2.52     5.08
##  9     9    8.4       23.7        3.96     4.44
## 10    10   20.2       28.6       11.8      8.41
## # ... with 1,105 more rows
#Caso 10: HPMA115S0 VS Oficial

df %>% select(HPMA115S0, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 1115
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
HPMA115S0 0 1 28.48 7.27 18.6 23.1 26.4 31.05 77.7 ▇▂▁▁▁
Oficial 0 1 12.80 8.32 0.0 6.5 11.2 17.70 55.4 ▇▆▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = HPMA115S0 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.557
ggplot(df, aes(x = HPMA115S0, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 HPMA115S0", y = "PM25 Oficial",
       title = "Relationship between HPMA115S0 and Oficial") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept   -5.35      0.837     -6.40       0   -7.00    -3.71 
## 2 HPMA115S0    0.637     0.028     22.4        0    0.581    0.693
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
##       ID Oficial HPMA115S0 Oficial_hat residual
##    <int>   <dbl>     <dbl>       <dbl>    <dbl>
##  1     1     3.7      18.6        6.5    -2.8  
##  2     2     0.3      19.1        6.82   -6.52 
##  3     3     5.9      19.4        7.01   -1.11 
##  4     4     8.1      19.7        7.20    0.899
##  5     5     3.7      20.4        7.65   -3.95 
##  6     6     4        20.6        7.78   -3.78 
##  7     7     4.8      22          8.67   -3.87 
##  8     8     6        22.8        9.18   -3.18 
##  9     9     8.3      23.7        9.75   -1.45 
## 10    10     9.8      28.6       12.9    -3.07 
## # ... with 1,105 more rows
#Caso 11: PMSA003 VS PMS7003

df %>% select(PMSA003, PMS7003) %>% skim()
Data summary
Name Piped data
Number of rows 1115
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
PMSA003 0 1 10.46 11.99 0 0.85 6.48 16.2 89.0 ▇▂▁▁▁
PMS7003 0 1 11.60 11.87 0 1.94 8.34 17.9 91.8 ▇▂▁▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = PMSA003 ~ PMS7003)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.996
ggplot(df, aes(x = PMSA003, y = PMS7003)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 PMSA003", y = "PM25 PMS7003",
       title = "Relationship between PMSA003 and PMS7003") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ PMSA003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    1.28      0.042      30.5       0    1.2      1.36 
## 2 PMSA003      0.987     0.003     374.        0    0.982    0.992
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
##       ID PMS7003 PMSA003 PMS7003_hat residual
##    <int>   <dbl>   <dbl>       <dbl>    <dbl>
##  1     1    0      0.017        1.30   -1.30 
##  2     2    0.29   0            1.28   -0.992
##  3     3    0.71   1.61         2.87   -2.16 
##  4     4    1.4    0.41         1.69   -0.287
##  5     5    3.12   1.72         2.98    0.141
##  6     6    4.16   2.65         3.90    0.263
##  7     7    6.13   4.04         5.27    0.862
##  8     8    7.61   5.35         6.56    1.05 
##  9     9    8.4    6.25         7.45    0.951
## 10    10   20.2   18.5         19.5     0.664
## # ... with 1,105 more rows
#Caso 12: PMSA003 VS Oficial

df %>% select(PMSA003, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 1115
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
PMSA003 0 1 10.46 11.99 0 0.85 6.48 16.2 89.0 ▇▂▁▁▁
Oficial 0 1 12.80 8.32 0 6.50 11.20 17.7 55.4 ▇▆▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = PMSA003 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.560
ggplot(df, aes(x = PMSA003, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 PMSA003", y = "PM25 Oficial",
       title = "Relationship between PMSA003 and Oficial") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMSA003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    8.73      0.274      31.9       0    8.20     9.27 
## 2 PMSA003      0.389     0.017      22.5       0    0.355    0.422
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
##       ID Oficial PMSA003 Oficial_hat residual
##    <int>   <dbl>   <dbl>       <dbl>    <dbl>
##  1     1     3.7   0.017        8.74   -5.04 
##  2     2     0.3   0            8.73   -8.43 
##  3     3     5.9   1.61         9.36   -3.46 
##  4     4     8.1   0.41         8.89   -0.793
##  5     5     3.7   1.72         9.40   -5.70 
##  6     6     4     2.65         9.76   -5.76 
##  7     7     4.8   4.04        10.3    -5.50 
##  8     8     6     5.35        10.8    -4.81 
##  9     9     8.3   6.25        11.2    -2.86 
## 10    10     9.8  18.5         15.9    -6.12 
## # ... with 1,105 more rows
#Caso 13: PMS7003 VS Oficial

df %>% select(PMS7003, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 1115
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
PMS7003 0 1 11.6 11.87 0 1.94 8.34 17.9 91.8 ▇▂▁▁▁
Oficial 0 1 12.8 8.32 0 6.50 11.20 17.7 55.4 ▇▆▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = PMS7003 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.563
ggplot(df, aes(x = PMS7003, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 PMS7003", y = "PM25 Oficial",
       title = "Relationship between PMS7003 and Oficial") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMS7003, data = df)
#**Get regression table original:**
get_regression_table(score_model, digits = 11)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    8.22     0.288       28.5       0    7.66     8.79 
## 2 PMS7003      0.394    0.0174      22.7       0    0.360    0.428
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
##       ID Oficial PMS7003 Oficial_hat residual
##    <int>   <dbl>   <dbl>       <dbl>    <dbl>
##  1     1     3.7    0           8.22   -4.52 
##  2     2     0.3    0.29        8.34   -8.04 
##  3     3     5.9    0.71        8.50   -2.60 
##  4     4     8.1    1.4         8.78   -0.675
##  5     5     3.7    3.12        9.45   -5.75 
##  6     6     4      4.16        9.86   -5.86 
##  7     7     4.8    6.13       10.6    -5.84 
##  8     8     6      7.61       11.2    -5.22 
##  9     9     8.3    8.4        11.5    -3.24 
## 10    10     9.8   20.2        16.2    -6.39 
## # ... with 1,105 more rows
# **ESTACION FERIAS -1hora VS SENSORES BAJO COSTO**
# **Prueba con los valores de la estacion Ferias retrasada 1 hora
# **5 sensores diferentes: PMS7003 & PMSA003 & HPMA115S0 & SPS30 & SNGCJA5**
# **Comparaciones con la estación oficial*


df <- read_excel("C:/Mediciones/FERIAS_1h.xlsx")
View(df)

glimpse(df)
## Rows: 1,114
## Columns: 8
## $ Num       <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 1~
## $ Fecha     <chr> "12-11-2020 24:00", "13-11-2020 01:00", "13-11-2020 02:00", ~
## $ Oficial   <dbl> 0.3, 5.9, 8.1, 3.7, 4.0, 4.8, 6.0, 8.3, 9.8, 16.5, 6.2, 10.2~
## $ PMS7003   <dbl> 0.00, 0.29, 0.71, 1.40, 3.12, 4.16, 6.13, 7.61, 8.40, 20.20,~
## $ PMSA003   <dbl> 0.0169, 0.0000, 1.6100, 0.4100, 1.7200, 2.6500, 4.0400, 5.35~
## $ HPMA115S0 <dbl> 18.6, 19.1, 19.4, 19.7, 20.4, 20.6, 22.0, 22.8, 23.7, 28.6, ~
## $ SPS30     <dbl> 0.90, 1.02, 1.25, 1.78, 2.60, 3.00, 4.11, 4.94, 5.48, 11.70,~
## $ SNGCJA5   <dbl> 0.00, 0.18, 0.45, 0.90, 1.51, 1.81, 2.86, 3.39, 3.48, 8.23, ~
df %>%
  sample_n(size = 10)
## # A tibble: 10 x 8
##      Num Fecha            Oficial PMS7003 PMSA003 HPMA115S0 SPS30 SNGCJA5
##    <dbl> <chr>              <dbl>   <dbl>   <dbl>     <dbl> <dbl>   <dbl>
##  1   122 18-11-2020 03:00     8.6    7.83    6.21      24.8  6.09    4.07
##  2   383 28-11-2020 23:00    13.2    8.57    6.41      25.2  5.66    4.09
##  3   999 27-12-2020 05:00    12      6.56    5.63      26.5  5.14    3.42
##  4    88 16-11-2020 16:00    19.7   27.9    28         37.9 17.7    14.1 
##  5   293 25-11-2020 06:00     8.4    5.21    2.91      22.8  3.27    2.23
##  6  1104 08-01-2021 05:00    15     11.8    10.8       28.5  7.5     5   
##  7   662 12-12-2020 04:00     7.7    9.91    7.91      27.4  6.27    4.98
##  8   140 18-11-2020 21:00    15.4   17.2    14.4       29.6  9.61    8.23
##  9  1152 10-01-2021 02:00     7      2.98    1.73      23.5  2.77  237.  
## 10   225 22-11-2020 10:00    19.5   14.5    13.5       30.4 10.2     6.77
fig <- plot_ly(df, x = ~Num, y = ~PMS7003, name = 'PM2.5 PMS7003', type = 'scatter', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~PMSA003, name = 'PM2.5 PMSA003', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~HPMA115S0, name = 'PM2.5 HPMA115S0', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~SPS30, name = 'PM2.5 SPS30', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~SNGCJA5, name = 'PM2.5 SNGCJA5', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~Oficial, name = 'PM2.5 Oficial', mode = 'lines+markers')
fig
#Caso 14: SNGCJA5 VS Oficial-1h

df %>% select(SNGCJA5, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 1114
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SNGCJA5 0 1 6.68 9.75 0 1.53 4.8 9.75 236.87 ▇▁▁▁▁
Oficial 0 1 12.81 8.32 0 6.50 11.2 17.70 55.40 ▇▆▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SNGCJA5 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.347
ggplot(df, aes(x = SNGCJA5, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SNGCJA5", y = "PM25 Oficial",
       title = "Relationship between SNGCJA5 and Oficial-1hour") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept   10.8       0.283      38.2       0   10.3     11.4  
## 2 SNGCJA5      0.296     0.024      12.4       0    0.249    0.343
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,114 x 5
##       ID Oficial SNGCJA5 Oficial_hat residual
##    <int>   <dbl>   <dbl>       <dbl>    <dbl>
##  1     1     0.3    0           10.8   -10.5 
##  2     2     5.9    0.18        10.9    -4.98
##  3     3     8.1    0.45        11.0    -2.86
##  4     4     3.7    0.9         11.1    -7.39
##  5     5     4      1.51        11.3    -7.28
##  6     6     4.8    1.81        11.4    -6.56
##  7     7     6      2.86        11.7    -5.68
##  8     8     8.3    3.39        11.8    -3.53
##  9     9     9.8    3.48        11.9    -2.06
## 10    10    16.5    8.23        13.3     3.23
## # ... with 1,104 more rows
#Caso 15: SPS30 VS Oficial-1

df %>% select(SPS30, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 1114
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SPS30 0 1 7.32 6.66 0.08 1.97 5.53 10.67 51.5 ▇▂▁▁▁
Oficial 0 1 12.81 8.32 0.00 6.50 11.20 17.70 55.4 ▇▆▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SPS30 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.712
ggplot(df, aes(x = SPS30, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SPS30", y = "PM25 Oficial",
       title = "Relationship between SPS30 and Oficial-1hour") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    6.3       0.26       24.2       0    5.79      6.81
## 2 SPS30        0.889     0.026      33.8       0    0.837     0.94
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,114 x 5
##       ID Oficial SPS30 Oficial_hat residual
##    <int>   <dbl> <dbl>       <dbl>    <dbl>
##  1     1     0.3  0.9         7.1    -6.8  
##  2     2     5.9  1.02        7.21   -1.31 
##  3     3     8.1  1.25        7.41    0.689
##  4     4     3.7  1.78        7.88   -4.18 
##  5     5     4    2.6         8.61   -4.61 
##  6     6     4.8  3           8.97   -4.17 
##  7     7     6    4.11        9.95   -3.95 
##  8     8     8.3  4.94       10.7    -2.39 
##  9     9     9.8  5.48       11.2    -1.37 
## 10    10    16.5 11.7        16.7    -0.196
## # ... with 1,104 more rows
#Caso 16: HPMA115S0 VS Oficial-1h

df %>% select(HPMA115S0, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 1114
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
HPMA115S0 0 1 28.49 7.27 18.6 23.1 26.4 31.08 77.7 ▇▂▁▁▁
Oficial 0 1 12.81 8.32 0.0 6.5 11.2 17.70 55.4 ▇▆▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = HPMA115S0 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.711
ggplot(df, aes(x = HPMA115S0, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 HPMA115S0", y = "PM25 Oficial",
       title = "Relationship between HPMA115S0 and Oficial-1hour") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept  -10.4       0.709     -14.6       0  -11.8     -8.97 
## 2 HPMA115S0    0.813     0.024      33.7       0    0.766    0.861
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,114 x 5
##       ID Oficial HPMA115S0 Oficial_hat residual
##    <int>   <dbl>     <dbl>       <dbl>    <dbl>
##  1     1     0.3      18.6        4.76   -4.46 
##  2     2     5.9      19.1        5.17    0.729
##  3     3     8.1      19.4        5.42    2.68 
##  4     4     3.7      19.7        5.66   -1.96 
##  5     5     4        20.4        6.23   -2.23 
##  6     6     4.8      20.6        6.39   -1.59 
##  7     7     6        22          7.53   -1.53 
##  8     8     8.3      22.8        8.18    0.12 
##  9     9     9.8      23.7        8.91    0.888
## 10    10    16.5      28.6       12.9     3.60 
## # ... with 1,104 more rows
#Caso 17: PMSA003 VS Oficial-1h

df %>% select(PMSA003, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 1114
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
PMSA003 0 1 10.47 11.99 0 0.85 6.49 16.2 89.0 ▇▂▁▁▁
Oficial 0 1 12.81 8.32 0 6.50 11.20 17.7 55.4 ▇▆▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = PMSA003 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.711
ggplot(df, aes(x = PMSA003, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 PMSA003", y = "PM25 Oficial",
       title = "Relationship between PMSA003 and Oficial-1hour") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMSA003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    7.64      0.233      32.8       0    7.19     8.1  
## 2 PMSA003      0.493     0.015      33.7       0    0.464    0.522
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,114 x 5
##       ID Oficial PMSA003 Oficial_hat residual
##    <int>   <dbl>   <dbl>       <dbl>    <dbl>
##  1     1     0.3   0.017        7.65   -7.35 
##  2     2     5.9   0            7.64   -1.74 
##  3     3     8.1   1.61         8.44   -0.337
##  4     4     3.7   0.41         7.85   -4.15 
##  5     5     4     1.72         8.49   -4.49 
##  6     6     4.8   2.65         8.95   -4.15 
##  7     7     6     4.04         9.64   -3.64 
##  8     8     8.3   5.35        10.3    -1.98 
##  9     9     9.8   6.25        10.7    -0.925
## 10    10    16.5  18.5         16.8    -0.266
## # ... with 1,104 more rows
#Caso 18: PMS7003 VS Oficial-1h

df %>% select(PMS7003, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 1114
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
PMS7003 0 1 11.61 11.87 0 1.95 8.37 17.9 91.8 ▇▂▁▁▁
Oficial 0 1 12.81 8.32 0 6.50 11.20 17.7 55.4 ▇▆▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = PMS7003 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.713
ggplot(df, aes(x = PMS7003, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 PMS7003", y = "PM25 Oficial",
       title = "Relationship between PMS7003 and Oficial-1hour") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMS7003, data = df)
#**Get regression table original:**
get_regression_table(score_model, digits = 11)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    7.00     0.244       28.7       0    6.52     7.48 
## 2 PMS7003      0.500    0.0147      34.0       0    0.471    0.529
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,114 x 5
##       ID Oficial PMS7003 Oficial_hat residual
##    <int>   <dbl>   <dbl>       <dbl>    <dbl>
##  1     1     0.3    0           7.00   -6.70 
##  2     2     5.9    0.29        7.15   -1.25 
##  3     3     8.1    0.71        7.36    0.742
##  4     4     3.7    1.4         7.70   -4.00 
##  5     5     4      3.12        8.56   -4.56 
##  6     6     4.8    4.16        9.08   -4.28 
##  7     7     6      6.13       10.1    -4.07 
##  8     8     8.3    7.61       10.8    -2.51 
##  9     9     9.8    8.4        11.2    -1.40 
## 10    10    16.5   20.2        17.1    -0.598
## # ... with 1,104 more rows
# **ESTACION FERIAS -2horas VS SENSORES BAJO COSTO**
# **Prueba con los valores de la estacion Ferias retrasada 2 horas
# **5 sensores diferentes: PMS7003 & PMSA003 & HPMA115S0 & SPS30 & SNGCJA5**
# **Comparaciones con la estación oficial*


df <- read_excel("C:/Mediciones/FERIAS_2h.xlsx")
View(df)

glimpse(df)
## Rows: 1,113
## Columns: 8
## $ Num       <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 1~
## $ Fecha     <chr> "12-11-2020 24:00", "13-11-2020 01:00", "13-11-2020 02:00", ~
## $ Oficial   <dbl> 5.9, 8.1, 3.7, 4.0, 4.8, 6.0, 8.3, 9.8, 16.5, 6.2, 10.2, 16.~
## $ PMS7003   <dbl> 0.00, 0.29, 0.71, 1.40, 3.12, 4.16, 6.13, 7.61, 8.40, 20.20,~
## $ PMSA003   <dbl> 0.0169, 0.0000, 1.6100, 0.4100, 1.7200, 2.6500, 4.0400, 5.35~
## $ HPMA115S0 <dbl> 18.6, 19.1, 19.4, 19.7, 20.4, 20.6, 22.0, 22.8, 23.7, 28.6, ~
## $ SPS30     <dbl> 0.90, 1.02, 1.25, 1.78, 2.60, 3.00, 4.11, 4.94, 5.48, 11.70,~
## $ SNGCJA5   <dbl> 0.00, 0.18, 0.45, 0.90, 1.51, 1.81, 2.86, 3.39, 3.48, 8.23, ~
df %>%
  sample_n(size = 10)
## # A tibble: 10 x 8
##      Num Fecha            Oficial PMS7003 PMSA003 HPMA115S0 SPS30 SNGCJA5
##    <dbl> <chr>              <dbl>   <dbl>   <dbl>     <dbl> <dbl>   <dbl>
##  1  1031 28-12-2020 18:00    16     13    11           29   8        6   
##  2   287 24-11-2020 24:00     9.9    6.69  4.71        23.9 4.38     3.29
##  3  1177 11-01-2021 03:00     8      0     0           21.9 0.407   15.6 
##  4   493 03-12-2020 10:00     3.2    0.44  0.0962      22.2 1.13     0.15
##  5   452 01-12-2020 20:00    12.5    5.34  3.23        23.8 3.55     2.5 
##  6   885 21-12-2020 24:00     8      3.71  2.55        24.6 3.13     2.16
##  7   911 23-12-2020 05:00    24     11.3   9.18        27.6 6.18     4.84
##  8   192 21-11-2020 01:00     3.6    1.37  0.44        22   1.86     1.09
##  9   983 26-12-2020 13:00    13      2.5   1.5         24.8 2.79     1.57
## 10   976 26-12-2020 06:00    10     10.3   9.92        28.3 7.54     5.54
fig <- plot_ly(df, x = ~Num, y = ~PMS7003, name = 'PM2.5 PMS7003', type = 'scatter', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~PMSA003, name = 'PM2.5 PMSA003', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~HPMA115S0, name = 'PM2.5 HPMA115S0', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~SPS30, name = 'PM2.5 SPS30', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~SNGCJA5, name = 'PM2.5 SNGCJA5', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~Oficial, name = 'PM2.5 Oficial', mode = 'lines+markers')
fig
#Caso 19: SNGCJA5 VS Oficial-2h

df %>% select(SNGCJA5, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 1113
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SNGCJA5 0 1 6.67 9.75 0 1.53 4.8 9.75 236.87 ▇▁▁▁▁
Oficial 0 1 12.82 8.31 0 6.50 11.2 17.70 55.40 ▇▆▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SNGCJA5 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.405
ggplot(df, aes(x = SNGCJA5, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SNGCJA5", y = "PM25 Oficial",
       title = "Relationship between SNGCJA5 and Oficial-2hour") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept   10.5       0.276      38.1       0    9.97    11.1  
## 2 SNGCJA5      0.345     0.023      14.8       0    0.299    0.391
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,113 x 5
##       ID Oficial SNGCJA5 Oficial_hat residual
##    <int>   <dbl>   <dbl>       <dbl>    <dbl>
##  1     1     5.9    0           10.5    -4.62
##  2     2     8.1    0.18        10.6    -2.48
##  3     3     3.7    0.45        10.7    -6.97
##  4     4     4      0.9         10.8    -6.83
##  5     5     4.8    1.51        11.0    -6.24
##  6     6     6      1.81        11.1    -5.14
##  7     7     8.3    2.86        11.5    -3.20
##  8     8     9.8    3.39        11.7    -1.89
##  9     9    16.5    3.48        11.7     4.78
## 10    10     6.2    8.23        13.4    -7.16
## # ... with 1,103 more rows
#Caso 20: SPS30 VS Oficial-2

df %>% select(SPS30, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 1113
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SPS30 0 1 7.33 6.66 0.08 1.97 5.53 10.7 51.5 ▇▂▁▁▁
Oficial 0 1 12.82 8.31 0.00 6.50 11.20 17.7 55.4 ▇▆▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SPS30 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.807
ggplot(df, aes(x = SPS30, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SPS30", y = "PM25 Oficial",
       title = "Relationship between SPS30 and Oficial-2hour") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept     5.44     0.219      24.8       0    5.01      5.87
## 2 SPS30         1.01     0.022      45.5       0    0.964     1.05
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,113 x 5
##       ID Oficial SPS30 Oficial_hat residual
##    <int>   <dbl> <dbl>       <dbl>    <dbl>
##  1     1     5.9  0.9         6.34   -0.445
##  2     2     8.1  1.02        6.47    1.63 
##  3     3     3.7  1.25        6.70   -3.00 
##  4     4     4    1.78        7.23   -3.23 
##  5     5     4.8  2.6         8.06   -3.26 
##  6     6     6    3           8.46   -2.46 
##  7     7     8.3  4.11        9.58   -1.28 
##  8     8     9.8  4.94       10.4    -0.613
##  9     9    16.5  5.48       11.0     5.54 
## 10    10     6.2 11.7        17.2   -11.0  
## # ... with 1,103 more rows
#Caso 21: HPMA115S0 VS Oficial-2h

df %>% select(HPMA115S0, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 1113
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
HPMA115S0 0 1 28.49 7.27 18.6 23.1 26.4 31.1 77.7 ▇▂▁▁▁
Oficial 0 1 12.82 8.31 0.0 6.5 11.2 17.7 55.4 ▇▆▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = HPMA115S0 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.809
ggplot(df, aes(x = HPMA115S0, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 HPMA115S0", y = "PM25 Oficial",
       title = "Relationship between HPMA115S0 and Oficial-2hour") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept  -13.5       0.593     -22.8       0  -14.7    -12.3  
## 2 HPMA115S0    0.924     0.02       45.8       0    0.884    0.963
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,113 x 5
##       ID Oficial HPMA115S0 Oficial_hat residual
##    <int>   <dbl>     <dbl>       <dbl>    <dbl>
##  1     1     5.9      18.6        3.68    2.22 
##  2     2     8.1      19.1        4.14    3.96 
##  3     3     3.7      19.4        4.42   -0.716
##  4     4     4        19.7        4.69   -0.693
##  5     5     4.8      20.4        5.34   -0.54 
##  6     6     6        20.6        5.52    0.475
##  7     7     8.3      22          6.82    1.48 
##  8     8     9.8      22.8        7.56    2.24 
##  9     9    16.5      23.7        8.39    8.11 
## 10    10     6.2      28.6       12.9    -6.72 
## # ... with 1,103 more rows
#Caso 22: PMSA003 VS Oficial-2h

df %>% select(PMSA003, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 1113
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
PMSA003 0 1 10.48 11.99 0 0.85 6.5 16.2 89.0 ▇▂▁▁▁
Oficial 0 1 12.82 8.31 0 6.50 11.2 17.7 55.4 ▇▆▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = PMSA003 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.806
ggplot(df, aes(x = PMSA003, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 PMSA003", y = "PM25 Oficial",
       title = "Relationship between PMSA003 and Oficial-2hour") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMSA003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    6.96      0.196      35.5       0    6.58     7.35 
## 2 PMSA003      0.559     0.012      45.3       0    0.534    0.583
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,113 x 5
##       ID Oficial PMSA003 Oficial_hat residual
##    <int>   <dbl>   <dbl>       <dbl>    <dbl>
##  1     1     5.9   0.017        6.97   -1.07 
##  2     2     8.1   0            6.96    1.14 
##  3     3     3.7   1.61         7.86   -4.16 
##  4     4     4     0.41         7.19   -3.19 
##  5     5     4.8   1.72         7.92   -3.12 
##  6     6     6     2.65         8.44   -2.44 
##  7     7     8.3   4.04         9.22   -0.921
##  8     8     9.8   5.35         9.95   -0.152
##  9     9    16.5   6.25        10.5     6.04 
## 10    10     6.2  18.5         17.3   -11.1  
## # ... with 1,103 more rows
#Caso 23: PMS7003 VS Oficial-2h

df %>% select(PMS7003, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 1113
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
PMS7003 0 1 11.62 11.87 0 1.97 8.4 17.9 91.8 ▇▂▁▁▁
Oficial 0 1 12.82 8.31 0 6.50 11.2 17.7 55.4 ▇▆▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = PMS7003 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.811
ggplot(df, aes(x = PMS7003, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 PMS7003", y = "PM25 Oficial",
       title = "Relationship between PMS7003 and Oficial-2hour") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMS7003, data = df)
#**Get regression table original:**
get_regression_table(score_model, digits = 11)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    6.22     0.204       30.5       0    5.82     6.62 
## 2 PMS7003      0.567    0.0123      46.1       0    0.543    0.592
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,113 x 5
##       ID Oficial PMS7003 Oficial_hat residual
##    <int>   <dbl>   <dbl>       <dbl>    <dbl>
##  1     1     5.9    0           6.22   -0.321
##  2     2     8.1    0.29        6.39    1.71 
##  3     3     3.7    0.71        6.62   -2.92 
##  4     4     4      1.4         7.02   -3.02 
##  5     5     4.8    3.12        7.99   -3.19 
##  6     6     6      4.16        8.58   -2.58 
##  7     7     8.3    6.13        9.7    -1.4  
##  8     8     9.8    7.61       10.5    -0.74 
##  9     9    16.5    8.4        11.0     5.51 
## 10    10     6.2   20.2        17.7   -11.5  
## # ... with 1,103 more rows