library(readxl)
library(tidyverse)
## -- Attaching packages --------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.3     v dplyr   1.0.2
## v tidyr   1.1.1     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(moderndive)
library(skimr)

# **ESTACION FERIAS VS CANAIRIOS**
# **5 sensores diferentes: PMS7003 & PMSA003 & HPMA115S0 & SPS30 & SNGCJA5**
# **Comparaciones entre sensores de bajo costo y con la estación oficial*

df <- read_excel("C:/Mediciones/FERIAS_CANAIRIOS.xlsx")
View(df)

glimpse(df)
## Rows: 1,115
## Columns: 8
## $ Num       <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17...
## $ Fecha     <chr> "12-11-2020 24:00", "13-11-2020 01:00", "13-11-2020 02:00...
## $ Oficial   <dbl> 3.7, 0.3, 5.9, 8.1, 3.7, 4.0, 4.8, 6.0, 8.3, 9.8, 16.5, 6...
## $ PMS7003   <dbl> 0.00, 0.29, 0.71, 1.40, 3.12, 4.16, 6.13, 7.61, 8.40, 20....
## $ PMSA003   <dbl> 0.0169, 0.0000, 1.6100, 0.4100, 1.7200, 2.6500, 4.0400, 5...
## $ HPMA115S0 <dbl> 18.6, 19.1, 19.4, 19.7, 20.4, 20.6, 22.0, 22.8, 23.7, 28....
## $ SPS30     <dbl> 0.90, 1.02, 1.25, 1.78, 2.60, 3.00, 4.11, 4.94, 5.48, 11....
## $ SNGCJA5   <dbl> 0.00, 0.18, 0.45, 0.90, 1.51, 1.81, 2.86, 3.39, 3.48, 8.2...
df %>%
  sample_n(size = 10)
## # A tibble: 10 x 8
##      Num Fecha            Oficial PMS7003 PMSA003 HPMA115S0 SPS30 SNGCJA5
##    <dbl> <chr>              <dbl>   <dbl>   <dbl>     <dbl> <dbl>   <dbl>
##  1   641 11-12-2020 10:00    12    7.65    6.07        27.9 5.98   3.61  
##  2   455 01-12-2020 23:00     7.7  0.43    0.0172      21.2 1.03   0.19  
##  3   614 09-12-2020 14:00     4.5  9.03    7.42        29   5.97   4.26  
##  4    67 15-11-2020 18:00    15.1  5.42    3.22        22.8 3.95   2.8   
##  5   585 06-12-2020 15:00     2.9  0.290   0           22.1 1      0.24  
##  6   379 28-11-2020 19:00    17.1  8.73    6.16        25.7 5.65   4.27  
##  7   596 07-12-2020 03:00     4.8  6.04    4.2         25   4.24   3.09  
##  8   754 16-12-2020 03:00    13    3.78    1.83        23.5 2.74   1.79  
##  9  1134 09-01-2021 11:00     8    0.0877  0.0175      22.4 0.702  0.0175
## 10   904 22-12-2020 22:00    26    8.98    7.08        27.5 5.37   4.33
fig <- plot_ly(df, x = ~Num, y = ~PMS7003, name = 'PM2.5 PMS7003', type = 'scatter', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~PMSA003, name = 'PM2.5 PMSA003', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~HPMA115S0, name = 'PM2.5 HPMA115S0', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~SPS30, name = 'PM2.5 SPS30', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~SNGCJA5, name = 'PM2.5 SNGCJA5', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~Oficial, name = 'PM2.5 Oficial', mode = 'lines+markers')
fig
#Caso 1: SNGCJA5 VS SPS30

df %>% select(SNGCJA5, SPS30) %>% skim()
Data summary
Name Piped data
Number of rows 1115
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SNGCJA5 0 1 6.69 9.75 0.00 1.53 4.81 9.76 236.87 ▇▁▁▁▁
SPS30 0 1 7.32 6.66 0.08 1.97 5.52 10.65 51.50 ▇▂▁▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SNGCJA5 ~ SPS30)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.496
ggplot(df, aes(x = SNGCJA5, y = SPS30)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SNGCJA5", y = "PM25 SPS30",
       title = "Relationship between SNGCJA5 and SPS30") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(SPS30 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    5.05      0.21       24.0       0    4.64     5.46 
## 2 SNGCJA5      0.339     0.018      19.1       0    0.304    0.374
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
##       ID SPS30 SNGCJA5 SPS30_hat residual
##    <int> <dbl>   <dbl>     <dbl>    <dbl>
##  1     1  0.9     0         5.05   -4.15 
##  2     2  1.02    0.18      5.11   -4.09 
##  3     3  1.25    0.45      5.20   -3.96 
##  4     4  1.78    0.9       5.36   -3.58 
##  5     5  2.6     1.51      5.56   -2.96 
##  6     6  3       1.81      5.66   -2.66 
##  7     7  4.11    2.86      6.02   -1.91 
##  8     8  4.94    3.39      6.20   -1.26 
##  9     9  5.48    3.48      6.23   -0.751
## 10    10 11.7     8.23      7.84    3.86 
## # ... with 1,105 more rows
#Caso 2: SNGCJA5 VS HPMA115S0

df %>% select(SNGCJA5, HPMA115S0) %>% skim()
Data summary
Name Piped data
Number of rows 1115
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SNGCJA5 0 1 6.69 9.75 0.0 1.53 4.81 9.76 236.87 ▇▁▁▁▁
HPMA115S0 0 1 28.48 7.27 18.6 23.10 26.40 31.05 77.70 ▇▂▁▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SNGCJA5 ~ HPMA115S0)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.500
ggplot(df, aes(x = SNGCJA5, y = HPMA115S0)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SNGCJA5", y = "PM25 HPMA115S0",
       title = "Relationship between SNGCJA5 and HPMA115S0") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(HPMA115S0 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept   26.0       0.229     114.        0   25.5     26.4  
## 2 SNGCJA5      0.373     0.019      19.2       0    0.335    0.411
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
##       ID HPMA115S0 SNGCJA5 HPMA115S0_hat residual
##    <int>     <dbl>   <dbl>         <dbl>    <dbl>
##  1     1      18.6    0             26.0   -7.39 
##  2     2      19.1    0.18          26.1   -6.96 
##  3     3      19.4    0.45          26.2   -6.76 
##  4     4      19.7    0.9           26.3   -6.63 
##  5     5      20.4    1.51          26.6   -6.16 
##  6     6      20.6    1.81          26.7   -6.07 
##  7     7      22      2.86          27.1   -5.06 
##  8     8      22.8    3.39          27.3   -4.46 
##  9     9      23.7    3.48          27.3   -3.59 
## 10    10      28.6    8.23          29.1   -0.459
## # ... with 1,105 more rows
#Caso 3: SNGCJA5 VS PMSA003

df %>% select(SNGCJA5, PMSA003) %>% skim()
Data summary
Name Piped data
Number of rows 1115
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SNGCJA5 0 1 6.69 9.75 0 1.53 4.81 9.76 236.87 ▇▁▁▁▁
PMSA003 0 1 10.46 11.99 0 0.85 6.48 16.20 89.00 ▇▂▁▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SNGCJA5 ~ PMSA003)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.503
ggplot(df, aes(x = SNGCJA5, y = PMSA003)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SNGCJA5", y = "PM25 PMSA003",
       title = "Relationship between SNGCJA5 and PMSA003") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    6.33      0.376      16.8       0    5.59     7.07 
## 2 SNGCJA5      0.618     0.032      19.4       0    0.556    0.681
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
##       ID PMSA003 SNGCJA5 PMSA003_hat residual
##    <int>   <dbl>   <dbl>       <dbl>    <dbl>
##  1     1   0.017    0           6.33    -6.31
##  2     2   0        0.18        6.44    -6.44
##  3     3   1.61     0.45        6.61    -5.00
##  4     4   0.41     0.9         6.88    -6.47
##  5     5   1.72     1.51        7.26    -5.54
##  6     6   2.65     1.81        7.45    -4.80
##  7     7   4.04     2.86        8.10    -4.06
##  8     8   5.35     3.39        8.42    -3.07
##  9     9   6.25     3.48        8.48    -2.23
## 10    10  18.5      8.23       11.4      7.08
## # ... with 1,105 more rows
#Caso 4: SNGCJA5 VS PMS7003

df %>% select(SNGCJA5, PMS7003) %>% skim()
Data summary
Name Piped data
Number of rows 1115
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SNGCJA5 0 1 6.69 9.75 0 1.53 4.81 9.76 236.87 ▇▁▁▁▁
PMS7003 0 1 11.60 11.87 0 1.94 8.34 17.90 91.80 ▇▂▁▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SNGCJA5 ~ PMS7003)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.496
ggplot(df, aes(x = SNGCJA5, y = PMS7003)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SNGCJA5", y = "PM25 PMS7003",
       title = "Relationship between SNGCJA5 and PMS7003") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    7.56      0.374      20.2       0    6.83     8.30 
## 2 SNGCJA5      0.604     0.032      19.1       0    0.542    0.666
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
##       ID PMS7003 SNGCJA5 PMS7003_hat residual
##    <int>   <dbl>   <dbl>       <dbl>    <dbl>
##  1     1   0        0           7.56    -7.56
##  2     2   0.290    0.18        7.67    -7.38
##  3     3   0.71     0.45        7.84    -7.13
##  4     4   1.4      0.9         8.11    -6.71
##  5     5   3.12     1.51        8.48    -5.36
##  6     6   4.16     1.81        8.66    -4.50
##  7     7   6.13     2.86        9.29    -3.16
##  8     8   7.61     3.39        9.61    -2.00
##  9     9   8.4      3.48        9.67    -1.27
## 10    10  20.2      8.23       12.5      7.66
## # ... with 1,105 more rows
#Caso 5: SNGCJA5 VS Oficial

df %>% select(SNGCJA5, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 1115
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SNGCJA5 0 1 6.69 9.75 0 1.53 4.81 9.76 236.87 ▇▁▁▁▁
Oficial 0 1 12.80 8.32 0 6.50 11.20 17.70 55.40 ▇▆▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SNGCJA5 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.275
ggplot(df, aes(x = SNGCJA5, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SNGCJA5", y = "PM25 Oficial",
       title = "Relationship between SNGCJA5 and Oficial") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept   11.2       0.291     38.7        0   10.7     11.8  
## 2 SNGCJA5      0.234     0.025      9.53       0    0.186    0.283
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
##       ID Oficial SNGCJA5 Oficial_hat residual
##    <int>   <dbl>   <dbl>       <dbl>    <dbl>
##  1     1     3.7    0           11.2    -7.53
##  2     2     0.3    0.18        11.3   -11.0 
##  3     3     5.9    0.45        11.3    -5.44
##  4     4     8.1    0.9         11.4    -3.34
##  5     5     3.7    1.51        11.6    -7.89
##  6     6     4      1.81        11.7    -7.66
##  7     7     4.8    2.86        11.9    -7.10
##  8     8     6      3.39        12.0    -6.03
##  9     9     8.3    3.48        12.0    -3.75
## 10    10     9.8    8.23        13.2    -3.36
## # ... with 1,105 more rows
#Caso 6: SPS30 VS HPMA115S0

df %>% select(SPS30, HPMA115S0) %>% skim()
Data summary
Name Piped data
Number of rows 1115
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SPS30 0 1 7.32 6.66 0.08 1.97 5.52 10.65 51.5 ▇▂▁▁▁
HPMA115S0 0 1 28.48 7.27 18.60 23.10 26.40 31.05 77.7 ▇▂▁▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SPS30 ~ HPMA115S0)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.983
ggplot(df, aes(x = SPS30, y = HPMA115S0)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SPS30", y = "PM25 HPMA115S0",
       title = "Relationship between SPS30 and HPMA115S0") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(HPMA115S0 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    20.6      0.059      348.       0    20.5     20.7 
## 2 SPS30         1.07     0.006      179.       0     1.06     1.08
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
##       ID HPMA115S0 SPS30 HPMA115S0_hat residual
##    <int>     <dbl> <dbl>         <dbl>    <dbl>
##  1     1      18.6  0.9           21.6    -3.00
##  2     2      19.1  1.02          21.7    -2.62
##  3     3      19.4  1.25          22.0    -2.57
##  4     4      19.7  1.78          22.5    -2.84
##  5     5      20.4  2.6           23.4    -3.02
##  6     6      20.6  3             23.8    -3.25
##  7     7      22    4.11          25.0    -3.04
##  8     8      22.8  4.94          25.9    -3.13
##  9     9      23.7  5.48          26.5    -2.81
## 10    10      28.6 11.7           33.2    -4.59
## # ... with 1,105 more rows
#Caso 7: SPS30 VS PMSA003

df %>% select(SPS30, PMSA003) %>% skim()
Data summary
Name Piped data
Number of rows 1115
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SPS30 0 1 7.32 6.66 0.08 1.97 5.52 10.65 51.5 ▇▂▁▁▁
PMSA003 0 1 10.46 11.99 0.00 0.85 6.48 16.20 89.0 ▇▂▁▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SPS30 ~ PMSA003)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.996
ggplot(df, aes(x = SPS30, y = PMSA003)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SPS30", y = "PM25 PMSA003",
       title = "Relationship between SPS30 and PMSA003") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    -2.65     0.048     -55.2       0    -2.75    -2.56
## 2 SPS30         1.79     0.005     369.        0     1.78     1.80
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
##       ID PMSA003 SPS30 PMSA003_hat residual
##    <int>   <dbl> <dbl>       <dbl>    <dbl>
##  1     1   0.017  0.9       -1.04     1.06 
##  2     2   0      1.02      -0.826    0.826
##  3     3   1.61   1.25      -0.414    2.02 
##  4     4   0.41   1.78       0.536   -0.126
##  5     5   1.72   2.6        2.01    -0.286
##  6     6   2.65   3          2.72    -0.073
##  7     7   4.04   4.11       4.71    -0.672
##  8     8   5.35   4.94       6.2     -0.85 
##  9     9   6.25   5.48       7.17    -0.918
## 10    10  18.5   11.7       18.3      0.183
## # ... with 1,105 more rows
#Caso 8: SPS30 VS PMS7003

df %>% select(SPS30, PMS7003) %>% skim()
Data summary
Name Piped data
Number of rows 1115
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SPS30 0 1 7.32 6.66 0.08 1.97 5.52 10.65 51.5 ▇▂▁▁▁
PMS7003 0 1 11.60 11.87 0.00 1.94 8.34 17.90 91.8 ▇▂▁▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SPS30 ~ PMS7003)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.996
ggplot(df, aes(x = SPS30, y = PMS7003)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SPS30", y = "PM25 PMS7003",
       title = "Relationship between SPS30 and PMS7003") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    -1.38     0.049     -28.1       0    -1.48    -1.29
## 2 SPS30         1.78     0.005     357.        0     1.76     1.78
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
##       ID PMS7003 SPS30 PMS7003_hat residual
##    <int>   <dbl> <dbl>       <dbl>    <dbl>
##  1     1   0      0.9        0.213   -0.213
##  2     2   0.290  1.02       0.426   -0.136
##  3     3   0.71   1.25       0.834   -0.124
##  4     4   1.4    1.78       1.78    -0.375
##  5     5   3.12   2.6        3.23    -0.11 
##  6     6   4.16   3          3.94     0.219
##  7     7   6.13   4.11       5.91     0.219
##  8     8   7.61   4.94       7.38     0.226
##  9     9   8.4    5.48       8.34     0.057
## 10    10  20.2   11.7       19.4      0.817
## # ... with 1,105 more rows
#Caso 9: SPS30 VS Oficial

df %>% select(SPS30, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 1115
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SPS30 0 1 7.32 6.66 0.08 1.97 5.52 10.65 51.5 ▇▂▁▁▁
Oficial 0 1 12.80 8.32 0.00 6.50 11.20 17.70 55.4 ▇▆▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SPS30 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.565
ggplot(df, aes(x = SPS30, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SPS30", y = "PM25 Oficial",
       title = "Relationship between SPS30 and Oficial") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    7.64      0.306      25.0       0    7.04     8.24 
## 2 SPS30        0.705     0.031      22.8       0    0.645    0.766
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
##       ID Oficial SPS30 Oficial_hat residual
##    <int>   <dbl> <dbl>       <dbl>    <dbl>
##  1     1     3.7  0.9         8.27   -4.57 
##  2     2     0.3  1.02        8.36   -8.06 
##  3     3     5.9  1.25        8.52   -2.62 
##  4     4     8.1  1.78        8.89   -0.793
##  5     5     3.7  2.6         9.47   -5.77 
##  6     6     4    3           9.75   -5.75 
##  7     7     4.8  4.11       10.5    -5.74 
##  8     8     6    4.94       11.1    -5.12 
##  9     9     8.3  5.48       11.5    -3.20 
## 10    10     9.8 11.7        15.9    -6.09 
## # ... with 1,105 more rows
#Caso 10: HPMA115S0 VS PMSA003

df %>% select(HPMA115S0, PMSA003) %>% skim()
Data summary
Name Piped data
Number of rows 1115
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
HPMA115S0 0 1 28.48 7.27 18.6 23.10 26.40 31.05 77.7 ▇▂▁▁▁
PMSA003 0 1 10.46 11.99 0.0 0.85 6.48 16.20 89.0 ▇▂▁▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = HPMA115S0 ~ PMSA003)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.984
ggplot(df, aes(x = HPMA115S0, y = PMSA003)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 HPMA115S0", y = "PM25 PMSA003",
       title = "Relationship between HPMA115S0 and PMSA003") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept   -35.8      0.256     -140.       0   -36.3    -35.2 
## 2 HPMA115S0     1.62     0.009      186.       0     1.60     1.64
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
##       ID PMSA003 HPMA115S0 PMSA003_hat residual
##    <int>   <dbl>     <dbl>       <dbl>    <dbl>
##  1     1   0.017      18.6      -5.58      5.59
##  2     2   0          19.1      -4.76      4.76
##  3     3   1.61       19.4      -4.28      5.89
##  4     4   0.41       19.7      -3.79      4.2 
##  5     5   1.72       20.4      -2.65      4.37
##  6     6   2.65       20.6      -2.33      4.98
##  7     7   4.04       22        -0.058     4.10
##  8     8   5.35       22.8       1.24      4.11
##  9     9   6.25       23.7       2.7       3.55
## 10    10  18.5        28.6      10.6       7.85
## # ... with 1,105 more rows
#Caso 11: HPMA115S0 VS PMS7003

df %>% select(HPMA115S0, PMS7003) %>% skim()
Data summary
Name Piped data
Number of rows 1115
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
HPMA115S0 0 1 28.48 7.27 18.6 23.10 26.40 31.05 77.7 ▇▂▁▁▁
PMS7003 0 1 11.60 11.87 0.0 1.94 8.34 17.90 91.8 ▇▂▁▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = HPMA115S0 ~ PMS7003)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.978
ggplot(df, aes(x = HPMA115S0, y = PMS7003)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 HPMA115S0", y = "PM25 PMS7003",
       title = "Relationship between HPMA115S0 and PMS7003") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept   -33.9      0.298     -114.       0   -34.5    -33.3 
## 2 HPMA115S0     1.60     0.01       158.       0     1.58     1.62
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
##       ID PMS7003 HPMA115S0 PMS7003_hat residual
##    <int>   <dbl>     <dbl>       <dbl>    <dbl>
##  1     1   0          18.6       -4.18     4.18
##  2     2   0.290      19.1       -3.39     3.68
##  3     3   0.71       19.4       -2.91     3.62
##  4     4   1.4        19.7       -2.43     3.83
##  5     5   3.12       20.4       -1.31     4.43
##  6     6   4.16       20.6       -0.99     5.15
##  7     7   6.13       22          1.25     4.88
##  8     8   7.61       22.8        2.52     5.08
##  9     9   8.4        23.7        3.96     4.44
## 10    10  20.2        28.6       11.8      8.41
## # ... with 1,105 more rows
#Caso 10: HPMA115S0 VS Oficial

df %>% select(HPMA115S0, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 1115
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
HPMA115S0 0 1 28.48 7.27 18.6 23.1 26.4 31.05 77.7 ▇▂▁▁▁
Oficial 0 1 12.80 8.32 0.0 6.5 11.2 17.70 55.4 ▇▆▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = HPMA115S0 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.557
ggplot(df, aes(x = HPMA115S0, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 HPMA115S0", y = "PM25 Oficial",
       title = "Relationship between HPMA115S0 and Oficial") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept   -5.35      0.837     -6.40       0   -7.00    -3.71 
## 2 HPMA115S0    0.637     0.028     22.4        0    0.581    0.693
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
##       ID Oficial HPMA115S0 Oficial_hat residual
##    <int>   <dbl>     <dbl>       <dbl>    <dbl>
##  1     1     3.7      18.6        6.5    -2.8  
##  2     2     0.3      19.1        6.82   -6.52 
##  3     3     5.9      19.4        7.01   -1.11 
##  4     4     8.1      19.7        7.20    0.899
##  5     5     3.7      20.4        7.65   -3.95 
##  6     6     4        20.6        7.78   -3.78 
##  7     7     4.8      22          8.67   -3.87 
##  8     8     6        22.8        9.18   -3.18 
##  9     9     8.3      23.7        9.75   -1.45 
## 10    10     9.8      28.6       12.9    -3.07 
## # ... with 1,105 more rows
#Caso 11: PMSA003 VS PMS7003

df %>% select(PMSA003, PMS7003) %>% skim()
Data summary
Name Piped data
Number of rows 1115
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
PMSA003 0 1 10.46 11.99 0 0.85 6.48 16.2 89.0 ▇▂▁▁▁
PMS7003 0 1 11.60 11.87 0 1.94 8.34 17.9 91.8 ▇▂▁▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = PMSA003 ~ PMS7003)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.996
ggplot(df, aes(x = PMSA003, y = PMS7003)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 PMSA003", y = "PM25 PMS7003",
       title = "Relationship between PMSA003 and PMS7003") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ PMSA003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    1.28      0.042      30.5       0    1.2      1.36 
## 2 PMSA003      0.987     0.003     374.        0    0.982    0.992
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
##       ID PMS7003 PMSA003 PMS7003_hat residual
##    <int>   <dbl>   <dbl>       <dbl>    <dbl>
##  1     1   0       0.017        1.30   -1.30 
##  2     2   0.290   0            1.28   -0.992
##  3     3   0.71    1.61         2.87   -2.16 
##  4     4   1.4     0.41         1.69   -0.287
##  5     5   3.12    1.72         2.98    0.141
##  6     6   4.16    2.65         3.90    0.263
##  7     7   6.13    4.04         5.27    0.862
##  8     8   7.61    5.35         6.56    1.05 
##  9     9   8.4     6.25         7.45    0.951
## 10    10  20.2    18.5         19.5     0.664
## # ... with 1,105 more rows
#Caso 12: PMSA003 VS Oficial

df %>% select(PMSA003, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 1115
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
PMSA003 0 1 10.46 11.99 0 0.85 6.48 16.2 89.0 ▇▂▁▁▁
Oficial 0 1 12.80 8.32 0 6.50 11.20 17.7 55.4 ▇▆▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = PMSA003 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.560
ggplot(df, aes(x = PMSA003, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 PMSA003", y = "PM25 Oficial",
       title = "Relationship between PMSA003 and Oficial") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMSA003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    8.73      0.274      31.9       0    8.20     9.27 
## 2 PMSA003      0.389     0.017      22.5       0    0.355    0.422
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
##       ID Oficial PMSA003 Oficial_hat residual
##    <int>   <dbl>   <dbl>       <dbl>    <dbl>
##  1     1     3.7   0.017        8.74   -5.04 
##  2     2     0.3   0            8.73   -8.43 
##  3     3     5.9   1.61         9.36   -3.46 
##  4     4     8.1   0.41         8.89   -0.793
##  5     5     3.7   1.72         9.40   -5.70 
##  6     6     4     2.65         9.76   -5.76 
##  7     7     4.8   4.04        10.3    -5.50 
##  8     8     6     5.35        10.8    -4.81 
##  9     9     8.3   6.25        11.2    -2.86 
## 10    10     9.8  18.5         15.9    -6.12 
## # ... with 1,105 more rows
#Caso 13: PMS7003 VS Oficial

df %>% select(PMS7003, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 1115
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
PMS7003 0 1 11.6 11.87 0 1.94 8.34 17.9 91.8 ▇▂▁▁▁
Oficial 0 1 12.8 8.32 0 6.50 11.20 17.7 55.4 ▇▆▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = PMS7003 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.563
ggplot(df, aes(x = PMS7003, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 PMS7003", y = "PM25 Oficial",
       title = "Relationship between PMS7003 and Oficial") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMS7003, data = df)
#**Get regression table original:**
get_regression_table(score_model, digits = 11)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    8.22     0.288       28.5       0    7.66     8.79 
## 2 PMS7003      0.394    0.0174      22.7       0    0.360    0.428
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
##       ID Oficial PMS7003 Oficial_hat residual
##    <int>   <dbl>   <dbl>       <dbl>    <dbl>
##  1     1     3.7   0            8.22   -4.52 
##  2     2     0.3   0.290        8.34   -8.04 
##  3     3     5.9   0.71         8.50   -2.60 
##  4     4     8.1   1.4          8.78   -0.675
##  5     5     3.7   3.12         9.45   -5.75 
##  6     6     4     4.16         9.86   -5.86 
##  7     7     4.8   6.13        10.6    -5.84 
##  8     8     6     7.61        11.2    -5.22 
##  9     9     8.3   8.4         11.5    -3.24 
## 10    10     9.8  20.2         16.2    -6.39 
## # ... with 1,105 more rows
# **ESTACION FERIAS -1hora VS CANAIRIOS**
# **Prueba con los valores de la estacion Ferias retrasada 1 hora
# **5 sensores diferentes: PMS7003 & PMSA003 & HPMA115S0 & SPS30 & SNGCJA5**
# **Comparaciones con la estación oficial*


df <- read_excel("C:/Mediciones/FERIAS_CANAIRIOS_1h.xlsx")
View(df)

glimpse(df)
## Rows: 1,114
## Columns: 8
## $ Num       <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17...
## $ Fecha     <chr> "12-11-2020 24:00", "13-11-2020 01:00", "13-11-2020 02:00...
## $ Oficial   <dbl> 0.3, 5.9, 8.1, 3.7, 4.0, 4.8, 6.0, 8.3, 9.8, 16.5, 6.2, 1...
## $ PMS7003   <dbl> 0.00, 0.29, 0.71, 1.40, 3.12, 4.16, 6.13, 7.61, 8.40, 20....
## $ PMSA003   <dbl> 0.0169, 0.0000, 1.6100, 0.4100, 1.7200, 2.6500, 4.0400, 5...
## $ HPMA115S0 <dbl> 18.6, 19.1, 19.4, 19.7, 20.4, 20.6, 22.0, 22.8, 23.7, 28....
## $ SPS30     <dbl> 0.90, 1.02, 1.25, 1.78, 2.60, 3.00, 4.11, 4.94, 5.48, 11....
## $ SNGCJA5   <dbl> 0.00, 0.18, 0.45, 0.90, 1.51, 1.81, 2.86, 3.39, 3.48, 8.2...
df %>%
  sample_n(size = 10)
## # A tibble: 10 x 8
##      Num Fecha            Oficial PMS7003 PMSA003 HPMA115S0 SPS30 SNGCJA5
##    <dbl> <chr>              <dbl>   <dbl>   <dbl>     <dbl> <dbl>   <dbl>
##  1   873 21-12-2020 12:00     7      1.49   0.526      24.4  1.82   0.807
##  2  1108 08-01-2021 09:00    19      6.04   4.34       26.5  4.43   2.64 
##  3   866 21-12-2020 05:00    12     11.3    9.10       28    6.65   5.44 
##  4   476 02-12-2020 20:00    20.8   15.7   14          31.6  9.59   8.21 
##  5   359 27-11-2020 24:00    10.8    4.34   2.79       22.8  3.2    2.18 
##  6   108 17-11-2020 13:00    11.5    8.1    5.98       24.8  5      3.5  
##  7    33 14-11-2020 08:00    24.2   35.7   35.8        46.1 22.1   18.4  
##  8   173 20-11-2020 06:00    17.1   18.2   16.7        33.5 12     10.6  
##  9    46 14-11-2020 21:00    29.9   26.3   24.5        36.4 14.9   13.1  
## 10    14 13-11-2020 13:00    31.1   31     30.3        39.2 18.3   14.1
fig <- plot_ly(df, x = ~Num, y = ~PMS7003, name = 'PM2.5 PMS7003', type = 'scatter', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~PMSA003, name = 'PM2.5 PMSA003', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~HPMA115S0, name = 'PM2.5 HPMA115S0', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~SPS30, name = 'PM2.5 SPS30', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~SNGCJA5, name = 'PM2.5 SNGCJA5', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~Oficial, name = 'PM2.5 Oficial', mode = 'lines+markers')
fig
#Caso 14: SNGCJA5 VS Oficial-1h

df %>% select(SNGCJA5, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 1114
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SNGCJA5 0 1 6.68 9.75 0 1.53 4.8 9.75 236.87 ▇▁▁▁▁
Oficial 0 1 12.81 8.32 0 6.50 11.2 17.70 55.40 ▇▆▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SNGCJA5 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.347
ggplot(df, aes(x = SNGCJA5, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SNGCJA5", y = "PM25 Oficial",
       title = "Relationship between SNGCJA5 and Oficial-1hour") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept   10.8       0.283      38.2       0   10.3     11.4  
## 2 SNGCJA5      0.296     0.024      12.4       0    0.249    0.343
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,114 x 5
##       ID Oficial SNGCJA5 Oficial_hat residual
##    <int>   <dbl>   <dbl>       <dbl>    <dbl>
##  1     1     0.3    0           10.8   -10.5 
##  2     2     5.9    0.18        10.9    -4.98
##  3     3     8.1    0.45        11.0    -2.86
##  4     4     3.7    0.9         11.1    -7.39
##  5     5     4      1.51        11.3    -7.28
##  6     6     4.8    1.81        11.4    -6.56
##  7     7     6      2.86        11.7    -5.68
##  8     8     8.3    3.39        11.8    -3.53
##  9     9     9.8    3.48        11.9    -2.06
## 10    10    16.5    8.23        13.3     3.23
## # ... with 1,104 more rows
#Caso 15: SPS30 VS Oficial-1

df %>% select(SPS30, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 1114
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SPS30 0 1 7.32 6.66 0.08 1.97 5.53 10.67 51.5 ▇▂▁▁▁
Oficial 0 1 12.81 8.32 0.00 6.50 11.20 17.70 55.4 ▇▆▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SPS30 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.712
ggplot(df, aes(x = SPS30, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SPS30", y = "PM25 Oficial",
       title = "Relationship between SPS30 and Oficial-1hour") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    6.3       0.26       24.2       0    5.79      6.81
## 2 SPS30        0.889     0.026      33.8       0    0.837     0.94
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,114 x 5
##       ID Oficial SPS30 Oficial_hat residual
##    <int>   <dbl> <dbl>       <dbl>    <dbl>
##  1     1     0.3  0.9         7.1    -6.8  
##  2     2     5.9  1.02        7.21   -1.31 
##  3     3     8.1  1.25        7.41    0.689
##  4     4     3.7  1.78        7.88   -4.18 
##  5     5     4    2.6         8.61   -4.61 
##  6     6     4.8  3           8.97   -4.17 
##  7     7     6    4.11        9.95   -3.95 
##  8     8     8.3  4.94       10.7    -2.39 
##  9     9     9.8  5.48       11.2    -1.37 
## 10    10    16.5 11.7        16.7    -0.196
## # ... with 1,104 more rows
#Caso 16: HPMA115S0 VS Oficial-1h

df %>% select(HPMA115S0, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 1114
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
HPMA115S0 0 1 28.49 7.27 18.6 23.1 26.4 31.08 77.7 ▇▂▁▁▁
Oficial 0 1 12.81 8.32 0.0 6.5 11.2 17.70 55.4 ▇▆▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = HPMA115S0 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.711
ggplot(df, aes(x = HPMA115S0, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 HPMA115S0", y = "PM25 Oficial",
       title = "Relationship between HPMA115S0 and Oficial-1hour") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept  -10.4       0.709     -14.6       0  -11.8     -8.97 
## 2 HPMA115S0    0.813     0.024      33.7       0    0.766    0.861
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,114 x 5
##       ID Oficial HPMA115S0 Oficial_hat residual
##    <int>   <dbl>     <dbl>       <dbl>    <dbl>
##  1     1     0.3      18.6        4.76   -4.46 
##  2     2     5.9      19.1        5.17    0.729
##  3     3     8.1      19.4        5.42    2.68 
##  4     4     3.7      19.7        5.66   -1.96 
##  5     5     4        20.4        6.23   -2.23 
##  6     6     4.8      20.6        6.39   -1.59 
##  7     7     6        22          7.53   -1.53 
##  8     8     8.3      22.8        8.18    0.12 
##  9     9     9.8      23.7        8.91    0.888
## 10    10    16.5      28.6       12.9     3.60 
## # ... with 1,104 more rows
#Caso 17: PMSA003 VS Oficial-1h

df %>% select(PMSA003, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 1114
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
PMSA003 0 1 10.47 11.99 0 0.85 6.49 16.2 89.0 ▇▂▁▁▁
Oficial 0 1 12.81 8.32 0 6.50 11.20 17.7 55.4 ▇▆▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = PMSA003 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.711
ggplot(df, aes(x = PMSA003, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 PMSA003", y = "PM25 Oficial",
       title = "Relationship between PMSA003 and Oficial-1hour") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMSA003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    7.64      0.233      32.8       0    7.19     8.1  
## 2 PMSA003      0.493     0.015      33.7       0    0.464    0.522
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,114 x 5
##       ID Oficial PMSA003 Oficial_hat residual
##    <int>   <dbl>   <dbl>       <dbl>    <dbl>
##  1     1     0.3   0.017        7.65   -7.35 
##  2     2     5.9   0            7.64   -1.74 
##  3     3     8.1   1.61         8.44   -0.337
##  4     4     3.7   0.41         7.85   -4.15 
##  5     5     4     1.72         8.49   -4.49 
##  6     6     4.8   2.65         8.95   -4.15 
##  7     7     6     4.04         9.64   -3.64 
##  8     8     8.3   5.35        10.3    -1.98 
##  9     9     9.8   6.25        10.7    -0.925
## 10    10    16.5  18.5         16.8    -0.266
## # ... with 1,104 more rows
#Caso 18: PMS7003 VS Oficial-1h

df %>% select(PMS7003, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 1114
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
PMS7003 0 1 11.61 11.87 0 1.95 8.37 17.9 91.8 ▇▂▁▁▁
Oficial 0 1 12.81 8.32 0 6.50 11.20 17.7 55.4 ▇▆▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = PMS7003 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.713
ggplot(df, aes(x = PMS7003, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 PMS7003", y = "PM25 Oficial",
       title = "Relationship between PMS7003 and Oficial-1hour") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMS7003, data = df)
#**Get regression table original:**
get_regression_table(score_model, digits = 11)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    7.00     0.244       28.7       0    6.52     7.48 
## 2 PMS7003      0.500    0.0147      34.0       0    0.471    0.529
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,114 x 5
##       ID Oficial PMS7003 Oficial_hat residual
##    <int>   <dbl>   <dbl>       <dbl>    <dbl>
##  1     1     0.3   0            7.00   -6.70 
##  2     2     5.9   0.290        7.15   -1.25 
##  3     3     8.1   0.71         7.36    0.742
##  4     4     3.7   1.4          7.70   -4.00 
##  5     5     4     3.12         8.56   -4.56 
##  6     6     4.8   4.16         9.08   -4.28 
##  7     7     6     6.13        10.1    -4.07 
##  8     8     8.3   7.61        10.8    -2.51 
##  9     9     9.8   8.4         11.2    -1.40 
## 10    10    16.5  20.2         17.1    -0.598
## # ... with 1,104 more rows
# **ESTACION FERIAS -2horas VS CANAIRIOS**
# **Prueba con los valores de la estacion Ferias retrasada 2 horas
# **5 sensores diferentes: PMS7003 & PMSA003 & HPMA115S0 & SPS30 & SNGCJA5**
# **Comparaciones con la estación oficial*


df <- read_excel("C:/Mediciones/FERIAS_CANAIRIOS_2h.xlsx")
View(df)

glimpse(df)
## Rows: 1,113
## Columns: 8
## $ Num       <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17...
## $ Fecha     <chr> "12-11-2020 24:00", "13-11-2020 01:00", "13-11-2020 02:00...
## $ Oficial   <dbl> 5.9, 8.1, 3.7, 4.0, 4.8, 6.0, 8.3, 9.8, 16.5, 6.2, 10.2, ...
## $ PMS7003   <dbl> 0.00, 0.29, 0.71, 1.40, 3.12, 4.16, 6.13, 7.61, 8.40, 20....
## $ PMSA003   <dbl> 0.0169, 0.0000, 1.6100, 0.4100, 1.7200, 2.6500, 4.0400, 5...
## $ HPMA115S0 <dbl> 18.6, 19.1, 19.4, 19.7, 20.4, 20.6, 22.0, 22.8, 23.7, 28....
## $ SPS30     <dbl> 0.90, 1.02, 1.25, 1.78, 2.60, 3.00, 4.11, 4.94, 5.48, 11....
## $ SNGCJA5   <dbl> 0.00, 0.18, 0.45, 0.90, 1.51, 1.81, 2.86, 3.39, 3.48, 8.2...
df %>%
  sample_n(size = 10)
## # A tibble: 10 x 8
##      Num Fecha            Oficial PMS7003 PMSA003 HPMA115S0  SPS30 SNGCJA5
##    <dbl> <chr>              <dbl>   <dbl>   <dbl>     <dbl>  <dbl>   <dbl>
##  1    56 15-11-2020 07:00    10.3 26.1    25.4         35.9 15.5     12.6 
##  2   541 04-12-2020 23:00    11.7  6.6     3.6         25.8  4.8      3.6 
##  3   918 23-12-2020 12:00    38   21.8    23           38.1 14       10.9 
##  4   417 30-11-2020 09:00     4.2  0.48    0.0508      21.9  1.07     0.27
##  5   268 24-11-2020 05:00    55.4 77.8    73.6         65.2 39.8     36.2 
##  6   385 29-11-2020 01:00     7.2  4.77    3.06        23.2  3.76     2.34
##  7   846 20-12-2020 09:00     5    0       0           22.3  1        0   
##  8  1180 11-01-2021 06:00     7    0.0357  0           22    0.446   13.6 
##  9   142 18-11-2020 23:00     8    5.7     3.32        23.2  4.05     2.89
## 10   888 22-12-2020 06:00    18    7.89    5.79        25.4  4.39     3.52
fig <- plot_ly(df, x = ~Num, y = ~PMS7003, name = 'PM2.5 PMS7003', type = 'scatter', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~PMSA003, name = 'PM2.5 PMSA003', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~HPMA115S0, name = 'PM2.5 HPMA115S0', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~SPS30, name = 'PM2.5 SPS30', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~SNGCJA5, name = 'PM2.5 SNGCJA5', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~Oficial, name = 'PM2.5 Oficial', mode = 'lines+markers')
fig
#Caso 19: SNGCJA5 VS Oficial-2h

df %>% select(SNGCJA5, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 1113
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SNGCJA5 0 1 6.67 9.75 0 1.53 4.8 9.75 236.87 ▇▁▁▁▁
Oficial 0 1 12.82 8.31 0 6.50 11.2 17.70 55.40 ▇▆▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SNGCJA5 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.405
ggplot(df, aes(x = SNGCJA5, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SNGCJA5", y = "PM25 Oficial",
       title = "Relationship between SNGCJA5 and Oficial-2hour") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept   10.5       0.276      38.1       0    9.97    11.1  
## 2 SNGCJA5      0.345     0.023      14.8       0    0.299    0.391
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,113 x 5
##       ID Oficial SNGCJA5 Oficial_hat residual
##    <int>   <dbl>   <dbl>       <dbl>    <dbl>
##  1     1     5.9    0           10.5    -4.62
##  2     2     8.1    0.18        10.6    -2.48
##  3     3     3.7    0.45        10.7    -6.97
##  4     4     4      0.9         10.8    -6.83
##  5     5     4.8    1.51        11.0    -6.24
##  6     6     6      1.81        11.1    -5.14
##  7     7     8.3    2.86        11.5    -3.20
##  8     8     9.8    3.39        11.7    -1.89
##  9     9    16.5    3.48        11.7     4.78
## 10    10     6.2    8.23        13.4    -7.16
## # ... with 1,103 more rows
#Caso 20: SPS30 VS Oficial-2

df %>% select(SPS30, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 1113
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SPS30 0 1 7.33 6.66 0.08 1.97 5.53 10.7 51.5 ▇▂▁▁▁
Oficial 0 1 12.82 8.31 0.00 6.50 11.20 17.7 55.4 ▇▆▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SPS30 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.807
ggplot(df, aes(x = SPS30, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SPS30", y = "PM25 Oficial",
       title = "Relationship between SPS30 and Oficial-2hour") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept     5.44     0.219      24.8       0    5.01      5.87
## 2 SPS30         1.01     0.022      45.5       0    0.964     1.05
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,113 x 5
##       ID Oficial SPS30 Oficial_hat residual
##    <int>   <dbl> <dbl>       <dbl>    <dbl>
##  1     1     5.9  0.9         6.34   -0.445
##  2     2     8.1  1.02        6.47    1.63 
##  3     3     3.7  1.25        6.70   -3.00 
##  4     4     4    1.78        7.23   -3.23 
##  5     5     4.8  2.6         8.06   -3.26 
##  6     6     6    3           8.46   -2.46 
##  7     7     8.3  4.11        9.58   -1.28 
##  8     8     9.8  4.94       10.4    -0.613
##  9     9    16.5  5.48       11.0     5.54 
## 10    10     6.2 11.7        17.2   -11.0  
## # ... with 1,103 more rows
#Caso 21: HPMA115S0 VS Oficial-2h

df %>% select(HPMA115S0, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 1113
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
HPMA115S0 0 1 28.49 7.27 18.6 23.1 26.4 31.1 77.7 ▇▂▁▁▁
Oficial 0 1 12.82 8.31 0.0 6.5 11.2 17.7 55.4 ▇▆▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = HPMA115S0 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.809
ggplot(df, aes(x = HPMA115S0, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 HPMA115S0", y = "PM25 Oficial",
       title = "Relationship between HPMA115S0 and Oficial-2hour") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept  -13.5       0.593     -22.8       0  -14.7    -12.3  
## 2 HPMA115S0    0.924     0.02       45.8       0    0.884    0.963
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,113 x 5
##       ID Oficial HPMA115S0 Oficial_hat residual
##    <int>   <dbl>     <dbl>       <dbl>    <dbl>
##  1     1     5.9      18.6        3.68    2.22 
##  2     2     8.1      19.1        4.14    3.96 
##  3     3     3.7      19.4        4.42   -0.716
##  4     4     4        19.7        4.69   -0.693
##  5     5     4.8      20.4        5.34   -0.54 
##  6     6     6        20.6        5.52    0.475
##  7     7     8.3      22          6.82    1.48 
##  8     8     9.8      22.8        7.56    2.24 
##  9     9    16.5      23.7        8.39    8.11 
## 10    10     6.2      28.6       12.9    -6.72 
## # ... with 1,103 more rows
#Caso 22: PMSA003 VS Oficial-2h

df %>% select(PMSA003, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 1113
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
PMSA003 0 1 10.48 11.99 0 0.85 6.5 16.2 89.0 ▇▂▁▁▁
Oficial 0 1 12.82 8.31 0 6.50 11.2 17.7 55.4 ▇▆▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = PMSA003 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.806
ggplot(df, aes(x = PMSA003, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 PMSA003", y = "PM25 Oficial",
       title = "Relationship between PMSA003 and Oficial-2hour") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMSA003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    6.96      0.196      35.5       0    6.58     7.35 
## 2 PMSA003      0.559     0.012      45.3       0    0.534    0.583
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,113 x 5
##       ID Oficial PMSA003 Oficial_hat residual
##    <int>   <dbl>   <dbl>       <dbl>    <dbl>
##  1     1     5.9   0.017        6.97   -1.07 
##  2     2     8.1   0            6.96    1.14 
##  3     3     3.7   1.61         7.86   -4.16 
##  4     4     4     0.41         7.19   -3.19 
##  5     5     4.8   1.72         7.92   -3.12 
##  6     6     6     2.65         8.44   -2.44 
##  7     7     8.3   4.04         9.22   -0.921
##  8     8     9.8   5.35         9.95   -0.152
##  9     9    16.5   6.25        10.5     6.04 
## 10    10     6.2  18.5         17.3   -11.1  
## # ... with 1,103 more rows
#Caso 23: PMS7003 VS Oficial-2h

df %>% select(PMS7003, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 1113
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
PMS7003 0 1 11.62 11.87 0 1.97 8.4 17.9 91.8 ▇▂▁▁▁
Oficial 0 1 12.82 8.31 0 6.50 11.2 17.7 55.4 ▇▆▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = PMS7003 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.811
ggplot(df, aes(x = PMS7003, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 PMS7003", y = "PM25 Oficial",
       title = "Relationship between PMS7003 and Oficial-2hour") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMS7003, data = df)
#**Get regression table original:**
get_regression_table(score_model, digits = 11)
## # A tibble: 2 x 7
##   term      estimate std_error statistic p_value lower_ci upper_ci
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept    6.22     0.204       30.5       0    5.82     6.62 
## 2 PMS7003      0.567    0.0123      46.1       0    0.543    0.592
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,113 x 5
##       ID Oficial PMS7003 Oficial_hat residual
##    <int>   <dbl>   <dbl>       <dbl>    <dbl>
##  1     1     5.9   0            6.22   -0.321
##  2     2     8.1   0.290        6.39    1.71 
##  3     3     3.7   0.71         6.62   -2.92 
##  4     4     4     1.4          7.02   -3.02 
##  5     5     4.8   3.12         7.99   -3.19 
##  6     6     6     4.16         8.58   -2.58 
##  7     7     8.3   6.13         9.7    -1.4  
##  8     8     9.8   7.61        10.5    -0.74 
##  9     9    16.5   8.4         11.0     5.51 
## 10    10     6.2  20.2         17.7   -11.5  
## # ... with 1,103 more rows