library(readxl)
library(tidyverse)
## -- Attaching packages --------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v dplyr 1.0.2
## v tidyr 1.1.1 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(moderndive)
library(skimr)
# **ESTACION PAIBA VS CANAIRIOS**
# **5 sensores diferentes: PMS7003 & PMSA003 & HPMA115S0 & SPS30 & SNGCJA5**
# **Comparaciones entre sensores de bajo costo y la estación de referencia Paiba*
df <- read_excel("C:/Mediciones/PAIBA_CANAIRIOS.xlsx")
View(df)
glimpse(df)
## Rows: 1,241
## Columns: 8
## $ Num <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17...
## $ Fecha <dttm> 2020-11-07 00:00:00, 2020-11-07 01:00:00, 2020-11-07 02:...
## $ Oficial <dbl> 15.99, 12.09, 10.31, 13.64, 16.51, 18.62, 18.97, 22.07, 2...
## $ PMS7003 <dbl> 33.574074, 27.057692, 22.096154, 37.192308, 46.180000, 47...
## $ PMSA003 <dbl> 31.666667, 25.134615, 20.115385, 35.923077, 44.300000, 47...
## $ HPMA115S0 <dbl> 19.333333, 14.730769, 12.230769, 19.750000, 24.360000, 25...
## $ SPS30 <dbl> 17.185185, 14.115385, 11.634615, 19.442308, 23.260000, 24...
## $ SNGCJA5 <dbl> 13.259259, 10.634615, 8.673077, 14.846154, 17.900000, 19....
df %>%
sample_n(size = 10)
## # A tibble: 10 x 8
## Num Fecha Oficial PMS7003 PMSA003 HPMA115S0 SPS30 SNGCJA5
## <dbl> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 118 2020-11-11 21:00:00 7.69 12.6 11.8 6.68 6.75 5.04
## 2 870 2020-12-14 13:00:00 20.2 6.82 5.4 4.65 3.73 2.16
## 3 666 2020-12-06 01:00:00 14.4 10.2 9.7 7.07 6.67 4.47
## 4 220 2020-11-16 03:00:00 5.11 8.32 6.94 5.26 5.53 3.49
## 5 1064 2020-12-22 15:00:00 27.4 46.7 43.4 23.2 22.7 16.2
## 6 256 2020-11-17 15:00:00 8.72 15.6 10.7 8.61 7.20 4.83
## 7 24 2020-11-07 23:00:00 8.65 12.9 12.7 8.41 8.06 5.70
## 8 186 2020-11-14 17:00:00 15.9 37.3 41.7 20.2 20.3 16.5
## 9 1018 2020-12-20 17:00:00 8.71 3.71 2.20 3.11 2.64 1.14
## 10 167 2020-11-13 22:00:00 14.7 28.3 31.6 15.3 15 12.5
fig <- plot_ly(df, x = ~Num, y = ~PMS7003, name = 'PM2.5 PMS7003', type = 'scatter', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~PMSA003, name = 'PM2.5 PMSA003', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~HPMA115S0, name = 'PM2.5 HPMA115S0', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~SPS30, name = 'PM2.5 SPS30', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~SNGCJA5, name = 'PM2.5 SNGCJA5', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~Oficial, name = 'PM2.5 Oficial', mode = 'lines+markers')
fig
#Caso 1: SNGCJA5 VS SPS30
df %>% select(SNGCJA5, SPS30) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1241 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
8.87 |
6.48 |
0.05 |
3.80 |
7.47 |
12.32 |
40.80 |
▇▅▂▁▁ |
| SPS30 |
0 |
1 |
12.25 |
8.36 |
1.03 |
5.78 |
10.41 |
16.59 |
51.67 |
▇▅▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ SPS30)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.995
ggplot(df, aes(x = SNGCJA5, y = SPS30)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 SPS30",
title = "Relationship between SNGCJA5 and SPS30") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(SPS30 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 0.875 0.042 20.9 0 0.793 0.957
## 2 SNGCJA5 1.28 0.004 336. 0 1.27 1.29
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
## ID SPS30 SNGCJA5 SPS30_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 17.2 13.3 17.9 -0.688
## 2 2 14.1 10.6 14.5 -0.393
## 3 3 11.6 8.67 12.0 -0.359
## 4 4 19.4 14.8 19.9 -0.465
## 5 5 23.3 17.9 23.8 -0.563
## 6 6 25.0 19.3 25.7 -0.679
## 7 7 23.9 18.4 24.5 -0.56
## 8 8 27.5 22.1 29.2 -1.76
## 9 9 28.4 22.9 30.2 -1.75
## 10 10 39.4 30.7 40.3 -0.885
## # ... with 1,231 more rows
#Caso 2: SNGCJA5 VS HPMA115S0
df %>% select(SNGCJA5, HPMA115S0) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1241 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
8.87 |
6.48 |
0.05 |
3.80 |
7.47 |
12.32 |
40.80 |
▇▅▂▁▁ |
| HPMA115S0 |
0 |
1 |
12.88 |
8.60 |
1.35 |
6.15 |
11.05 |
17.25 |
51.89 |
▇▅▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ HPMA115S0)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.990
ggplot(df, aes(x = SNGCJA5, y = HPMA115S0)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 HPMA115S0",
title = "Relationship between SNGCJA5 and HPMA115S0") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(HPMA115S0 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 1.24 0.059 20.9 0 1.12 1.36
## 2 SNGCJA5 1.31 0.005 243. 0 1.30 1.32
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
## ID HPMA115S0 SNGCJA5 HPMA115S0_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 19.3 13.3 18.6 0.688
## 2 2 14.7 10.6 15.2 -0.469
## 3 3 12.2 8.67 12.6 -0.394
## 4 4 19.8 14.8 20.7 -0.978
## 5 5 24.4 17.9 24.7 -0.377
## 6 6 25 19.3 26.6 -1.62
## 7 7 24.5 18.4 25.4 -0.926
## 8 8 29.3 22.1 30.3 -0.987
## 9 9 30.8 22.9 31.3 -0.447
## 10 10 42.3 30.7 41.6 0.698
## # ... with 1,231 more rows
#Caso 3: SNGCJA5 VS PMSA003
df %>% select(SNGCJA5, PMSA003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1241 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
8.87 |
6.48 |
0.05 |
3.80 |
7.47 |
12.32 |
40.80 |
▇▅▂▁▁ |
| PMSA003 |
0 |
1 |
22.22 |
17.04 |
0.04 |
8.44 |
18.74 |
31.61 |
108.45 |
▇▅▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.996
ggplot(df, aes(x = SNGCJA5, y = PMSA003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 PMSA003",
title = "Relationship between SNGCJA5 and PMSA003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -0.991 0.074 -13.3 0 -1.14 -0.845
## 2 SNGCJA5 2.62 0.007 387. 0 2.60 2.63
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
## ID PMSA003 SNGCJA5 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 31.7 13.3 33.7 -2.04
## 2 2 25.1 10.6 26.8 -1.70
## 3 3 20.1 8.67 21.7 -1.59
## 4 4 35.9 14.8 37.9 -1.93
## 5 5 44.3 17.9 45.8 -1.55
## 6 6 47.1 19.3 49.6 -2.49
## 7 7 42.2 18.4 47.2 -5.00
## 8 8 52.0 22.1 56.9 -4.90
## 9 9 55.0 22.9 58.9 -3.87
## 10 10 75.5 30.7 79.4 -3.94
## # ... with 1,231 more rows
#Caso 4: SNGCJA5 VS PMS7003
df %>% select(SNGCJA5, PMS7003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1241 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
8.87 |
6.48 |
0.05 |
3.80 |
7.47 |
12.32 |
40.80 |
▇▅▂▁▁ |
| PMS7003 |
0 |
1 |
22.17 |
15.69 |
0.70 |
9.64 |
19.25 |
30.73 |
93.25 |
▇▆▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.987
ggplot(df, aes(x = SNGCJA5, y = PMS7003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 PMS7003",
title = "Relationship between SNGCJA5 and PMS7003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 0.984 0.122 8.08 0 0.745 1.22
## 2 SNGCJA5 2.39 0.011 215. 0 2.37 2.41
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
## ID PMS7003 SNGCJA5 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 33.6 13.3 32.7 0.92
## 2 2 27.1 10.6 26.4 0.673
## 3 3 22.1 8.67 21.7 0.396
## 4 4 37.2 14.8 36.4 0.748
## 5 5 46.2 17.9 43.7 2.44
## 6 6 47.7 19.3 47.2 0.561
## 7 7 44.4 18.4 45.0 -0.576
## 8 8 52.9 22.1 53.8 -0.889
## 9 9 54.0 22.9 55.6 -1.65
## 10 10 73.7 30.7 74.4 -0.667
## # ... with 1,231 more rows
#Caso 5: SNGCJA5 VS Oficial
df %>% select(SNGCJA5, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1241 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
8.87 |
6.48 |
0.05 |
3.80 |
7.47 |
12.32 |
40.80 |
▇▅▂▁▁ |
| Oficial |
0 |
1 |
15.38 |
8.41 |
2.06 |
9.51 |
13.39 |
19.63 |
62.22 |
▇▅▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.564
ggplot(df, aes(x = SNGCJA5, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 Oficial",
title = "Relationship between SNGCJA5 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 8.88 0.334 26.6 0 8.22 9.54
## 2 SNGCJA5 0.732 0.03 24.1 0 0.673 0.792
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
## ID Oficial SNGCJA5 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 16.0 13.3 18.6 -2.6
## 2 2 12.1 10.6 16.7 -4.58
## 3 3 10.3 8.67 15.2 -4.92
## 4 4 13.6 14.8 19.8 -6.11
## 5 5 16.5 17.9 22.0 -5.48
## 6 6 18.6 19.3 23.0 -4.42
## 7 7 19.0 18.4 22.4 -3.40
## 8 8 22.1 22.1 25.1 -3.00
## 9 9 22.6 22.9 25.6 -3.04
## 10 10 28.7 30.7 31.4 -2.67
## # ... with 1,231 more rows
#Caso 6: SPS30 VS HPMA115S0
df %>% select(SPS30, HPMA115S0) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1241 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
12.25 |
8.36 |
1.03 |
5.78 |
10.41 |
16.59 |
51.67 |
▇▅▂▁▁ |
| HPMA115S0 |
0 |
1 |
12.88 |
8.60 |
1.35 |
6.15 |
11.05 |
17.25 |
51.89 |
▇▅▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ HPMA115S0)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.994
ggplot(df, aes(x = SPS30, y = HPMA115S0)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 HPMA115S0",
title = "Relationship between SPS30 and HPMA115S0") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(HPMA115S0 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 0.357 0.047 7.56 0 0.265 0.45
## 2 SPS30 1.02 0.003 321. 0 1.02 1.03
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
## ID HPMA115S0 SPS30 HPMA115S0_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 19.3 17.2 17.9 1.40
## 2 2 14.7 14.1 14.8 -0.065
## 3 3 12.2 11.6 12.3 -0.027
## 4 4 19.8 19.4 20.2 -0.494
## 5 5 24.4 23.3 24.1 0.211
## 6 6 25 25.0 25.9 -0.91
## 7 7 24.5 23.9 24.8 -0.341
## 8 8 29.3 27.5 28.4 0.83
## 9 9 30.8 28.4 29.5 1.36
## 10 10 42.3 39.4 40.6 1.63
## # ... with 1,231 more rows
#Caso 7: SPS30 VS PMSA003
df %>% select(SPS30, PMSA003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1241 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
12.25 |
8.36 |
1.03 |
5.78 |
10.41 |
16.59 |
51.67 |
▇▅▂▁▁ |
| PMSA003 |
0 |
1 |
22.22 |
17.04 |
0.04 |
8.44 |
18.74 |
31.61 |
108.45 |
▇▅▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.993
ggplot(df, aes(x = SPS30, y = PMSA003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 PMSA003",
title = "Relationship between SPS30 and PMSA003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -2.57 0.1 -25.7 0 -2.77 -2.38
## 2 SPS30 2.02 0.007 300. 0 2.01 2.04
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
## ID PMSA003 SPS30 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 31.7 17.2 32.2 -0.552
## 2 2 25.1 14.1 26.0 -0.869
## 3 3 20.1 11.6 21.0 -0.866
## 4 4 35.9 19.4 36.8 -0.865
## 5 5 44.3 23.3 44.5 -0.217
## 6 6 47.1 25.0 48.0 -0.891
## 7 7 42.2 23.9 45.9 -3.67
## 8 8 52.0 27.5 53.0 -1.06
## 9 9 55.0 28.4 55.0 -0.031
## 10 10 75.5 39.4 77.2 -1.68
## # ... with 1,231 more rows
#Caso 8: SPS30 VS PMS7003
df %>% select(SPS30, PMS7003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1241 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
12.25 |
8.36 |
1.03 |
5.78 |
10.41 |
16.59 |
51.67 |
▇▅▂▁▁ |
| PMS7003 |
0 |
1 |
22.17 |
15.69 |
0.70 |
9.64 |
19.25 |
30.73 |
93.25 |
▇▆▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.991
ggplot(df, aes(x = SPS30, y = PMS7003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 PMS7003",
title = "Relationship between SPS30 and PMS7003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -0.62 0.105 -5.91 0 -0.825 -0.414
## 2 SPS30 1.86 0.007 263. 0 1.85 1.88
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
## ID PMS7003 SPS30 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 33.6 17.2 31.4 2.21
## 2 2 27.1 14.1 25.6 1.41
## 3 3 22.1 11.6 21.0 1.06
## 4 4 37.2 19.4 35.6 1.63
## 5 5 46.2 23.3 42.7 3.51
## 6 6 47.7 25.0 45.9 1.85
## 7 7 44.4 23.9 43.9 0.492
## 8 8 52.9 27.5 50.5 2.42
## 9 9 54.0 28.4 52.3 1.65
## 10 10 73.7 39.4 72.7 1.04
## # ... with 1,231 more rows
#Caso 9: SPS30 VS Oficial
df %>% select(SPS30, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1241 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
12.25 |
8.36 |
1.03 |
5.78 |
10.41 |
16.59 |
51.67 |
▇▅▂▁▁ |
| Oficial |
0 |
1 |
15.38 |
8.41 |
2.06 |
9.51 |
13.39 |
19.63 |
62.22 |
▇▅▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.559
ggplot(df, aes(x = SPS30, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 Oficial",
title = "Relationship between SPS30 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 8.49 0.352 24.1 0 7.80 9.18
## 2 SPS30 0.562 0.024 23.7 0 0.516 0.609
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
## ID Oficial SPS30 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 16.0 17.2 18.2 -2.16
## 2 2 12.1 14.1 16.4 -4.34
## 3 3 10.3 11.6 15.0 -4.72
## 4 4 13.6 19.4 19.4 -5.78
## 5 5 16.5 23.3 21.6 -5.06
## 6 6 18.6 25.0 22.5 -3.92
## 7 7 19.0 23.9 21.9 -2.97
## 8 8 22.1 27.5 23.9 -1.86
## 9 9 22.6 28.4 24.5 -1.89
## 10 10 28.7 39.4 30.6 -1.92
## # ... with 1,231 more rows
#Caso 10: HPMA115S0 VS PMSA003
df %>% select(HPMA115S0, PMSA003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1241 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
12.88 |
8.60 |
1.35 |
6.15 |
11.05 |
17.25 |
51.89 |
▇▅▂▁▁ |
| PMSA003 |
0 |
1 |
22.22 |
17.04 |
0.04 |
8.44 |
18.74 |
31.61 |
108.45 |
▇▅▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = HPMA115S0 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.984
ggplot(df, aes(x = HPMA115S0, y = PMSA003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 HPMA115S0", y = "PM25 PMSA003",
title = "Relationship between HPMA115S0 and PMSA003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -2.89 0.156 -18.6 0 -3.20 -2.59
## 2 HPMA115S0 1.95 0.01 194. 0 1.93 1.97
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
## ID PMSA003 HPMA115S0 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 31.7 19.3 34.8 -3.12
## 2 2 25.1 14.7 25.8 -0.685
## 3 3 20.1 12.2 20.9 -0.832
## 4 4 35.9 19.8 35.6 0.32
## 5 5 44.3 24.4 44.6 -0.288
## 6 6 47.1 25 45.8 1.27
## 7 7 42.2 24.5 44.8 -2.64
## 8 8 52.0 29.3 54.2 -2.21
## 9 9 55.0 30.8 57.2 -2.19
## 10 10 75.5 42.3 79.5 -4.03
## # ... with 1,231 more rows
#Caso 11: HPMA115S0 VS PMS7003
df %>% select(HPMA115S0, PMS7003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1241 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
12.88 |
8.60 |
1.35 |
6.15 |
11.05 |
17.25 |
51.89 |
▇▅▂▁▁ |
| PMS7003 |
0 |
1 |
22.17 |
15.69 |
0.70 |
9.64 |
19.25 |
30.73 |
93.25 |
▇▆▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = HPMA115S0 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.990
ggplot(df, aes(x = HPMA115S0, y = PMS7003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 HPMA115S0", y = "PM25 PMS7003",
title = "Relationship between HPMA115S0 and PMS7003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -1.11 0.111 -10.0 0 -1.33 -0.895
## 2 HPMA115S0 1.81 0.007 253. 0 1.79 1.82
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
## ID PMS7003 HPMA115S0 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 33.6 19.3 33.8 -0.252
## 2 2 27.1 14.7 25.5 1.55
## 3 3 22.1 12.2 21.0 1.11
## 4 4 37.2 19.8 34.6 2.61
## 5 5 46.2 24.4 42.9 3.27
## 6 6 47.7 25 44.1 3.66
## 7 7 44.4 24.5 43.1 1.25
## 8 8 52.9 29.3 51.8 1.11
## 9 9 54.0 30.8 54.6 -0.612
## 10 10 73.7 42.3 75.3 -1.57
## # ... with 1,231 more rows
#Caso 10: HPMA115S0 VS Oficial
df %>% select(HPMA115S0, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1241 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
12.88 |
8.60 |
1.35 |
6.15 |
11.05 |
17.25 |
51.89 |
▇▅▂▁▁ |
| Oficial |
0 |
1 |
15.38 |
8.41 |
2.06 |
9.51 |
13.39 |
19.63 |
62.22 |
▇▅▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = HPMA115S0 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.577
ggplot(df, aes(x = HPMA115S0, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 HPMA115S0", y = "PM25 Oficial",
title = "Relationship between HPMA115S0 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 8.11 0.352 23.0 0 7.42 8.80
## 2 HPMA115S0 0.564 0.023 24.8 0 0.52 0.609
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
## ID Oficial HPMA115S0 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 16.0 19.3 19.0 -3.02
## 2 2 12.1 14.7 16.4 -4.33
## 3 3 10.3 12.2 15.0 -4.70
## 4 4 13.6 19.8 19.2 -5.61
## 5 5 16.5 24.4 21.8 -5.34
## 6 6 18.6 25 22.2 -3.59
## 7 7 19.0 24.5 21.9 -2.95
## 8 8 22.1 29.3 24.6 -2.55
## 9 9 22.6 30.8 25.5 -2.9
## 10 10 28.7 42.3 32.0 -3.24
## # ... with 1,231 more rows
#Caso 11: PMSA003 VS PMS7003
df %>% select(PMSA003, PMS7003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1241 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMSA003 |
0 |
1 |
22.22 |
17.04 |
0.04 |
8.44 |
18.74 |
31.61 |
108.45 |
▇▅▁▁▁ |
| PMS7003 |
0 |
1 |
22.17 |
15.69 |
0.70 |
9.64 |
19.25 |
30.73 |
93.25 |
▇▆▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = PMSA003 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.988
ggplot(df, aes(x = PMSA003, y = PMS7003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 PMSA003", y = "PM25 PMS7003",
title = "Relationship between PMSA003 and PMS7003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ PMSA003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 1.96 0.115 17.1 0 1.73 2.18
## 2 PMSA003 0.91 0.004 222. 0 0.902 0.918
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
## ID PMS7003 PMSA003 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 33.6 31.7 30.8 2.81
## 2 2 27.1 25.1 24.8 2.24
## 3 3 22.1 20.1 20.3 1.84
## 4 4 37.2 35.9 34.6 2.56
## 5 5 46.2 44.3 42.3 3.92
## 6 6 47.7 47.1 44.8 2.91
## 7 7 44.4 42.2 40.3 4.05
## 8 8 52.9 52.0 49.2 3.68
## 9 9 54.0 55.0 52.0 1.99
## 10 10 73.7 75.5 70.6 3.09
## # ... with 1,231 more rows
#Caso 12: PMSA003 VS Oficial
df %>% select(PMSA003, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1241 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMSA003 |
0 |
1 |
22.22 |
17.04 |
0.04 |
8.44 |
18.74 |
31.61 |
108.45 |
▇▅▁▁▁ |
| Oficial |
0 |
1 |
15.38 |
8.41 |
2.06 |
9.51 |
13.39 |
19.63 |
62.22 |
▇▅▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = PMSA003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.580
ggplot(df, aes(x = PMSA003, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 PMSA003", y = "PM25 Oficial",
title = "Relationship between PMSA003 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMSA003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 9.00 0.32 28.1 0 8.38 9.63
## 2 PMSA003 0.287 0.011 25.1 0 0.264 0.309
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
## ID Oficial PMSA003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 16.0 31.7 18.1 -2.09
## 2 2 12.1 25.1 16.2 -4.12
## 3 3 10.3 20.1 14.8 -4.46
## 4 4 13.6 35.9 19.3 -5.66
## 5 5 16.5 44.3 21.7 -5.20
## 6 6 18.6 47.1 22.5 -3.89
## 7 7 19.0 42.2 21.1 -2.13
## 8 8 22.1 52.0 23.9 -1.83
## 9 9 22.6 55.0 24.8 -2.18
## 10 10 28.7 75.5 30.6 -1.93
## # ... with 1,231 more rows
#Caso 13: PMS7003 VS Oficial
df %>% select(PMS7003, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1241 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMS7003 |
0 |
1 |
22.17 |
15.69 |
0.70 |
9.64 |
19.25 |
30.73 |
93.25 |
▇▆▂▁▁ |
| Oficial |
0 |
1 |
15.38 |
8.41 |
2.06 |
9.51 |
13.39 |
19.63 |
62.22 |
▇▅▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = PMS7003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.574
ggplot(df, aes(x = PMS7003, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 PMS7003", y = "PM25 Oficial",
title = "Relationship between PMS7003 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMS7003, data = df)
#**Get regression table original:**
get_regression_table(score_model, digits = 11)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 8.55 0.339 25.2 0 7.89 9.22
## 2 PMS7003 0.308 0.0125 24.7 0 0.283 0.332
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
## ID Oficial PMS7003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 16.0 33.6 18.9 -2.89
## 2 2 12.1 27.1 16.9 -4.79
## 3 3 10.3 22.1 15.4 -5.04
## 4 4 13.6 37.2 20.0 -6.36
## 5 5 16.5 46.2 22.8 -6.25
## 6 6 18.6 47.7 23.2 -4.62
## 7 7 19.0 44.4 22.2 -3.24
## 8 8 22.1 52.9 24.8 -2.76
## 9 9 22.6 54.0 25.2 -2.57
## 10 10 28.7 73.7 31.2 -2.52
## # ... with 1,231 more rows
# **ESTACION PAIBA NOVIEMBRE VS CANAIRIOS**
# **Prueba con los valores de la estacion Paiba solo de Noviembre
# **5 sensores diferentes: PMS7003 & PMSA003 & HPMA115S0 & SPS30 & SNGCJA5**
# **Comparaciones con la estación referencia*
df <- read_excel("C:/Mediciones/PAIBA_CANAIRIOS_NOV.xlsx")
View(df)
glimpse(df)
## Rows: 544
## Columns: 8
## $ Num <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17...
## $ Fecha <dttm> 2020-11-07 00:00:00, 2020-11-07 01:00:00, 2020-11-07 02:...
## $ Oficial <dbl> 15.99, 12.09, 10.31, 13.64, 16.51, 18.62, 18.97, 22.07, 2...
## $ PMS7003 <dbl> 33.574074, 27.057692, 22.096154, 37.192308, 46.180000, 47...
## $ PMSA003 <dbl> 31.666667, 25.134615, 20.115385, 35.923077, 44.300000, 47...
## $ HPMA115S0 <dbl> 19.333333, 14.730769, 12.230769, 19.750000, 24.360000, 25...
## $ SPS30 <dbl> 17.185185, 14.115385, 11.634615, 19.442308, 23.260000, 24...
## $ SNGCJA5 <dbl> 13.259259, 10.634615, 8.673077, 14.846154, 17.900000, 19....
df %>%
sample_n(size = 10)
## # A tibble: 10 x 8
## Num Fecha Oficial PMS7003 PMSA003 HPMA115S0 SPS30 SNGCJA5
## <dbl> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 519 2020-11-29 22:00:00 4.21 3.33 1.65 3.98 2.31 1.02
## 2 349 2020-11-22 20:00:00 11.5 21.9 23.8 13.4 12.8 9.07
## 3 31 2020-11-08 06:00:00 11.4 22.1 24.6 13.9 13.2 10.2
## 4 544 2020-11-30 23:00:00 7.74 11.0 10.1 7.06 6.44 4.33
## 5 62 2020-11-09 13:00:00 12.6 23.2 18.7 12.9 11.3 8.15
## 6 58 2020-11-09 09:00:00 14.0 37.5 39.1 17.8 18.9 15.3
## 7 437 2020-11-26 12:00:00 7.66 13.0 12.2 7.91 7.58 4.74
## 8 273 2020-11-19 16:00:00 10.9 25.5 24.5 12 12.7 9.5
## 9 253 2020-11-17 12:00:00 12.3 28.0 23.5 14.4 13.1 9.23
## 10 24 2020-11-07 23:00:00 8.65 12.9 12.7 8.41 8.06 5.70
fig <- plot_ly(df, x = ~Num, y = ~PMS7003, name = 'PM2.5 PMS7003', type = 'scatter', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~PMSA003, name = 'PM2.5 PMSA003', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~HPMA115S0, name = 'PM2.5 HPMA115S0', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~SPS30, name = 'PM2.5 SPS30', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~SNGCJA5, name = 'PM2.5 SNGCJA5', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~Oficial, name = 'PM2.5 Oficial', mode = 'lines+markers')
fig
#Caso 14: SNGCJA5 VS Oficial-NOV
df %>% select(SNGCJA5, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
544 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
10.07 |
6.36 |
0.05 |
5.29 |
9.15 |
13.71 |
36.96 |
▇▇▃▁▁ |
| Oficial |
0 |
1 |
11.63 |
5.26 |
2.06 |
7.79 |
11.01 |
14.70 |
33.11 |
▅▇▃▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.962
ggplot(df, aes(x = SNGCJA5, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 Oficial",
title = "Relationship between SNGCJA5 and PaibaNov") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 3.61 0.115 31.4 0 3.38 3.84
## 2 SNGCJA5 0.797 0.01 82.5 0 0.778 0.816
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 544 x 5
## ID Oficial SNGCJA5 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 16.0 13.3 14.2 1.81
## 2 2 12.1 10.6 12.1 0.003
## 3 3 10.3 8.67 10.5 -0.213
## 4 4 13.6 14.8 15.4 -1.80
## 5 5 16.5 17.9 17.9 -1.37
## 6 6 18.6 19.3 19.0 -0.4
## 7 7 19.0 18.4 18.3 0.679
## 8 8 22.1 22.1 21.2 0.836
## 9 9 22.6 22.9 21.8 0.751
## 10 10 28.7 30.7 28.1 0.609
## # ... with 534 more rows
#Caso 15: SPS30 VS Oficial-NOV
df %>% select(SPS30, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
544 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
13.47 |
8.07 |
1.03 |
7.38 |
12.33 |
18.29 |
44.85 |
▇▇▃▁▁ |
| Oficial |
0 |
1 |
11.63 |
5.26 |
2.06 |
7.79 |
11.01 |
14.70 |
33.11 |
▅▇▃▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.955
ggplot(df, aes(x = SPS30, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 Oficial",
title = "Relationship between SPS30 and PaibaNov") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 3.24 0.13 24.9 0 2.98 3.50
## 2 SPS30 0.623 0.008 75.1 0 0.607 0.639
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 544 x 5
## ID Oficial SPS30 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 16.0 17.2 13.9 2.04
## 2 2 12.1 14.1 12.0 0.056
## 3 3 10.3 11.6 10.5 -0.179
## 4 4 13.6 19.4 15.4 -1.71
## 5 5 16.5 23.3 17.7 -1.22
## 6 6 18.6 25.0 18.8 -0.183
## 7 7 19.0 23.9 18.1 0.823
## 8 8 22.1 27.5 20.3 1.72
## 9 9 22.6 28.4 21.0 1.63
## 10 10 28.7 39.4 27.8 0.936
## # ... with 534 more rows
#Caso 16: HPMA115S0 VS Oficial-NOV
df %>% select(HPMA115S0, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
544 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
14.32 |
8.53 |
1.42 |
8.05 |
13.04 |
18.8 |
51.08 |
▇▇▂▁▁ |
| Oficial |
0 |
1 |
11.63 |
5.26 |
2.06 |
7.79 |
11.01 |
14.7 |
33.11 |
▅▇▃▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = HPMA115S0 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.959
ggplot(df, aes(x = HPMA115S0, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 HPMA115S0", y = "PM25 Oficial",
title = "Relationship between HPMA115S0 and PaibaNov") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 3.16 0.126 25.2 0 2.91 3.41
## 2 HPMA115S0 0.592 0.008 78.5 0 0.577 0.606
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 544 x 5
## ID Oficial HPMA115S0 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 16.0 19.3 14.6 1.39
## 2 2 12.1 14.7 11.9 0.215
## 3 3 10.3 12.2 10.4 -0.086
## 4 4 13.6 19.8 14.8 -1.20
## 5 5 16.5 24.4 17.6 -1.06
## 6 6 18.6 25 18.0 0.67
## 7 7 19.0 24.5 17.6 1.32
## 8 8 22.1 29.3 20.5 1.59
## 9 9 22.6 30.8 21.4 1.2
## 10 10 28.7 42.3 28.2 0.542
## # ... with 534 more rows
#Caso 17: PMSA003 VS Oficial-NOV
df %>% select(PMSA003, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
544 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMSA003 |
0 |
1 |
24.69 |
16.22 |
0.04 |
12.43 |
22.70 |
34.49 |
95.43 |
▇▇▃▁▁ |
| Oficial |
0 |
1 |
11.63 |
5.26 |
2.06 |
7.79 |
11.01 |
14.70 |
33.11 |
▅▇▃▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = PMSA003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.954
ggplot(df, aes(x = PMSA003, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 PMSA003", y = "PM25 Oficial",
title = "Relationship between PMSA003 and PaibaNov") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMSA003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 3.99 0.123 32.4 0 3.75 4.23
## 2 PMSA003 0.31 0.004 74.3 0 0.301 0.318
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 544 x 5
## ID Oficial PMSA003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 16.0 31.7 13.8 2.19
## 2 2 12.1 25.1 11.8 0.318
## 3 3 10.3 20.1 10.2 0.092
## 4 4 13.6 35.9 15.1 -1.47
## 5 5 16.5 44.3 17.7 -1.20
## 6 6 18.6 47.1 18.6 0.043
## 7 7 19.0 42.2 17.1 1.91
## 8 8 22.1 52.0 20.1 1.99
## 9 9 22.6 55.0 21.0 1.58
## 10 10 28.7 75.5 27.4 1.35
## # ... with 534 more rows
#Caso 18: PMS7003 VS Oficial-NOV
df %>% select(PMS7003, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
544 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMS7003 |
0 |
1 |
24.80 |
15.38 |
0.70 |
13.55 |
22.36 |
33.59 |
87.28 |
▇▇▃▁▁ |
| Oficial |
0 |
1 |
11.63 |
5.26 |
2.06 |
7.79 |
11.01 |
14.70 |
33.11 |
▅▇▃▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = PMS7003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.952
ggplot(df, aes(x = PMS7003, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 PMS7003", y = "PM25 Oficial",
title = "Relationship between PMS7003 and PaibaNov") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMS7003, data = df)
#**Get regression table original:**
get_regression_table(score_model, digits = 11)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 3.56 0.131 27.1 0 3.30 3.81
## 2 PMS7003 0.326 0.00450 72.4 0 0.317 0.335
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 544 x 5
## ID Oficial PMS7003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 16.0 33.6 14.5 1.50
## 2 2 12.1 27.1 12.4 -0.281
## 3 3 10.3 22.1 10.8 -0.444
## 4 4 13.6 37.2 15.7 -2.03
## 5 5 16.5 46.2 18.6 -2.09
## 6 6 18.6 47.7 19.1 -0.482
## 7 7 19.0 44.4 18.0 0.95
## 8 8 22.1 52.9 20.8 1.28
## 9 9 22.6 54.0 21.1 1.46
## 10 10 28.7 73.7 27.6 1.14
## # ... with 534 more rows
# **ESTACION PAIBA DICIEMBRE VS CANAIRIOS**
# **Prueba con los valores de la estacion Paiba solo de Diciembre
# **5 sensores diferentes: PMS7003 & PMSA003 & HPMA115S0 & SPS30 & SNGCJA5**
# **Comparaciones con la estación referencia*
df <- read_excel("C:/Mediciones/PAIBA_CANAIRIOS_DIC.xlsx")
View(df)
glimpse(df)
## Rows: 697
## Columns: 8
## $ Num <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17...
## $ Fecha <dttm> 2020-12-01 00:00:00, 2020-12-01 01:00:00, 2020-12-01 02:...
## $ Oficial <dbl> 8.12, 7.27, 10.32, 9.91, 12.52, 13.31, 14.70, 21.35, 23.8...
## $ PMS7003 <dbl> 7.218182, 8.224138, 11.259259, 14.290909, 15.464286, 13.2...
## $ PMSA003 <dbl> 5.363636, 6.724138, 10.407407, 14.545455, 15.285714, 12.4...
## $ HPMA115S0 <dbl> 5.345455, 5.413793, 7.425926, 9.490909, 9.982143, 8.19642...
## $ SPS30 <dbl> 4.363636, 4.862069, 6.740741, 8.654545, 9.178571, 7.28571...
## $ SNGCJA5 <dbl> 2.600000, 3.068966, 4.666667, 6.236364, 6.642857, 5.30357...
df %>%
sample_n(size = 10)
## # A tibble: 10 x 8
## Num Fecha Oficial PMS7003 PMSA003 HPMA115S0 SPS30 SNGCJA5
## <dbl> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 348 2020-12-15 11:00:00 29.5 4.29 3.12 3.67 2.59 1.29
## 2 237 2020-12-10 20:00:00 17.8 54.1 58 30.8 30.6 21.2
## 3 439 2020-12-19 06:00:00 19.0 13.2 12.4 7.29 7 5.22
## 4 555 2020-12-24 02:00:00 10.2 25.6 27.7 14.5 15.1 10.6
## 5 631 2020-12-27 06:00:00 15.4 17.2 17.3 10.5 10.1 7.38
## 6 576 2020-12-24 23:00:00 9.69 34.7 35 20.2 19 13.4
## 7 139 2020-12-06 18:00:00 8.57 3.68 2.23 2.77 2.58 1.11
## 8 660 2020-12-28 11:00:00 11.0 3.98 2.54 3 2.80 1.26
## 9 277 2020-12-12 12:00:00 33.6 15.6 12.4 9.91 8.15 5.22
## 10 337 2020-12-15 00:00:00 26.8 25.1 26.6 15.6 14.0 10.3
fig <- plot_ly(df, x = ~Num, y = ~PMS7003, name = 'PM2.5 PMS7003', type = 'scatter', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~PMSA003, name = 'PM2.5 PMSA003', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~HPMA115S0, name = 'PM2.5 HPMA115S0', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~SPS30, name = 'PM2.5 SPS30', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~SNGCJA5, name = 'PM2.5 SNGCJA5', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~Oficial, name = 'PM2.5 Oficial', mode = 'lines+markers')
fig
#Caso 19: SNGCJA5 VS Oficial-DIC
df %>% select(SNGCJA5, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
697 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
7.94 |
6.43 |
0.10 |
2.96 |
6.56 |
10.75 |
40.80 |
▇▃▁▁▁ |
| Oficial |
0 |
1 |
18.30 |
9.22 |
3.47 |
11.12 |
16.86 |
23.87 |
62.22 |
▇▇▃▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.606
ggplot(df, aes(x = SNGCJA5, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 Oficial",
title = "Relationship between SNGCJA5 and PaibaDic") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 11.4 0.442 25.8 0 10.5 12.3
## 2 SNGCJA5 0.869 0.043 20.1 0 0.784 0.954
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 697 x 5
## ID Oficial SNGCJA5 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 8.12 2.6 13.7 -5.54
## 2 2 7.27 3.07 14.1 -6.80
## 3 3 10.3 4.67 15.5 -5.13
## 4 4 9.91 6.24 16.8 -6.91
## 5 5 12.5 6.64 17.2 -4.65
## 6 6 13.3 5.30 16.0 -2.70
## 7 7 14.7 4.72 15.5 -0.805
## 8 8 21.4 8.52 18.8 2.55
## 9 9 23.9 12.5 22.3 1.59
## 10 10 22.9 12.4 22.2 0.745
## # ... with 687 more rows
#Caso 20: SPS30 VS Oficial-DIC
df %>% select(SPS30, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
697 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
11.29 |
8.46 |
1.17 |
4.85 |
9.30 |
15.11 |
51.67 |
▇▃▁▁▁ |
| Oficial |
0 |
1 |
18.30 |
9.22 |
3.47 |
11.12 |
16.86 |
23.87 |
62.22 |
▇▇▃▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.573
ggplot(df, aes(x = SPS30, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 Oficial",
title = "Relationship between SPS30 and PaibaDic") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 11.2 0.478 23.5 0 10.3 12.2
## 2 SPS30 0.625 0.034 18.4 0 0.559 0.692
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 697 x 5
## ID Oficial SPS30 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 8.12 4.36 14.0 -5.85
## 2 2 7.27 4.86 14.3 -7.01
## 3 3 10.3 6.74 15.5 -5.13
## 4 4 9.91 8.65 16.6 -6.74
## 5 5 12.5 9.18 17.0 -4.46
## 6 6 13.3 7.29 15.8 -2.48
## 7 7 14.7 6.33 15.2 -0.498
## 8 8 21.4 10.8 18.0 3.35
## 9 9 23.9 16.7 21.7 2.20
## 10 10 22.9 16.9 21.8 1.11
## # ... with 687 more rows
#Caso 21: HPMA115S0 VS Oficial-DIC
df %>% select(HPMA115S0, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
697 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
11.76 |
8.49 |
1.35 |
5.31 |
9.60 |
15.41 |
51.89 |
▇▃▁▁▁ |
| Oficial |
0 |
1 |
18.30 |
9.22 |
3.47 |
11.12 |
16.86 |
23.87 |
62.22 |
▇▇▃▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = HPMA115S0 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.616
ggplot(df, aes(x = HPMA115S0, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 HPMA115S0", y = "PM25 Oficial",
title = "Relationship between HPMA115S0 and PaibaDic") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 10.4 0.471 22.2 0 9.50 11.4
## 2 HPMA115S0 0.669 0.032 20.6 0 0.605 0.733
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 697 x 5
## ID Oficial HPMA115S0 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 8.12 5.34 14.0 -5.88
## 2 2 7.27 5.41 14.0 -6.78
## 3 3 10.3 7.43 15.4 -5.08
## 4 4 9.91 9.49 16.8 -6.87
## 5 5 12.5 9.98 17.1 -4.59
## 6 6 13.3 8.20 15.9 -2.60
## 7 7 14.7 7.31 15.3 -0.621
## 8 8 21.4 11.7 18.3 3.07
## 9 9 23.9 17 21.8 2.07
## 10 10 22.9 17.8 22.3 0.576
## # ... with 687 more rows
#Caso 22: PMSA003 VS Oficial-DIC
df %>% select(PMSA003, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
697 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMSA003 |
0 |
1 |
20.29 |
17.42 |
0.18 |
6.54 |
16.23 |
28.48 |
108.45 |
▇▃▁▁▁ |
| Oficial |
0 |
1 |
18.30 |
9.22 |
3.47 |
11.12 |
16.86 |
23.87 |
62.22 |
▇▇▃▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = PMSA003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.607
ggplot(df, aes(x = PMSA003, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 PMSA003", y = "PM25 Oficial",
title = "Relationship between PMSA003 and PaibaDic") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMSA003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 11.8 0.427 27.6 0 10.9 12.6
## 2 PMSA003 0.322 0.016 20.2 0 0.290 0.353
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 697 x 5
## ID Oficial PMSA003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 8.12 5.36 13.5 -5.37
## 2 2 7.27 6.72 13.9 -6.66
## 3 3 10.3 10.4 15.1 -4.80
## 4 4 9.91 14.5 16.4 -6.54
## 5 5 12.5 15.3 16.7 -4.16
## 6 6 13.3 12.5 15.8 -2.47
## 7 7 14.7 11.5 15.5 -0.758
## 8 8 21.4 22.8 19.1 2.26
## 9 9 23.9 34.1 22.7 1.12
## 10 10 22.9 32.8 22.3 0.605
## # ... with 687 more rows
#Caso 23: PMS7003 VS Oficial-DIC
df %>% select(PMS7003, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
697 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMS7003 |
0 |
1 |
20.12 |
15.64 |
0.96 |
7.84 |
16.61 |
27.35 |
93.25 |
▇▃▂▁▁ |
| Oficial |
0 |
1 |
18.30 |
9.22 |
3.47 |
11.12 |
16.86 |
23.87 |
62.22 |
▇▇▃▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = PMS7003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.614
ggplot(df, aes(x = PMS7003, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 PMS7003", y = "PM25 Oficial",
title = "Relationship between PMS7003 and PaibaDic") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMS7003, data = df)
#**Get regression table original:**
get_regression_table(score_model, digits = 11)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 11.0 0.450 24.5 0 10.1 11.9
## 2 PMS7003 0.362 0.0177 20.5 0 0.327 0.397
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 697 x 5
## ID Oficial PMS7003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 8.12 7.22 13.6 -5.50
## 2 2 7.27 8.22 14.0 -6.72
## 3 3 10.3 11.3 15.1 -4.77
## 4 4 9.91 14.3 16.2 -6.28
## 5 5 12.5 15.5 16.6 -4.09
## 6 6 13.3 13.3 15.8 -2.51
## 7 7 14.7 11.9 15.3 -0.634
## 8 8 21.4 22.1 19.0 2.34
## 9 9 23.9 32.0 22.6 1.27
## 10 10 22.9 30.2 21.9 0.964
## # ... with 687 more rows