library(readxl)
library(tidyverse)
## -- Attaching packages --------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v dplyr 1.0.2
## v tidyr 1.1.1 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(moderndive)
library(skimr)
# **ESTACION TUNAL Y VS CANAIRIOS**
# **5 sensores diferentes: PMS7003 & PMSA003 & HPMA115S0 & SPS30 & SNGCJA5**
# **Comparaciones entre sensores de bajo costo y la estación oficial*
df <- read_excel("C:/Mediciones/TUNAL_CANAIRIOS.xlsx")
View(df)
glimpse(df)
## Rows: 2,213
## Columns: 8
## $ Num <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17...
## $ Fecha <chr> "13-11-2020 24:00", "14-11-2020 01:00", "14-11-2020 02:00...
## $ Oficial <dbl> 30, 23, 18, 26, 24, 30, 38, 53, 54, 57, 60, 61, 41, 26, 1...
## $ PMS7003 <dbl> 26.70, 19.50, 25.60, 27.30, 32.80, 36.40, 44.20, 47.90, 5...
## $ PMSA003 <dbl> 30.70, 21.60, 29.00, 31.60, 38.10, 41.70, 50.30, 54.40, 6...
## $ HPMA115S0 <dbl> 17.40, 14.00, 17.20, 18.60, 23.50, 25.10, 30.00, 32.50, 3...
## $ SPS30 <dbl> 15.90, 12.40, 15.70, 17.00, 21.00, 22.40, 27.40, 29.70, 3...
## $ SNGCJA5 <dbl> 14.40, 10.70, 13.80, 14.70, 18.50, 20.30, 24.80, 26.60, 2...
df %>%
sample_n(size = 10)
## # A tibble: 10 x 8
## Num Fecha Oficial PMS7003 PMSA003 HPMA115S0 SPS30 SNGCJA5
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 987 25-12-2020 16:00 10 19.6 22.6 1.14 12.4 10.3
## 2 1387 11-01-2021 08:00 1 0.558 0.0435 0 0.1 0.268
## 3 736 15-12-2020 05:00 12 11 11.6 7.12 6.61 6.25
## 4 932 23-12-2020 09:00 53 13.8 15.7 9.81 7.79 5.75
## 5 1137 31-12-2020 22:00 29 30.3 36.4 0 18.5 16.5
## 6 823 18-12-2020 20:00 10 6.53 6.4 5.34 4.6 3.67
## 7 1635 21-01-2021 16:00 29 25.6 31.6 0 15.9 13.2
## 8 1215 04-01-2021 04:00 5 4.31 2.8 0 2.88 2.21
## 9 2251 16-02-2021 08:00 50 46.6 53.7 0 29.6 23.9
## 10 1972 04-02-2021 17:00 4 7.4 8.01 0 5.46 4.59
fig <- plot_ly(df, x = ~Num, y = ~PMS7003, name = 'PM2.5 PMS7003', type = 'scatter', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~PMSA003, name = 'PM2.5 PMSA003', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~HPMA115S0, name = 'PM2.5 HPMA115S0', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~SPS30, name = 'PM2.5 SPS30', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~SNGCJA5, name = 'PM2.5 SNGCJA5', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~Oficial, name = 'PM2.5 Oficial', mode = 'lines+markers')
fig
#Caso 1: SNGCJA5 VS SPS30
df %>% select(SNGCJA5, SPS30) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.97 |
6.35 |
0.00 |
2.08 |
4.93 |
10.3 |
38.6 |
▇▃▁▁▁ |
| SPS30 |
0 |
1 |
8.46 |
7.30 |
0.01 |
2.98 |
5.94 |
12.0 |
43.4 |
▇▃▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ SPS30)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.995
ggplot(df, aes(x = SNGCJA5, y = SPS30)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 SPS30",
title = "Relationship between SNGCJA5 and SPS30") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(SPS30 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 0.478 0.023 21.2 0 0.434 0.522
## 2 SNGCJA5 1.14 0.002 479. 0 1.14 1.15
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID SPS30 SNGCJA5 SPS30_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 15.9 14.4 17.0 -1.06
## 2 2 12.4 10.7 12.7 -0.324
## 3 3 15.7 13.8 16.3 -0.572
## 4 4 17 14.7 17.3 -0.302
## 5 5 21 18.5 21.7 -0.651
## 6 6 22.4 20.3 23.7 -1.31
## 7 7 27.4 24.8 28.9 -1.46
## 8 8 29.7 26.6 30.9 -1.22
## 9 9 34.9 28.7 33.3 1.58
## 10 10 39.9 32.4 37.6 2.34
## # ... with 2,203 more rows
#Caso 2: SNGCJA5 VS HPMA115S0
df %>% select(SNGCJA5, HPMA115S0) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.97 |
6.35 |
0 |
2.08 |
4.93 |
10.30 |
38.6 |
▇▃▁▁▁ |
| HPMA115S0 |
0 |
1 |
4.47 |
7.39 |
0 |
0.00 |
0.00 |
5.58 |
43.4 |
▇▁▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ HPMA115S0)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.607
ggplot(df, aes(x = SNGCJA5, y = HPMA115S0)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 HPMA115S0",
title = "Relationship between SNGCJA5 and HPMA115S0") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(HPMA115S0 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -0.448 0.186 -2.41 0.016 -0.812 -0.084
## 2 SNGCJA5 0.706 0.02 35.9 0 0.667 0.745
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID HPMA115S0 SNGCJA5 HPMA115S0_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 17.4 14.4 9.72 7.68
## 2 2 14 10.7 7.11 6.89
## 3 3 17.2 13.8 9.30 7.90
## 4 4 18.6 14.7 9.93 8.67
## 5 5 23.5 18.5 12.6 10.9
## 6 6 25.1 20.3 13.9 11.2
## 7 7 30 24.8 17.1 12.9
## 8 8 32.5 26.6 18.3 14.2
## 9 9 37.1 28.7 19.8 17.3
## 10 10 42.1 32.4 22.4 19.7
## # ... with 2,203 more rows
#Caso 3: SNGCJA5 VS PMSA003
df %>% select(SNGCJA5, PMSA003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.97 |
6.35 |
0 |
2.08 |
4.93 |
10.3 |
38.6 |
▇▃▁▁▁ |
| PMSA003 |
0 |
1 |
14.61 |
15.18 |
0 |
2.81 |
9.11 |
22.5 |
89.3 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.995
ggplot(df, aes(x = SNGCJA5, y = PMSA003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 PMSA003",
title = "Relationship between SNGCJA5 and PMSA003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -1.97 0.048 -40.8 0 -2.07 -1.88
## 2 SNGCJA5 2.38 0.005 464. 0 2.37 2.39
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID PMSA003 SNGCJA5 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 30.7 14.4 32.3 -1.58
## 2 2 21.6 10.7 23.5 -1.88
## 3 3 29 13.8 30.8 -1.85
## 4 4 31.6 14.7 33.0 -1.39
## 5 5 38.1 18.5 42.0 -3.93
## 6 6 41.7 20.3 46.3 -4.61
## 7 7 50.3 24.8 57.0 -6.71
## 8 8 54.4 26.6 61.3 -6.89
## 9 9 62.3 28.7 66.3 -3.99
## 10 10 69.8 32.4 75.1 -5.29
## # ... with 2,203 more rows
#Caso 4: SNGCJA5 VS PMS7003
df %>% select(SNGCJA5, PMS7003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.97 |
6.35 |
0 |
2.08 |
4.93 |
10.3 |
38.6 |
▇▃▁▁▁ |
| PMS7003 |
0 |
1 |
12.96 |
12.73 |
0 |
3.20 |
8.55 |
19.3 |
75.7 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.996
ggplot(df, aes(x = SNGCJA5, y = PMS7003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 PMS7003",
title = "Relationship between SNGCJA5 and PMS7003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -0.96 0.036 -26.5 0 -1.03 -0.889
## 2 SNGCJA5 2.00 0.004 521. 0 1.99 2.00
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID PMS7003 SNGCJA5 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 26.7 14.4 27.8 -1.09
## 2 2 19.5 10.7 20.4 -0.901
## 3 3 25.6 13.8 26.6 -0.989
## 4 4 27.3 14.7 28.4 -1.09
## 5 5 32.8 18.5 36.0 -3.17
## 6 6 36.4 20.3 39.6 -3.16
## 7 7 44.2 24.8 48.5 -4.35
## 8 8 47.9 26.6 52.1 -4.24
## 9 9 54.9 28.7 56.3 -1.43
## 10 10 60.5 32.4 63.7 -3.22
## # ... with 2,203 more rows
#Caso 5: SNGCJA5 VS Oficial
df %>% select(SNGCJA5, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.97 |
6.35 |
0 |
2.08 |
4.93 |
10.3 |
38.6 |
▇▃▁▁▁ |
| Oficial |
0 |
1 |
15.66 |
11.85 |
0 |
7.00 |
13.00 |
22.0 |
78.0 |
▇▅▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.740
ggplot(df, aes(x = SNGCJA5, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 Oficial",
title = "Relationship between SNGCJA5 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 6.04 0.252 24.0 0 5.54 6.53
## 2 SNGCJA5 1.38 0.027 51.7 0 1.33 1.43
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID Oficial SNGCJA5 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 30 14.4 25.9 4.09
## 2 2 23 10.7 20.8 2.20
## 3 3 18 13.8 25.1 -7.08
## 4 4 26 14.7 26.3 -0.324
## 5 5 24 18.5 31.6 -7.57
## 6 6 30 20.3 34.1 -4.05
## 7 7 38 24.8 40.3 -2.26
## 8 8 53 26.6 42.7 10.3
## 9 9 54 28.7 45.6 8.35
## 10 10 57 32.4 50.8 6.25
## # ... with 2,203 more rows
#Caso 6: SPS30 VS HPMA115S0
df %>% select(SPS30, HPMA115S0) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
8.46 |
7.30 |
0.01 |
2.98 |
5.94 |
12.00 |
43.4 |
▇▃▁▁▁ |
| HPMA115S0 |
0 |
1 |
4.47 |
7.39 |
0.00 |
0.00 |
0.00 |
5.58 |
43.4 |
▇▁▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ HPMA115S0)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.608
ggplot(df, aes(x = SPS30, y = HPMA115S0)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 HPMA115S0",
title = "Relationship between SPS30 and HPMA115S0") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(HPMA115S0 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -0.731 0.191 -3.83 0 -1.10 -0.356
## 2 SPS30 0.615 0.017 36.0 0 0.582 0.649
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID HPMA115S0 SPS30 HPMA115S0_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 17.4 15.9 9.06 8.35
## 2 2 14 12.4 6.90 7.10
## 3 3 17.2 15.7 8.93 8.27
## 4 4 18.6 17 9.73 8.87
## 5 5 23.5 21 12.2 11.3
## 6 6 25.1 22.4 13.1 12.0
## 7 7 30 27.4 16.1 13.9
## 8 8 32.5 29.7 17.5 15.0
## 9 9 37.1 34.9 20.7 16.4
## 10 10 42.1 39.9 23.8 18.3
## # ... with 2,203 more rows
#Caso 7: SPS30 VS PMSA003
df %>% select(SPS30, PMSA003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
8.46 |
7.30 |
0.01 |
2.98 |
5.94 |
12.0 |
43.4 |
▇▃▁▁▁ |
| PMSA003 |
0 |
1 |
14.61 |
15.18 |
0.00 |
2.81 |
9.11 |
22.5 |
89.3 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.995
ggplot(df, aes(x = SPS30, y = PMSA003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 PMSA003",
title = "Relationship between SPS30 and PMSA003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -2.88 0.05 -57.6 0 -2.98 -2.78
## 2 SPS30 2.07 0.004 462. 0 2.06 2.08
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID PMSA003 SPS30 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 30.7 15.9 30.0 0.697
## 2 2 21.6 12.4 22.8 -1.16
## 3 3 29 15.7 29.6 -0.589
## 4 4 31.6 17 32.3 -0.678
## 5 5 38.1 21 40.6 -2.45
## 6 6 41.7 22.4 43.4 -1.75
## 7 7 50.3 27.4 53.8 -3.49
## 8 8 54.4 29.7 58.5 -4.14
## 9 9 62.3 34.9 69.3 -7.00
## 10 10 69.8 39.9 79.6 -9.84
## # ... with 2,203 more rows
#Caso 8: SPS30 VS PMS7003
df %>% select(SPS30, PMS7003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
8.46 |
7.30 |
0.01 |
2.98 |
5.94 |
12.0 |
43.4 |
▇▃▁▁▁ |
| PMS7003 |
0 |
1 |
12.96 |
12.73 |
0.00 |
3.20 |
8.55 |
19.3 |
75.7 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.995
ggplot(df, aes(x = SPS30, y = PMS7003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 PMS7003",
title = "Relationship between SPS30 and PMS7003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -1.71 0.041 -41.6 0 -1.79 -1.63
## 2 SPS30 1.73 0.004 471. 0 1.73 1.74
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID PMS7003 SPS30 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 26.7 15.9 25.9 0.833
## 2 2 19.5 12.4 19.8 -0.297
## 3 3 25.6 15.7 25.5 0.08
## 4 4 27.3 17 27.8 -0.475
## 5 5 32.8 21 34.7 -1.91
## 6 6 36.4 22.4 37.1 -0.741
## 7 7 44.2 27.4 45.8 -1.61
## 8 8 47.9 29.7 49.8 -1.90
## 9 9 54.9 34.9 58.8 -3.92
## 10 10 60.5 39.9 67.5 -6.99
## # ... with 2,203 more rows
#Caso 9: SPS30 VS Oficial
df %>% select(SPS30, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
8.46 |
7.30 |
0.01 |
2.98 |
5.94 |
12 |
43.4 |
▇▃▁▁▁ |
| Oficial |
0 |
1 |
15.66 |
11.85 |
0.00 |
7.00 |
13.00 |
22 |
78.0 |
▇▅▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.730
ggplot(df, aes(x = SPS30, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 Oficial",
title = "Relationship between SPS30 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 5.64 0.263 21.4 0 5.12 6.15
## 2 SPS30 1.18 0.024 50.3 0 1.14 1.23
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID Oficial SPS30 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 30 15.9 24.5 5.52
## 2 2 23 12.4 20.3 2.67
## 3 3 18 15.7 24.2 -6.24
## 4 4 26 17 25.8 0.216
## 5 5 24 21 30.5 -6.52
## 6 6 30 22.4 32.2 -2.18
## 7 7 38 27.4 38.1 -0.11
## 8 8 53 29.7 40.8 12.2
## 9 9 54 34.9 47.0 7.00
## 10 10 57 39.9 52.9 4.08
## # ... with 2,203 more rows
#Caso 10: HPMA115S0 VS PMSA003
df %>% select(HPMA115S0, PMSA003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
4.47 |
7.39 |
0 |
0.00 |
0.00 |
5.58 |
43.4 |
▇▁▁▁▁ |
| PMSA003 |
0 |
1 |
14.61 |
15.18 |
0 |
2.81 |
9.11 |
22.50 |
89.3 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = HPMA115S0 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.602
ggplot(df, aes(x = HPMA115S0, y = PMSA003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 HPMA115S0", y = "PM25 PMSA003",
title = "Relationship between HPMA115S0 and PMSA003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 9.08 0.301 30.1 0 8.49 9.67
## 2 HPMA115S0 1.24 0.035 35.4 0 1.17 1.30
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID PMSA003 HPMA115S0 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 30.7 17.4 30.6 0.113
## 2 2 21.6 14 26.4 -4.78
## 3 3 29 17.2 30.3 -1.34
## 4 4 31.6 18.6 32.1 -0.47
## 5 5 38.1 23.5 38.1 -0.026
## 6 6 41.7 25.1 40.1 1.60
## 7 7 50.3 30 46.2 4.14
## 8 8 54.4 32.5 49.2 5.15
## 9 9 62.3 37.1 54.9 7.36
## 10 10 69.8 42.1 61.1 8.68
## # ... with 2,203 more rows
#Caso 11: HPMA115S0 VS PMS7003
df %>% select(HPMA115S0, PMS7003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
4.47 |
7.39 |
0 |
0.0 |
0.00 |
5.58 |
43.4 |
▇▁▁▁▁ |
| PMS7003 |
0 |
1 |
12.96 |
12.73 |
0 |
3.2 |
8.55 |
19.30 |
75.7 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = HPMA115S0 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.615
ggplot(df, aes(x = HPMA115S0, y = PMS7003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 HPMA115S0", y = "PM25 PMS7003",
title = "Relationship between HPMA115S0 and PMS7003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 8.22 0.25 32.9 0 7.74 8.71
## 2 HPMA115S0 1.06 0.029 36.6 0 1.00 1.12
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID PMS7003 HPMA115S0 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 26.7 17.4 26.6 0.062
## 2 2 19.5 14 23.0 -3.54
## 3 3 25.6 17.2 26.4 -0.826
## 4 4 27.3 18.6 27.9 -0.608
## 5 5 32.8 23.5 33.1 -0.293
## 6 6 36.4 25.1 34.8 1.61
## 7 7 44.2 30 40.0 4.23
## 8 8 47.9 32.5 42.6 5.28
## 9 9 54.9 37.1 47.5 7.42
## 10 10 60.5 42.1 52.8 7.72
## # ... with 2,203 more rows
#Caso 10: HPMA115S0 VS Oficial
df %>% select(HPMA115S0, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
4.47 |
7.39 |
0 |
0 |
0 |
5.58 |
43.4 |
▇▁▁▁▁ |
| Oficial |
0 |
1 |
15.66 |
11.85 |
0 |
7 |
13 |
22.00 |
78.0 |
▇▅▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = HPMA115S0 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.388
ggplot(df, aes(x = HPMA115S0, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 HPMA115S0", y = "PM25 Oficial",
title = "Relationship between HPMA115S0 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 12.9 0.272 47.4 0 12.3 13.4
## 2 HPMA115S0 0.621 0.031 19.8 0 0.56 0.683
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID Oficial HPMA115S0 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 30 17.4 23.7 6.31
## 2 2 23 14 21.6 1.42
## 3 3 18 17.2 23.6 -5.57
## 4 4 26 18.6 24.4 1.56
## 5 5 24 23.5 27.5 -3.48
## 6 6 30 25.1 28.5 1.52
## 7 7 38 30 31.5 6.48
## 8 8 53 32.5 33.1 19.9
## 9 9 54 37.1 35.9 18.1
## 10 10 57 42.1 39.0 18.0
## # ... with 2,203 more rows
#Caso 11: PMSA003 VS PMS7003
df %>% select(PMSA003, PMS7003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMSA003 |
0 |
1 |
14.61 |
15.18 |
0 |
2.81 |
9.11 |
22.5 |
89.3 |
▇▂▁▁▁ |
| PMS7003 |
0 |
1 |
12.96 |
12.73 |
0 |
3.20 |
8.55 |
19.3 |
75.7 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = PMSA003 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.999
ggplot(df, aes(x = PMSA003, y = PMS7003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 PMSA003", y = "PM25 PMS7003",
title = "Relationship between PMSA003 and PMS7003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ PMSA003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 0.723 0.018 39.4 0 0.687 0.759
## 2 PMSA003 0.837 0.001 963. 0 0.836 0.839
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID PMS7003 PMSA003 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 26.7 30.7 26.4 0.267
## 2 2 19.5 21.6 18.8 0.688
## 3 3 25.6 29 25.0 0.59
## 4 4 27.3 31.6 27.2 0.113
## 5 5 32.8 38.1 32.6 0.169
## 6 6 36.4 41.7 35.6 0.754
## 7 7 44.2 50.3 42.8 1.35
## 8 8 47.9 54.4 46.3 1.62
## 9 9 54.9 62.3 52.9 2.00
## 10 10 60.5 69.8 59.2 1.32
## # ... with 2,203 more rows
#Caso 12: PMSA003 VS Oficial
df %>% select(PMSA003, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMSA003 |
0 |
1 |
14.61 |
15.18 |
0 |
2.81 |
9.11 |
22.5 |
89.3 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
15.66 |
11.85 |
0 |
7.00 |
13.00 |
22.0 |
78.0 |
▇▅▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = PMSA003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.743
ggplot(df, aes(x = PMSA003, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 PMSA003", y = "PM25 Oficial",
title = "Relationship between PMSA003 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMSA003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 7.18 0.234 30.7 0 6.72 7.64
## 2 PMSA003 0.580 0.011 52.3 0 0.559 0.602
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID Oficial PMSA003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 30 30.7 25.0 5.00
## 2 2 23 21.6 19.7 3.28
## 3 3 18 29 24.0 -6.01
## 4 4 26 31.6 25.5 0.481
## 5 5 24 38.1 29.3 -5.29
## 6 6 30 41.7 31.4 -1.38
## 7 7 38 50.3 36.4 1.63
## 8 8 53 54.4 38.8 14.2
## 9 9 54 62.3 43.3 10.7
## 10 10 57 69.8 47.7 9.31
## # ... with 2,203 more rows
#Caso 13: PMS7003 VS Oficial
df %>% select(PMS7003, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMS7003 |
0 |
1 |
12.96 |
12.73 |
0 |
3.2 |
8.55 |
19.3 |
75.7 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
15.66 |
11.85 |
0 |
7.0 |
13.00 |
22.0 |
78.0 |
▇▅▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = PMS7003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.743
ggplot(df, aes(x = PMS7003, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 PMS7003", y = "PM25 Oficial",
title = "Relationship between PMS7003 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMS7003, data = df)
#**Get regression table original:**
get_regression_table(score_model, digits = 11)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 6.69 0.241 27.8 0 6.22 7.17
## 2 PMS7003 0.692 0.0132 52.2 0 0.666 0.718
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID Oficial PMS7003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 30 26.7 25.2 4.84
## 2 2 23 19.5 20.2 2.82
## 3 3 18 25.6 24.4 -6.40
## 4 4 26 27.3 25.6 0.42
## 5 5 24 32.8 29.4 -5.38
## 6 6 30 36.4 31.9 -1.88
## 7 7 38 44.2 37.3 0.729
## 8 8 53 47.9 39.8 13.2
## 9 9 54 54.9 44.7 9.33
## 10 10 57 60.5 48.5 8.45
## # ... with 2,203 more rows
# **ESTACION TUNAL -1hora VS CANAIRIOS**
# **Prueba con los valores de la estacion Tunal retrasada 1 hora
# **5 sensores diferentes: PMS7003 & PMSA003 & HPMA115S0 & SPS30 & SNGCJA5**
# **Comparaciones con la estación oficial*
df <- read_excel("C:/Mediciones/TUNAL_CANAIRIOS-1h.xlsx")
View(df)
glimpse(df)
## Rows: 2,213
## Columns: 8
## $ Num <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17...
## $ Fecha <chr> "13-11-2020 24:00", "14-11-2020 01:00", "14-11-2020 02:00...
## $ Oficial <dbl> 23, 18, 26, 24, 30, 38, 53, 54, 57, 60, 61, 41, 26, 18, 1...
## $ PMS7003 <dbl> 26.70, 19.50, 25.60, 27.30, 32.80, 36.40, 44.20, 47.90, 5...
## $ PMSA003 <dbl> 30.70, 21.60, 29.00, 31.60, 38.10, 41.70, 50.30, 54.40, 6...
## $ HPMA115S0 <dbl> 17.40, 14.00, 17.20, 18.60, 23.50, 25.10, 30.00, 32.50, 3...
## $ SPS30 <dbl> 15.90, 12.40, 15.70, 17.00, 21.00, 22.40, 27.40, 29.70, 3...
## $ SNGCJA5 <dbl> 14.40, 10.70, 13.80, 14.70, 18.50, 20.30, 24.80, 26.60, 2...
df %>%
sample_n(size = 10)
## # A tibble: 10 x 8
## Num Fecha Oficial PMS7003 PMSA003 HPMA115S0 SPS30 SNGCJA5
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1026 27-12-2020 07:00 7 10.8 11.2 0 7.16 5.78
## 2 2117 10-02-2021 18:00 18 13.4 15.8 0 8.69 7.38
## 3 1749 26-01-2021 10:00 8 2.6 2.41 0 2.82 1.65
## 4 1101 30-12-2020 10:00 28 31.4 36.6 0 18.5 14.3
## 5 402 01-12-2020 07:00 14 6.84 6.94 5.54 4.88 3.86
## 6 2146 11-02-2021 23:00 15 10.5 11.4 0 7.9 6.28
## 7 206 22-11-2020 13:00 31 45.6 54.4 29.7 29.2 22.9
## 8 1395 11-01-2021 16:00 2 0.0863 0.0144 0 0.79 1.01
## 9 632 10-12-2020 21:00 39 53.7 61.6 32.8 33.1 27.5
## 10 2065 08-02-2021 14:00 27 21.9 26.3 0 13.1 11
fig <- plot_ly(df, x = ~Num, y = ~PMS7003, name = 'PM2.5 PMS7003', type = 'scatter', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~PMSA003, name = 'PM2.5 PMSA003', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~HPMA115S0, name = 'PM2.5 HPMA115S0', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~SPS30, name = 'PM2.5 SPS30', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~SNGCJA5, name = 'PM2.5 SNGCJA5', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~Oficial, name = 'PM2.5 Oficial', mode = 'lines+markers')
fig
#Caso 14: SNGCJA5 VS Oficial-1h
df %>% select(SNGCJA5, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.97 |
6.35 |
0 |
2.08 |
4.93 |
10.3 |
38.6 |
▇▃▁▁▁ |
| Oficial |
0 |
1 |
15.66 |
11.85 |
0 |
7.00 |
13.00 |
22.0 |
78.0 |
▇▅▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.896
ggplot(df, aes(x = SNGCJA5, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 Oficial",
title = "Relationship between SNGCJA5 and Oficial-1hour") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 4.01 0.167 24.1 0 3.68 4.34
## 2 SNGCJA5 1.67 0.018 94.6 0 1.64 1.71
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID Oficial SNGCJA5 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 23 14.4 28.1 -5.07
## 2 2 18 10.7 21.9 -3.89
## 3 3 26 13.8 27.1 -1.07
## 4 4 24 14.7 28.6 -4.57
## 5 5 30 18.5 34.9 -4.92
## 6 6 38 20.3 37.9 0.069
## 7 7 53 24.8 45.5 7.55
## 8 8 54 26.6 48.5 5.54
## 9 9 57 28.7 52.0 5.03
## 10 10 60 32.4 58.2 1.85
## # ... with 2,203 more rows
#Caso 15: SPS30 VS Oficial-1
df %>% select(SPS30, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
8.46 |
7.30 |
0.01 |
2.98 |
5.94 |
12 |
43.4 |
▇▃▁▁▁ |
| Oficial |
0 |
1 |
15.66 |
11.85 |
0.00 |
7.00 |
13.00 |
22 |
78.0 |
▇▅▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.873
ggplot(df, aes(x = SPS30, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 Oficial",
title = "Relationship between SPS30 and Oficial-1hour") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 3.68 0.188 19.6 0 3.31 4.05
## 2 SPS30 1.42 0.017 84.0 0 1.38 1.45
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID Oficial SPS30 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 23 15.9 26.2 -3.20
## 2 2 18 12.4 21.2 -3.24
## 3 3 26 15.7 25.9 0.084
## 4 4 24 17 27.8 -3.76
## 5 5 30 21 33.4 -3.42
## 6 6 38 22.4 35.4 2.60
## 7 7 53 27.4 42.5 10.5
## 8 8 54 29.7 45.7 8.26
## 9 9 57 34.9 53.1 3.89
## 10 10 60 39.9 60.2 -0.187
## # ... with 2,203 more rows
#Caso 16: HPMA115S0 VS Oficial-1h
df %>% select(HPMA115S0, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
4.47 |
7.39 |
0 |
0 |
0 |
5.58 |
43.4 |
▇▁▁▁▁ |
| Oficial |
0 |
1 |
15.66 |
11.85 |
0 |
7 |
13 |
22.00 |
78.0 |
▇▅▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = HPMA115S0 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.460
ggplot(df, aes(x = HPMA115S0, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 HPMA115S0", y = "PM25 Oficial",
title = "Relationship between HPMA115S0 and Oficial-1hour") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 12.4 0.262 47.3 0 11.8 12.9
## 2 HPMA115S0 0.738 0.03 24.4 0 0.679 0.797
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID Oficial HPMA115S0 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 23 17.4 25.2 -2.20
## 2 2 18 14 22.7 -4.69
## 3 3 26 17.2 25.1 0.948
## 4 4 24 18.6 26.1 -2.08
## 5 5 30 23.5 29.7 0.299
## 6 6 38 25.1 30.9 7.12
## 7 7 53 30 34.5 18.5
## 8 8 54 32.5 36.3 17.7
## 9 9 57 37.1 39.7 17.3
## 10 10 60 42.1 43.4 16.6
## # ... with 2,203 more rows
#Caso 17: PMSA003 VS Oficial-1h
df %>% select(PMSA003, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMSA003 |
0 |
1 |
14.61 |
15.18 |
0 |
2.81 |
9.11 |
22.5 |
89.3 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
15.66 |
11.85 |
0 |
7.00 |
13.00 |
22.0 |
78.0 |
▇▅▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = PMSA003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.891
ggplot(df, aes(x = PMSA003, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 PMSA003", y = "PM25 Oficial",
title = "Relationship between PMSA003 and Oficial-1hour") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMSA003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 5.50 0.159 34.6 0 5.18 5.81
## 2 PMSA003 0.696 0.008 92.3 0 0.681 0.71
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID Oficial PMSA003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 23 30.7 26.9 -3.85
## 2 2 18 21.6 20.5 -2.52
## 3 3 26 29 25.7 0.331
## 4 4 24 31.6 27.5 -3.48
## 5 5 30 38.1 32.0 -2.00
## 6 6 38 41.7 34.5 3.50
## 7 7 53 50.3 40.5 12.5
## 8 8 54 54.4 43.3 10.7
## 9 9 57 62.3 48.8 8.17
## 10 10 60 69.8 54.0 5.95
## # ... with 2,203 more rows
#Caso 18: PMS7003 VS Oficial-1h
df %>% select(PMS7003, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMS7003 |
0 |
1 |
12.96 |
12.73 |
0 |
3.2 |
8.55 |
19.3 |
75.7 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
15.66 |
11.85 |
0 |
7.0 |
13.00 |
22.0 |
78.0 |
▇▅▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = PMS7003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.894
ggplot(df, aes(x = PMS7003, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 PMS7003", y = "PM25 Oficial",
title = "Relationship between PMS7003 and Oficial-1hour") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMS7003, data = df)
#**Get regression table original:**
get_regression_table(score_model, digits = 11)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 4.87 0.161 30.2 0 4.56 5.19
## 2 PMS7003 0.832 0.00887 93.8 0 0.815 0.850
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID Oficial PMS7003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 23 26.7 27.1 -4.10
## 2 2 18 19.5 21.1 -3.10
## 3 3 26 25.6 26.2 -0.18
## 4 4 24 27.3 27.6 -3.60
## 5 5 30 32.8 32.2 -2.17
## 6 6 38 36.4 35.2 2.83
## 7 7 53 44.2 41.7 11.3
## 8 8 54 47.9 44.7 9.26
## 9 9 57 54.9 50.6 6.43
## 10 10 60 60.5 55.2 4.77
## # ... with 2,203 more rows
# **ESTACION TUNAL -2horas VS CANAIRIOS**
# **Prueba con los valores de la estacion Tunal retrasada 2 horas
# **5 sensores diferentes: PMS7003 & PMSA003 & HPMA115S0 & SPS30 & SNGCJA5**
# **Comparaciones con la estación oficial*
df <- read_excel("C:/Mediciones/TUNAL_CANAIRIOS-2h.xlsx")
View(df)
glimpse(df)
## Rows: 2,213
## Columns: 8
## $ Num <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17...
## $ Fecha <chr> "13-11-2020 24:00", "14-11-2020 01:00", "14-11-2020 02:00...
## $ Oficial <dbl> 18, 26, 24, 30, 38, 53, 54, 57, 60, 61, 41, 26, 18, 17, 2...
## $ PMS7003 <dbl> 26.70, 19.50, 25.60, 27.30, 32.80, 36.40, 44.20, 47.90, 5...
## $ PMSA003 <dbl> 30.70, 21.60, 29.00, 31.60, 38.10, 41.70, 50.30, 54.40, 6...
## $ HPMA115S0 <dbl> 17.40, 14.00, 17.20, 18.60, 23.50, 25.10, 30.00, 32.50, 3...
## $ SPS30 <dbl> 15.90, 12.40, 15.70, 17.00, 21.00, 22.40, 27.40, 29.70, 3...
## $ SNGCJA5 <dbl> 14.40, 10.70, 13.80, 14.70, 18.50, 20.30, 24.80, 26.60, 2...
df %>%
sample_n(size = 10)
## # A tibble: 10 x 8
## Num Fecha Oficial PMS7003 PMSA003 HPMA115S0 SPS30 SNGCJA5
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1708 24-01-2021 17:00 5 2.04 2.87 0 2.32 1.38
## 2 1650 22-01-2021 07:00 53 25.1 30 0 14.7 13.7
## 3 1137 31-12-2020 22:00 23 30.3 36.4 0 18.5 16.5
## 4 1195 03-01-2021 08:00 5 2 1.83 3.67 2.2 1.43
## 5 542 07-12-2020 03:00 17 5.22 5.09 4.56 4.15 3.41
## 6 848 19-12-2020 21:00 14 24.6 27.8 15.9 14.9 12.5
## 7 238 23-11-2020 21:00 17 3.44 3.02 3.88 3.03 2.32
## 8 355 29-11-2020 08:00 6 5.99 6.15 5.03 4.36 2.93
## 9 1851 30-01-2021 16:00 9 3.57 3.55 0 3.25 2.18
## 10 377 30-11-2020 06:00 26 18.1 20.3 10.8 9.96 9.55
fig <- plot_ly(df, x = ~Num, y = ~PMS7003, name = 'PM2.5 PMS7003', type = 'scatter', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~PMSA003, name = 'PM2.5 PMSA003', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~HPMA115S0, name = 'PM2.5 HPMA115S0', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~SPS30, name = 'PM2.5 SPS30', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~SNGCJA5, name = 'PM2.5 SNGCJA5', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~Oficial, name = 'PM2.5 Oficial', mode = 'lines+markers')
fig
#Caso 19: SNGCJA5 VS Oficial-2h
df %>% select(SNGCJA5, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.97 |
6.35 |
0 |
2.08 |
4.93 |
10.3 |
38.6 |
▇▃▁▁▁ |
| Oficial |
0 |
1 |
15.66 |
11.85 |
0 |
7.00 |
13.00 |
22.0 |
78.0 |
▇▅▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.704
ggplot(df, aes(x = SNGCJA5, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 Oficial",
title = "Relationship between SNGCJA5 and Oficial-2hour") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 6.50 0.266 24.4 0 5.98 7.02
## 2 SNGCJA5 1.31 0.028 46.6 0 1.26 1.37
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID Oficial SNGCJA5 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 18 14.4 25.4 -7.41
## 2 2 26 10.7 20.6 5.45
## 3 3 24 13.8 24.6 -0.625
## 4 4 30 14.7 25.8 4.19
## 5 5 38 18.5 30.8 7.20
## 6 6 53 20.3 33.2 19.8
## 7 7 54 24.8 39.1 14.9
## 8 8 57 26.6 41.4 15.6
## 9 9 60 28.7 44.2 15.8
## 10 10 61 32.4 49.1 11.9
## # ... with 2,203 more rows
#Caso 20: SPS30 VS Oficial-2
df %>% select(SPS30, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
8.46 |
7.30 |
0.01 |
2.98 |
5.94 |
12 |
43.4 |
▇▃▁▁▁ |
| Oficial |
0 |
1 |
15.66 |
11.85 |
0.00 |
7.00 |
13.00 |
22 |
78.0 |
▇▅▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.683
ggplot(df, aes(x = SPS30, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 Oficial",
title = "Relationship between SPS30 and Oficial-2hour") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 6.29 0.282 22.3 0 5.74 6.84
## 2 SPS30 1.11 0.025 43.9 0 1.06 1.16
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID Oficial SPS30 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 18 15.9 23.9 -5.90
## 2 2 26 12.4 20.0 5.98
## 3 3 24 15.7 23.7 0.323
## 4 4 30 17 25.1 4.88
## 5 5 38 21 29.5 8.45
## 6 6 53 22.4 31.1 21.9
## 7 7 54 27.4 36.6 17.4
## 8 8 57 29.7 39.2 17.8
## 9 9 60 34.9 44.9 15.1
## 10 10 61 39.9 50.5 10.5
## # ... with 2,203 more rows
#Caso 21: HPMA115S0 VS Oficial-2h
df %>% select(HPMA115S0, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
4.47 |
7.39 |
0 |
0 |
0 |
5.58 |
43.4 |
▇▁▁▁▁ |
| Oficial |
0 |
1 |
15.66 |
11.85 |
0 |
7 |
13 |
22.00 |
78.0 |
▇▅▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = HPMA115S0 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.355
ggplot(df, aes(x = HPMA115S0, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 HPMA115S0", y = "PM25 Oficial",
title = "Relationship between HPMA115S0 and Oficial-2hour") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 13.1 0.275 47.6 0 12.6 13.6
## 2 HPMA115S0 0.569 0.032 17.9 0 0.507 0.632
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID Oficial HPMA115S0 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 18 17.4 23.0 -5.01
## 2 2 26 14 21.1 4.92
## 3 3 24 17.2 22.9 1.10
## 4 4 30 18.6 23.7 6.30
## 5 5 38 23.5 26.5 11.5
## 6 6 53 25.1 27.4 25.6
## 7 7 54 30 30.2 23.8
## 8 8 57 32.5 31.6 25.4
## 9 9 60 37.1 34.2 25.8
## 10 10 61 42.1 37.1 23.9
## # ... with 2,203 more rows
#Caso 22: PMSA003 VS Oficial-2h
df %>% select(PMSA003, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMSA003 |
0 |
1 |
14.61 |
15.18 |
0 |
2.81 |
9.11 |
22.5 |
89.3 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
15.66 |
11.85 |
0 |
7.00 |
13.00 |
22.0 |
78.0 |
▇▅▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = PMSA003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.691
ggplot(df, aes(x = PMSA003, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 PMSA003", y = "PM25 Oficial",
title = "Relationship between PMSA003 and Oficial-2hour") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMSA003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 7.77 0.253 30.8 0 7.28 8.27
## 2 PMSA003 0.539 0.012 45.0 0 0.516 0.563
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID Oficial PMSA003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 18 30.7 24.3 -6.34
## 2 2 26 21.6 19.4 6.57
## 3 3 24 29 23.4 0.581
## 4 4 30 31.6 24.8 5.18
## 5 5 38 38.1 28.3 9.67
## 6 6 53 41.7 30.3 22.7
## 7 7 54 50.3 34.9 19.1
## 8 8 57 54.4 37.1 19.9
## 9 9 60 62.3 41.4 18.6
## 10 10 61 69.8 45.4 15.6
## # ... with 2,203 more rows
#Caso 23: PMS7003 VS Oficial-2h
df %>% select(PMS7003, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMS7003 |
0 |
1 |
12.96 |
12.73 |
0 |
3.2 |
8.55 |
19.3 |
75.7 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
15.66 |
11.85 |
0 |
7.0 |
13.00 |
22.0 |
78.0 |
▇▅▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = PMS7003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.694
ggplot(df, aes(x = PMS7003, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 PMS7003", y = "PM25 Oficial",
title = "Relationship between PMS7003 and Oficial-2hour") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMS7003, data = df)
#**Get regression table original:**
get_regression_table(score_model, digits = 11)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 7.28 0.259 28.1 0 6.77 7.79
## 2 PMS7003 0.646 0.0142 45.4 0 0.618 0.674
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID Oficial PMS7003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 18 26.7 24.5 -6.54
## 2 2 26 19.5 19.9 6.12
## 3 3 24 25.6 23.8 0.174
## 4 4 30 27.3 24.9 5.08
## 5 5 38 32.8 28.5 9.52
## 6 6 53 36.4 30.8 22.2
## 7 7 54 44.2 35.8 18.2
## 8 8 57 47.9 38.2 18.8
## 9 9 60 54.9 42.8 17.2
## 10 10 61 60.5 46.4 14.6
## # ... with 2,203 more rows