library(readxl)
library(tidyverse)
## -- Attaching packages --------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v dplyr 1.0.2
## v tidyr 1.1.1 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(moderndive)
library(skimr)
# **ESTACION KENNEDY VS CANAIRIOS**
# **5 sensores diferentes: PMS7003 & PMSA003 & HPMA115S0 & SPS30 & SNGCJA5**
# **Comparaciones entre sensores de bajo costo y la estación oficial**
# Load the KENNEDY_CANAIRIOS workbook into `df`.
# NOTE(review): the absolute Windows path ties the script to one machine;
# consider a project-relative path.
df <- read_excel("C:/Mediciones/KENNEDY_CANAIRIOS.xlsx")
# View() needs a GUI data viewer, so only open it in interactive sessions;
# non-interactive runs (Rscript, knitting) skip it.
if (interactive()) {
  View(df)
}
# Print column names, types and leading values as a quick structure check.
glimpse(df)
## Rows: 803
## Columns: 8
## $ Num <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17...
## $ Fecha <chr> "10-11-2020 24:00", "11-11-2020 01:00", "11-11-2020 02:00...
## $ Oficial <dbl> 22.0, 15.0, 19.0, 17.0, 11.0, 8.0, 7.0, 9.0, 20.0, 41.0, ...
## $ PMS7003 <dbl> 6.51, 8.59, 10.40, 7.11, 4.76, 7.36, 11.40, 15.40, 15.90,...
## $ PMSA003 <dbl> 6.91, 9.69, 11.90, 7.80, 4.55, 7.64, 11.90, 18.10, 18.50,...
## $ HPMA115S0 <dbl> 4.75, 6.00, 6.64, 4.91, 3.13, 5.24, 5.74, 8.41, 7.45, 6.9...
## $ SPS30 <dbl> 4.51, 5.69, 6.51, 4.71, 3.27, 4.75, 6.02, 8.81, 8.69, 8.0...
## $ SNGCJA5 <dbl> 3.31, 4.46, 5.20, 3.45, 2.24, 3.84, 4.91, 7.37, 7.11, 6.5...
# Show 10 randomly chosen rows as a sanity check of the loaded data.
# slice_sample() replaces the superseded sample_n(); the draw is random, so
# the rows differ between runs unless a seed is set beforehand.
df %>%
  slice_sample(n = 10)
## # A tibble: 10 x 8
## Num Fecha Oficial PMS7003 PMSA003 HPMA115S0 SPS30 SNGCJA5
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 483 01-12-2020 02:00 17 12 13.7 7.96 6.95 5.55
## 2 400 27-11-2020 15:00 20 17.8 22.5 10.7 10.8 9.15
## 3 808 14-12-2020 15:00 8 12.5 15 8.51 7.42 6.18
## 4 298 23-11-2020 09:00 47 15.7 18.6 10.8 9.37 8.13
## 5 591 05-12-2020 14:00 13 2.65 2.02 4.42 2.21 1.44
## 6 73 13-11-2020 24:00 32 27.8 33.5 17.3 14.9 13.3
## 7 26 12-11-2020 01:00 38 26.5 32.1 13.4 13.8 12.2
## 8 166 17-11-2020 21:00 17 13.4 15.6 8.94 7.62 6.42
## 9 72 13-11-2020 23:00 30 22.7 27.3 14.5 12.2 10.7
## 10 528 02-12-2020 23:00 33 17.8 20.5 13.1 9.7 8.46
# Interactive overlay of all six PM2.5 series against the row index Num:
# one lines+markers trace per column, built as a single pipe chain.
fig <- plot_ly(
  df,
  x = ~Num, y = ~PMS7003,
  name = "PM2.5 PMS7003", type = "scatter", mode = "lines+markers"
) %>%
  add_trace(y = ~PMSA003, name = "PM2.5 PMSA003", mode = "lines+markers") %>%
  add_trace(y = ~HPMA115S0, name = "PM2.5 HPMA115S0", mode = "lines+markers") %>%
  add_trace(y = ~SPS30, name = "PM2.5 SPS30", mode = "lines+markers") %>%
  add_trace(y = ~SNGCJA5, name = "PM2.5 SNGCJA5", mode = "lines+markers") %>%
  add_trace(y = ~Oficial, name = "PM2.5 Oficial", mode = "lines+markers")
fig
#Caso 1: SNGCJA5 VS SPS30
# Descriptive statistics (skimr) for the two columns compared in this case.
df %>%
  select(SNGCJA5, SPS30) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
9.64 |
6.51 |
0.43 |
5.04 |
8.36 |
12.70 |
54.8 |
▇▃▁▁▁ |
| SPS30 |
0 |
1 |
11.28 |
7.11 |
1.35 |
6.25 |
9.89 |
14.65 |
62.7 |
▇▃▁▁▁ |
# Pearson correlation coefficient between SNGCJA5 and SPS30.
get_correlation(df, formula = SNGCJA5 ~ SPS30)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.998
# Scatter plot of SPS30 against SNGCJA5 with a fitted lm line (no SE band).
ggplot(data = df, mapping = aes(x = SNGCJA5, y = SPS30)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SNGCJA5 and SPS30",
    x = "PM25 SNGCJA5",
    y = "PM25 SPS30"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression SPS30 ~ SNGCJA5 on df.
score_model <- lm(SPS30 ~ SNGCJA5, data = df)
# Coefficient table: estimate, std. error, statistic, p-value, CI bounds.
score_model %>% get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 0.772 0.029 26.7 0 0.715 0.829
## 2 SNGCJA5 1.09 0.002 439. 0 1.08 1.09
# Observed value, fitted value and residual for every row of the model.
regression_points <- score_model %>% get_regression_points()
regression_points
## # A tibble: 803 x 5
## ID SPS30 SNGCJA5 SPS30_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 4.51 3.31 4.38 0.132
## 2 2 5.69 4.46 5.63 0.06
## 3 3 6.51 5.2 6.44 0.074
## 4 4 4.71 3.45 4.53 0.18
## 5 5 3.27 2.24 3.21 0.058
## 6 6 4.75 3.84 4.96 -0.205
## 7 7 6.02 4.91 6.12 -0.101
## 8 8 8.81 7.37 8.8 0.01
## 9 9 8.69 7.11 8.52 0.173
## 10 10 8.06 6.57 7.93 0.131
## # ... with 793 more rows
#Caso 2: SNGCJA5 VS HPMA115S0
# Descriptive statistics (skimr) for the two columns compared in this case.
df %>%
  select(SNGCJA5, HPMA115S0) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
9.64 |
6.51 |
0.43 |
5.04 |
8.36 |
12.7 |
54.8 |
▇▃▁▁▁ |
| HPMA115S0 |
0 |
1 |
12.43 |
7.54 |
2.25 |
7.26 |
10.70 |
15.4 |
64.6 |
▇▂▁▁▁ |
# Pearson correlation coefficient between SNGCJA5 and HPMA115S0.
get_correlation(df, formula = SNGCJA5 ~ HPMA115S0)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.968
# Scatter plot of HPMA115S0 against SNGCJA5 with a fitted lm line (no SE band).
ggplot(data = df, mapping = aes(x = SNGCJA5, y = HPMA115S0)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SNGCJA5 and HPMA115S0",
    x = "PM25 SNGCJA5",
    y = "PM25 HPMA115S0"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression HPMA115S0 ~ SNGCJA5 on df.
score_model <- lm(HPMA115S0 ~ SNGCJA5, data = df)
# Coefficient table: estimate, std. error, statistic, p-value, CI bounds.
score_model %>% get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 1.63 0.119 13.7 0 1.39 1.86
## 2 SNGCJA5 1.12 0.01 109. 0 1.1 1.14
# Observed value, fitted value and residual for every row of the model.
regression_points <- score_model %>% get_regression_points()
regression_points
## # A tibble: 803 x 5
## ID HPMA115S0 SNGCJA5 HPMA115S0_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 4.75 3.31 5.34 -0.586
## 2 2 6 4.46 6.62 -0.624
## 3 3 6.64 5.2 7.45 -0.813
## 4 4 4.91 3.45 5.49 -0.582
## 5 5 3.13 2.24 4.14 -1.01
## 6 6 5.24 3.84 5.93 -0.689
## 7 7 5.74 4.91 7.13 -1.39
## 8 8 8.41 7.37 9.88 -1.48
## 9 9 7.45 7.11 9.59 -2.14
## 10 10 6.96 6.57 8.99 -2.03
## # ... with 793 more rows
#Caso 3: SNGCJA5 VS PMSA003
# Descriptive statistics (skimr) for the two columns compared in this case.
df %>%
  select(SNGCJA5, PMSA003) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
9.64 |
6.51 |
0.43 |
5.04 |
8.36 |
12.7 |
54.8 |
▇▃▁▁▁ |
| PMSA003 |
0 |
1 |
24.25 |
17.10 |
0.84 |
11.70 |
20.90 |
32.9 |
138.0 |
▇▃▁▁▁ |
# Pearson correlation coefficient between SNGCJA5 and PMSA003.
get_correlation(df, formula = SNGCJA5 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.997
# Scatter plot of PMSA003 against SNGCJA5 with a fitted lm line (no SE band).
ggplot(data = df, mapping = aes(x = SNGCJA5, y = PMSA003)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SNGCJA5 and PMSA003",
    x = "PM25 SNGCJA5",
    y = "PM25 PMSA003"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression PMSA003 ~ SNGCJA5 on df.
score_model <- lm(PMSA003 ~ SNGCJA5, data = df)
# Coefficient table: estimate, std. error, statistic, p-value, CI bounds.
score_model %>% get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -0.997 0.089 -11.2 0 -1.17 -0.822
## 2 SNGCJA5 2.62 0.008 342. 0 2.60 2.63
# Observed value, fitted value and residual for every row of the model.
regression_points <- score_model %>% get_regression_points()
regression_points
## # A tibble: 803 x 5
## ID PMSA003 SNGCJA5 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 6.91 3.31 7.67 -0.756
## 2 2 9.69 4.46 10.7 -0.986
## 3 3 11.9 5.2 12.6 -0.713
## 4 4 7.8 3.45 8.03 -0.232
## 5 5 4.55 2.24 4.87 -0.316
## 6 6 7.64 3.84 9.05 -1.41
## 7 7 11.9 4.91 11.9 0.046
## 8 8 18.1 7.37 18.3 -0.192
## 9 9 18.5 7.11 17.6 0.888
## 10 10 16.8 6.57 16.2 0.602
## # ... with 793 more rows
#Caso 4: SNGCJA5 VS PMS7003
# Descriptive statistics (skimr) for the two columns compared in this case.
df %>%
  select(SNGCJA5, PMS7003) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
9.64 |
6.51 |
0.43 |
5.04 |
8.36 |
12.7 |
54.8 |
▇▃▁▁▁ |
| PMS7003 |
0 |
1 |
20.51 |
14.00 |
1.00 |
10.50 |
17.70 |
27.2 |
116.0 |
▇▃▁▁▁ |
# Pearson correlation coefficient between SNGCJA5 and PMS7003.
get_correlation(df, formula = SNGCJA5 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.995
# Scatter plot of PMS7003 against SNGCJA5 with a fitted lm line (no SE band).
ggplot(data = df, mapping = aes(x = SNGCJA5, y = PMS7003)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SNGCJA5 and PMS7003",
    x = "PM25 SNGCJA5",
    y = "PM25 PMS7003"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression PMS7003 ~ SNGCJA5 on df.
score_model <- lm(PMS7003 ~ SNGCJA5, data = df)
# Coefficient table: estimate, std. error, statistic, p-value, CI bounds.
score_model %>% get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -0.116 0.089 -1.30 0.193 -0.291 0.059
## 2 SNGCJA5 2.14 0.008 279. 0 2.12 2.15
# Observed value, fitted value and residual for every row of the model.
regression_points <- score_model %>% get_regression_points()
regression_points
## # A tibble: 803 x 5
## ID PMS7003 SNGCJA5 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 6.51 3.31 6.96 -0.453
## 2 2 8.59 4.46 9.42 -0.832
## 3 3 10.4 5.2 11.0 -0.605
## 4 4 7.11 3.45 7.26 -0.152
## 5 5 4.76 2.24 4.68 0.085
## 6 6 7.36 3.84 8.10 -0.737
## 7 7 11.4 4.91 10.4 1.01
## 8 8 15.4 7.37 15.6 -0.246
## 9 9 15.9 7.11 15.1 0.81
## 10 10 14.2 6.57 13.9 0.265
## # ... with 793 more rows
#Caso 5: SNGCJA5 VS Oficial
# Descriptive statistics (skimr) for the two columns compared in this case.
df %>%
  select(SNGCJA5, Oficial) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
9.64 |
6.51 |
0.43 |
5.04 |
8.36 |
12.7 |
54.8 |
▇▃▁▁▁ |
| Oficial |
0 |
1 |
25.14 |
14.37 |
0.00 |
16.00 |
22.00 |
32.0 |
124.0 |
▇▅▁▁▁ |
# Pearson correlation coefficient between SNGCJA5 and Oficial.
get_correlation(df, formula = SNGCJA5 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.519
# Scatter plot of Oficial against SNGCJA5 with a fitted lm line (no SE band).
ggplot(data = df, mapping = aes(x = SNGCJA5, y = Oficial)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SNGCJA5 and Oficial",
    x = "PM25 SNGCJA5",
    y = "PM25 Oficial"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression Oficial ~ SNGCJA5 on df.
score_model <- lm(Oficial ~ SNGCJA5, data = df)
# Coefficient table: estimate, std. error, statistic, p-value, CI bounds.
score_model %>% get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 14.1 0.775 18.2 0 12.6 15.6
## 2 SNGCJA5 1.15 0.067 17.2 0 1.01 1.28
# Observed value, fitted value and residual for every row of the model.
regression_points <- score_model %>% get_regression_points()
regression_points
## # A tibble: 803 x 5
## ID Oficial SNGCJA5 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 22 3.31 17.9 4.11
## 2 2 15 4.46 19.2 -4.20
## 3 3 19 5.2 20.1 -1.05
## 4 4 17 3.45 18.0 -1.05
## 5 5 11 2.24 16.7 -5.66
## 6 6 8 3.84 18.5 -10.5
## 7 7 7 4.91 19.7 -12.7
## 8 8 9 7.37 22.5 -13.5
## 9 9 20 7.11 22.2 -2.24
## 10 10 41 6.57 21.6 19.4
## # ... with 793 more rows
#Caso 6: SPS30 VS HPMA115S0
# Descriptive statistics (skimr) for the two columns compared in this case.
df %>%
  select(SPS30, HPMA115S0) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
11.28 |
7.11 |
1.35 |
6.25 |
9.89 |
14.65 |
62.7 |
▇▃▁▁▁ |
| HPMA115S0 |
0 |
1 |
12.43 |
7.54 |
2.25 |
7.26 |
10.70 |
15.40 |
64.6 |
▇▂▁▁▁ |
# Pearson correlation coefficient between SPS30 and HPMA115S0.
get_correlation(df, formula = SPS30 ~ HPMA115S0)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.957
# Scatter plot of HPMA115S0 against SPS30 with a fitted lm line (no SE band).
ggplot(data = df, mapping = aes(x = SPS30, y = HPMA115S0)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SPS30 and HPMA115S0",
    x = "PM25 SPS30",
    y = "PM25 HPMA115S0"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression HPMA115S0 ~ SPS30 on df.
score_model <- lm(HPMA115S0 ~ SPS30, data = df)
# Coefficient table: estimate, std. error, statistic, p-value, CI bounds.
score_model %>% get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 0.992 0.145 6.84 0 0.707 1.28
## 2 SPS30 1.01 0.011 93.2 0 0.993 1.04
# Observed value, fitted value and residual for every row of the model.
regression_points <- score_model %>% get_regression_points()
regression_points
## # A tibble: 803 x 5
## ID HPMA115S0 SPS30 HPMA115S0_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 4.75 4.51 5.57 -0.817
## 2 2 6 5.69 6.76 -0.764
## 3 3 6.64 6.51 7.60 -0.956
## 4 4 4.91 4.71 5.77 -0.86
## 5 5 3.13 3.27 4.31 -1.18
## 6 6 5.24 4.75 5.81 -0.571
## 7 7 5.74 6.02 7.10 -1.36
## 8 8 8.41 8.81 9.93 -1.52
## 9 9 7.45 8.69 9.81 -2.36
## 10 10 6.96 8.06 9.17 -2.21
## # ... with 793 more rows
#Caso 7: SPS30 VS PMSA003
# Descriptive statistics (skimr) for the two columns compared in this case.
df %>%
  select(SPS30, PMSA003) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
11.28 |
7.11 |
1.35 |
6.25 |
9.89 |
14.65 |
62.7 |
▇▃▁▁▁ |
| PMSA003 |
0 |
1 |
24.25 |
17.10 |
0.84 |
11.70 |
20.90 |
32.90 |
138.0 |
▇▃▁▁▁ |
# Pearson correlation coefficient between SPS30 and PMSA003.
get_correlation(df, formula = SPS30 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.996
# Scatter plot of PMSA003 against SPS30 with a fitted lm line (no SE band).
ggplot(data = df, mapping = aes(x = SPS30, y = PMSA003)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SPS30 and PMSA003",
    x = "PM25 SPS30",
    y = "PM25 PMSA003"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression PMSA003 ~ SPS30 on df.
score_model <- lm(PMSA003 ~ SPS30, data = df)
# Coefficient table: estimate, std. error, statistic, p-value, CI bounds.
score_model %>% get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -2.79 0.097 -28.8 0 -2.98 -2.60
## 2 SPS30 2.40 0.007 330. 0 2.38 2.41
# Observed value, fitted value and residual for every row of the model.
regression_points <- score_model %>% get_regression_points()
regression_points
## # A tibble: 803 x 5
## ID PMSA003 SPS30 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 6.91 4.51 8.02 -1.11
## 2 2 9.69 5.69 10.8 -1.16
## 3 3 11.9 6.51 12.8 -0.916
## 4 4 7.8 4.71 8.50 -0.701
## 5 5 4.55 3.27 5.05 -0.499
## 6 6 7.64 4.75 8.60 -0.957
## 7 7 11.9 6.02 11.6 0.259
## 8 8 18.1 8.81 18.3 -0.229
## 9 9 18.5 8.69 18.0 0.458
## 10 10 16.8 8.06 16.5 0.269
## # ... with 793 more rows
#Caso 8: SPS30 VS PMS7003
# Descriptive statistics (skimr) for the two columns compared in this case.
df %>%
  select(SPS30, PMS7003) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
11.28 |
7.11 |
1.35 |
6.25 |
9.89 |
14.65 |
62.7 |
▇▃▁▁▁ |
| PMS7003 |
0 |
1 |
20.51 |
14.00 |
1.00 |
10.50 |
17.70 |
27.20 |
116.0 |
▇▃▁▁▁ |
# Pearson correlation coefficient between SPS30 and PMS7003.
get_correlation(df, formula = SPS30 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.992
# Scatter plot of PMS7003 against SPS30 with a fitted lm line (no SE band).
ggplot(data = df, mapping = aes(x = SPS30, y = PMS7003)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SPS30 and PMS7003",
    x = "PM25 SPS30",
    y = "PM25 PMS7003"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression PMS7003 ~ SPS30 on df.
score_model <- lm(PMS7003 ~ SPS30, data = df)
# Coefficient table: estimate, std. error, statistic, p-value, CI bounds.
score_model %>% get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -1.53 0.114 -13.4 0 -1.75 -1.30
## 2 SPS30 1.95 0.009 228. 0 1.94 1.97
# Observed value, fitted value and residual for every row of the model.
regression_points <- score_model %>% get_regression_points()
regression_points
## # A tibble: 803 x 5
## ID PMS7003 SPS30 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 6.51 4.51 7.28 -0.774
## 2 2 8.59 5.69 9.59 -1
## 3 3 10.4 6.51 11.2 -0.793
## 4 4 7.11 4.71 7.68 -0.565
## 5 5 4.76 3.27 4.86 -0.101
## 6 6 7.36 4.75 7.75 -0.393
## 7 7 11.4 6.02 10.2 1.16
## 8 8 15.4 8.81 15.7 -0.287
## 9 9 15.9 8.69 15.5 0.447
## 10 10 14.2 8.06 14.2 -0.022
## # ... with 793 more rows
#Caso 9: SPS30 VS Oficial
# Descriptive statistics (skimr) for the two columns compared in this case.
df %>%
  select(SPS30, Oficial) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
11.28 |
7.11 |
1.35 |
6.25 |
9.89 |
14.65 |
62.7 |
▇▃▁▁▁ |
| Oficial |
0 |
1 |
25.14 |
14.37 |
0.00 |
16.00 |
22.00 |
32.00 |
124.0 |
▇▅▁▁▁ |
# Pearson correlation coefficient between SPS30 and Oficial.
get_correlation(df, formula = SPS30 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.509
# Scatter plot of Oficial against SPS30 with a fitted lm line (no SE band).
ggplot(data = df, mapping = aes(x = SPS30, y = Oficial)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SPS30 and Oficial",
    x = "PM25 SPS30",
    y = "PM25 Oficial"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression Oficial ~ SPS30 on df.
score_model <- lm(Oficial ~ SPS30, data = df)
# Coefficient table: estimate, std. error, statistic, p-value, CI bounds.
score_model %>% get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 13.5 0.819 16.5 0 11.9 15.2
## 2 SPS30 1.03 0.061 16.7 0 0.908 1.15
# Observed value, fitted value and residual for every row of the model.
regression_points <- score_model %>% get_regression_points()
regression_points
## # A tibble: 803 x 5
## ID Oficial SPS30 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 22 4.51 18.2 3.82
## 2 2 15 5.69 19.4 -4.40
## 3 3 19 6.51 20.2 -1.24
## 4 4 17 4.71 18.4 -1.39
## 5 5 11 3.27 16.9 -5.91
## 6 6 8 4.75 18.4 -10.4
## 7 7 7 6.02 19.7 -12.7
## 8 8 9 8.81 22.6 -13.6
## 9 9 20 8.69 22.5 -2.48
## 10 10 41 8.06 21.8 19.2
## # ... with 793 more rows
#Caso 10: HPMA115S0 VS PMSA003
# Descriptive statistics (skimr) for the two columns compared in this case.
df %>%
  select(HPMA115S0, PMSA003) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
12.43 |
7.54 |
2.25 |
7.26 |
10.7 |
15.4 |
64.6 |
▇▂▁▁▁ |
| PMSA003 |
0 |
1 |
24.25 |
17.10 |
0.84 |
11.70 |
20.9 |
32.9 |
138.0 |
▇▃▁▁▁ |
# Pearson correlation coefficient between HPMA115S0 and PMSA003.
get_correlation(df, formula = HPMA115S0 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.957
# Scatter plot of PMSA003 against HPMA115S0 with a fitted lm line (no SE band).
ggplot(data = df, mapping = aes(x = HPMA115S0, y = PMSA003)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between HPMA115S0 and PMSA003",
    x = "PM25 HPMA115S0",
    y = "PM25 PMSA003"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression PMSA003 ~ HPMA115S0 on df.
score_model <- lm(PMSA003 ~ HPMA115S0, data = df)
# Coefficient table: estimate, std. error, statistic, p-value, CI bounds.
score_model %>% get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -2.76 0.337 -8.2 0 -3.43 -2.10
## 2 HPMA115S0 2.17 0.023 93.7 0 2.13 2.22
# Observed value, fitted value and residual for every row of the model.
regression_points <- score_model %>% get_regression_points()
regression_points
## # A tibble: 803 x 5
## ID PMSA003 HPMA115S0 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 6.91 4.75 7.56 -0.645
## 2 2 9.69 6 10.3 -0.580
## 3 3 11.9 6.64 11.7 0.239
## 4 4 7.8 4.91 7.90 -0.102
## 5 5 4.55 3.13 4.04 0.514
## 6 6 7.64 5.24 8.62 -0.979
## 7 7 11.9 5.74 9.70 2.19
## 8 8 18.1 8.41 15.5 2.59
## 9 9 18.5 7.45 13.4 5.08
## 10 10 16.8 6.96 12.4 4.44
## # ... with 793 more rows
#Caso 11: HPMA115S0 VS PMS7003
# Descriptive statistics (skimr) for the two columns compared in this case.
df %>%
  select(HPMA115S0, PMS7003) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
12.43 |
7.54 |
2.25 |
7.26 |
10.7 |
15.4 |
64.6 |
▇▂▁▁▁ |
| PMS7003 |
0 |
1 |
20.51 |
14.00 |
1.00 |
10.50 |
17.7 |
27.2 |
116.0 |
▇▃▁▁▁ |
# Pearson correlation coefficient between HPMA115S0 and PMS7003.
get_correlation(df, formula = HPMA115S0 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.963
# Scatter plot of PMS7003 against HPMA115S0 with a fitted lm line (no SE band).
ggplot(data = df, mapping = aes(x = HPMA115S0, y = PMS7003)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between HPMA115S0 and PMS7003",
    x = "PM25 HPMA115S0",
    y = "PM25 PMS7003"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression PMS7003 ~ HPMA115S0 on df.
score_model <- lm(PMS7003 ~ HPMA115S0, data = df)
# Coefficient table: estimate, std. error, statistic, p-value, CI bounds.
score_model %>% get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -1.72 0.258 -6.68 0 -2.23 -1.22
## 2 HPMA115S0 1.79 0.018 101. 0 1.75 1.82
# Observed value, fitted value and residual for every row of the model.
regression_points <- score_model %>% get_regression_points()
regression_points
## # A tibble: 803 x 5
## ID PMS7003 HPMA115S0 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 6.51 4.75 6.77 -0.261
## 2 2 8.59 6 9.01 -0.416
## 3 3 10.4 6.64 10.2 0.249
## 4 4 7.11 4.91 7.06 0.053
## 5 5 4.76 3.13 3.87 0.886
## 6 6 7.36 5.24 7.65 -0.287
## 7 7 11.4 5.74 8.54 2.86
## 8 8 15.4 8.41 13.3 2.08
## 9 9 15.9 7.45 11.6 4.30
## 10 10 14.2 6.96 10.7 3.48
## # ... with 793 more rows
#Caso 10: HPMA115S0 VS Oficial
# Descriptive statistics (skimr) for the two columns compared in this case.
df %>%
  select(HPMA115S0, Oficial) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
12.43 |
7.54 |
2.25 |
7.26 |
10.7 |
15.4 |
64.6 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
25.14 |
14.37 |
0.00 |
16.00 |
22.0 |
32.0 |
124.0 |
▇▅▁▁▁ |
# Pearson correlation coefficient between HPMA115S0 and Oficial.
get_correlation(df, formula = HPMA115S0 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.514
# Scatter plot of Oficial against HPMA115S0 with a fitted lm line (no SE band).
ggplot(data = df, mapping = aes(x = HPMA115S0, y = Oficial)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between HPMA115S0 and Oficial",
    x = "PM25 HPMA115S0",
    y = "PM25 Oficial"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression Oficial ~ HPMA115S0 on df.
score_model <- lm(Oficial ~ HPMA115S0, data = df)
# Coefficient table: estimate, std. error, statistic, p-value, CI bounds.
score_model %>% get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 13.0 0.84 15.4 0 11.3 14.6
## 2 HPMA115S0 0.98 0.058 17.0 0 0.867 1.09
# Observed value, fitted value and residual for every row of the model.
regression_points <- score_model %>% get_regression_points()
regression_points
## # A tibble: 803 x 5
## ID Oficial HPMA115S0 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 22 4.75 17.6 4.39
## 2 2 15 6 18.8 -3.84
## 3 3 19 6.64 19.5 -0.463
## 4 4 17 4.91 17.8 -0.767
## 5 5 11 3.13 16.0 -5.02
## 6 6 8 5.24 18.1 -10.1
## 7 7 7 5.74 18.6 -11.6
## 8 8 9 8.41 21.2 -12.2
## 9 9 20 7.45 20.3 -0.257
## 10 10 41 6.96 19.8 21.2
## # ... with 793 more rows
#Caso 11: PMSA003 VS PMS7003
# Descriptive statistics (skimr) for the two columns compared in this case.
df %>%
  select(PMSA003, PMS7003) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMSA003 |
0 |
1 |
24.25 |
17.1 |
0.84 |
11.7 |
20.9 |
32.9 |
138 |
▇▃▁▁▁ |
| PMS7003 |
0 |
1 |
20.51 |
14.0 |
1.00 |
10.5 |
17.7 |
27.2 |
116 |
▇▃▁▁▁ |
# Pearson correlation coefficient between PMSA003 and PMS7003.
get_correlation(df, formula = PMSA003 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.998
# Scatter plot of PMS7003 against PMSA003 with a fitted lm line (no SE band).
ggplot(data = df, mapping = aes(x = PMSA003, y = PMS7003)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between PMSA003 and PMS7003",
    x = "PM25 PMSA003",
    y = "PM25 PMS7003"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression PMS7003 ~ PMSA003 on df.
score_model <- lm(PMS7003 ~ PMSA003, data = df)
# Coefficient table: estimate, std. error, statistic, p-value, CI bounds.
score_model %>% get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 0.705 0.054 13.0 0 0.599 0.811
## 2 PMSA003 0.817 0.002 448. 0 0.813 0.82
# Observed value, fitted value and residual for every row of the model.
regression_points <- score_model %>% get_regression_points()
regression_points
## # A tibble: 803 x 5
## ID PMS7003 PMSA003 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 6.51 6.91 6.35 0.16
## 2 2 8.59 9.69 8.62 -0.031
## 3 3 10.4 11.9 10.4 -0.026
## 4 4 7.11 7.8 7.08 0.033
## 5 5 4.76 4.55 4.42 0.338
## 6 6 7.36 7.64 6.95 0.414
## 7 7 11.4 11.9 10.4 0.974
## 8 8 15.4 18.1 15.5 -0.091
## 9 9 15.9 18.5 15.8 0.083
## 10 10 14.2 16.8 14.4 -0.229
## # ... with 793 more rows
#Caso 12: PMSA003 VS Oficial
# Descriptive statistics (skimr) for the two columns compared in this case.
df %>%
  select(PMSA003, Oficial) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMSA003 |
0 |
1 |
24.25 |
17.10 |
0.84 |
11.7 |
20.9 |
32.9 |
138 |
▇▃▁▁▁ |
| Oficial |
0 |
1 |
25.14 |
14.37 |
0.00 |
16.0 |
22.0 |
32.0 |
124 |
▇▅▁▁▁ |
# Pearson correlation coefficient between PMSA003 and Oficial.
get_correlation(df, formula = PMSA003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.520
# Scatter plot of Oficial against PMSA003 with a fitted lm line (no SE band).
ggplot(data = df, mapping = aes(x = PMSA003, y = Oficial)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between PMSA003 and Oficial",
    x = "PM25 PMSA003",
    y = "PM25 Oficial"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression Oficial ~ PMSA003 on df.
score_model <- lm(Oficial ~ PMSA003, data = df)
# Coefficient table: estimate, std. error, statistic, p-value, CI bounds.
score_model %>% get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 14.6 0.752 19.4 0 13.1 16.0
## 2 PMSA003 0.436 0.025 17.2 0 0.387 0.486
# Observed value, fitted value and residual for every row of the model.
regression_points <- score_model %>% get_regression_points()
regression_points
## # A tibble: 803 x 5
## ID Oficial PMSA003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 22 6.91 17.6 4.42
## 2 2 15 9.69 18.8 -3.79
## 3 3 19 11.9 19.8 -0.754
## 4 4 17 7.8 18.0 -0.964
## 5 5 11 4.55 16.5 -5.55
## 6 6 8 7.64 17.9 -9.90
## 7 7 7 11.9 19.8 -12.8
## 8 8 9 18.1 22.5 -13.5
## 9 9 20 18.5 22.6 -2.64
## 10 10 41 16.8 21.9 19.1
## # ... with 793 more rows
#Caso 13: PMS7003 VS Oficial
# Descriptive statistics (skimr) for the two columns compared in this case.
df %>%
  select(PMS7003, Oficial) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMS7003 |
0 |
1 |
20.51 |
14.00 |
1 |
10.5 |
17.7 |
27.2 |
116 |
▇▃▁▁▁ |
| Oficial |
0 |
1 |
25.14 |
14.37 |
0 |
16.0 |
22.0 |
32.0 |
124 |
▇▅▁▁▁ |
# Pearson correlation coefficient between PMS7003 and Oficial.
get_correlation(df, formula = PMS7003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.525
# Scatter plot of Oficial against PMS7003 with a fitted lm line (no SE band).
ggplot(data = df, mapping = aes(x = PMS7003, y = Oficial)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between PMS7003 and Oficial",
    x = "PM25 PMS7003",
    y = "PM25 Oficial"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression Oficial ~ PMS7003 on df.
score_model <- lm(Oficial ~ PMS7003, data = df)
# Coefficient table; digits = 11 is passed here, unlike the other cases.
score_model %>% get_regression_table(digits = 11)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 14.1 0.766 18.4 0 12.6 15.6
## 2 PMS7003 0.539 0.0309 17.4 0 0.478 0.599
# Observed value, fitted value and residual for every row of the model.
regression_points <- score_model %>% get_regression_points()
regression_points
## # A tibble: 803 x 5
## ID Oficial PMS7003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 22 6.51 17.6 4.40
## 2 2 15 8.59 18.7 -3.72
## 3 3 19 10.4 19.7 -0.697
## 4 4 17 7.11 17.9 -0.925
## 5 5 11 4.76 16.7 -5.66
## 6 6 8 7.36 18.1 -10.1
## 7 7 7 11.4 20.2 -13.2
## 8 8 9 15.4 22.4 -13.4
## 9 9 20 15.9 22.7 -2.66
## 10 10 41 14.2 21.7 19.3
## # ... with 793 more rows
# **ESTACION KENNEDY -1hora VS CANAIRIOS**
# **Prueba con los valores de la estación Kennedy retrasada 1 hora**
# **5 sensores diferentes: PMS7003 & PMSA003 & HPMA115S0 & SPS30 & SNGCJA5**
# **Comparaciones con la estación oficial**
# Load the KENNEDY_CANAIRIOS_1h workbook into `df`, replacing the previous
# table; every later block in this section reads this version.
# NOTE(review): the absolute Windows path ties the script to one machine;
# consider a project-relative path.
df <- read_excel("C:/Mediciones/KENNEDY_CANAIRIOS_1h.xlsx")
# View() needs a GUI data viewer, so only open it in interactive sessions;
# non-interactive runs (Rscript, knitting) skip it.
if (interactive()) {
  View(df)
}
# Print column names, types and leading values as a quick structure check.
glimpse(df)
## Rows: 803
## Columns: 8
## $ Num <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17...
## $ Fecha <chr> "10-11-2020 24:00", "11-11-2020 01:00", "11-11-2020 02:00...
## $ Oficial <dbl> 15.0, 19.0, 17.0, 11.0, 8.0, 7.0, 9.0, 20.0, 41.0, 24.0, ...
## $ PMS7003 <dbl> 6.51, 8.59, 10.40, 7.11, 4.76, 7.36, 11.40, 15.40, 15.90,...
## $ PMSA003 <dbl> 6.91, 9.69, 11.90, 7.80, 4.55, 7.64, 11.90, 18.10, 18.50,...
## $ HPMA115S0 <dbl> 4.75, 6.00, 6.64, 4.91, 3.13, 5.24, 5.74, 8.41, 7.45, 6.9...
## $ SPS30 <dbl> 4.51, 5.69, 6.51, 4.71, 3.27, 4.75, 6.02, 8.81, 8.69, 8.0...
## $ SNGCJA5 <dbl> 3.31, 4.46, 5.20, 3.45, 2.24, 3.84, 4.91, 7.37, 7.11, 6.5...
# Show 10 randomly chosen rows as a sanity check of the loaded data.
# slice_sample() replaces the superseded sample_n(); the draw is random, so
# the rows differ between runs unless a seed is set beforehand.
df %>%
  slice_sample(n = 10)
## # A tibble: 10 x 8
## Num Fecha Oficial PMS7003 PMSA003 HPMA115S0 SPS30 SNGCJA5
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 162 17-11-2020 17:00 10 5.53 5.55 5.4 3.72 2.81
## 2 663 08-12-2020 14:00 19 32.8 40.8 16.5 18.1 15.6
## 3 137 16-11-2020 16:00 18 20.4 26.2 10.1 12.3 9.95
## 4 780 13-12-2020 11:00 12 2.21 1.79 3.04 2.07 1.11
## 5 248 21-11-2020 07:00 63 29.3 31.9 16.3 12.6 12.1
## 6 552 03-12-2020 23:00 19 34.6 41.8 16.8 18.5 15.6
## 7 63 13-11-2020 14:00 18 12.2 14.6 7.83 7.54 6.06
## 8 19 11-11-2020 18:00 8 24.8 28.5 12.9 11.9 10.6
## 9 207 19-11-2020 14:00 21 15.7 18.8 9.45 9.2 7.57
## 10 306 23-11-2020 17:00 13 30.1 36.6 15.3 15.7 13.3
# Interactive overlay of all six PM2.5 series against the row index Num:
# one lines+markers trace per column, built as a single pipe chain.
fig <- plot_ly(
  df,
  x = ~Num, y = ~PMS7003,
  name = "PM2.5 PMS7003", type = "scatter", mode = "lines+markers"
) %>%
  add_trace(y = ~PMSA003, name = "PM2.5 PMSA003", mode = "lines+markers") %>%
  add_trace(y = ~HPMA115S0, name = "PM2.5 HPMA115S0", mode = "lines+markers") %>%
  add_trace(y = ~SPS30, name = "PM2.5 SPS30", mode = "lines+markers") %>%
  add_trace(y = ~SNGCJA5, name = "PM2.5 SNGCJA5", mode = "lines+markers") %>%
  add_trace(y = ~Oficial, name = "PM2.5 Oficial", mode = "lines+markers")
fig
#Caso 14: SNGCJA5 VS Oficial-1h
# Descriptive statistics (skimr) for the two columns compared in this case.
df %>%
  select(SNGCJA5, Oficial) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
9.64 |
6.51 |
0.43 |
5.04 |
8.36 |
12.7 |
54.8 |
▇▃▁▁▁ |
| Oficial |
0 |
1 |
25.16 |
14.37 |
0.00 |
16.00 |
22.00 |
32.0 |
124.0 |
▇▅▁▁▁ |
# Pearson correlation coefficient between SNGCJA5 and Oficial.
get_correlation(df, formula = SNGCJA5 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.646
# Scatter plot of Oficial against SNGCJA5 with a fitted lm line (no SE band).
ggplot(data = df, mapping = aes(x = SNGCJA5, y = Oficial)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SNGCJA5 and Oficial-1hour",
    x = "PM25 SNGCJA5",
    y = "PM25 Oficial"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression Oficial ~ SNGCJA5 on df.
score_model <- lm(Oficial ~ SNGCJA5, data = df)
# Coefficient table: estimate, std. error, statistic, p-value, CI bounds.
score_model %>% get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 11.4 0.692 16.5 0 10.0 12.8
## 2 SNGCJA5 1.43 0.059 24.0 0 1.31 1.54
# Observed value, fitted value and residual for every row of the model.
regression_points <- score_model %>% get_regression_points()
regression_points
## # A tibble: 803 x 5
## ID Oficial SNGCJA5 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 15 3.31 16.1 -1.12
## 2 2 19 4.46 17.8 1.24
## 3 3 17 5.2 18.8 -1.82
## 4 4 11 3.45 16.3 -5.32
## 5 5 8 2.24 14.6 -6.60
## 6 6 7 3.84 16.9 -9.88
## 7 7 9 4.91 18.4 -9.40
## 8 8 20 7.37 21.9 -1.91
## 9 9 41 7.11 21.5 19.5
## 10 10 24 6.57 20.8 3.23
## # ... with 793 more rows
# Case 15: SPS30 vs. Oficial-1h
# Summary statistics of the two columns compared in this case.
df %>%
  select(SPS30, Oficial) %>%
  skim()
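## Data summary
##
## |                          |            |
## |:-------------------------|:-----------|
## | Name                     | Piped data |
## | Number of rows           | 803        |
## | Number of columns        | 2          |
## | _______________________  |            |
## | Column type frequency:   |            |
## | numeric                  | 2          |
## | ________________________ |            |
## | Group variables          | None       |
##
## Variable type: numeric
##
## | skim_variable | n_missing | complete_rate |  mean |    sd |   p0 |   p25 |   p50 |   p75 |  p100 | hist  |
## |:--------------|----------:|--------------:|------:|------:|-----:|------:|------:|------:|------:|:------|
## | SPS30         |         0 |             1 | 11.28 |  7.11 | 1.35 |  6.25 |  9.89 | 14.65 |  62.7 | ▇▃▁▁▁ |
## | Oficial       |         0 |             1 | 25.16 | 14.37 | 0.00 | 16.00 | 22.00 | 32.00 | 124.0 | ▇▅▁▁▁ |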
# Pearson correlation coefficient, original: SPS30 vs. Oficial
get_correlation(df, formula = SPS30 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.629
# Scatter plot of Oficial against SPS30 with a fitted straight line on top.
ggplot(data = df, mapping = aes(x = SPS30, y = Oficial)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SPS30 and Oficial-1hour",
    x = "PM25 SPS30",
    y = "PM25 Oficial"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit regression model original: Oficial as a linear function of SPS30.
score_model <- lm(Oficial ~ SPS30, data = df)
# Get regression table original (estimates, standard errors, confidence bounds).
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 10.8 0.74 14.6 0 9.36 12.3
## 2 SPS30 1.27 0.056 22.9 0 1.16 1.38
# Observed value, fitted value and residual for each row used by the model.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 803 x 5
## ID Oficial SPS30 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 15 4.51 16.5 -1.55
## 2 2 19 5.69 18.0 0.953
## 3 3 17 6.51 19.1 -2.09
## 4 4 11 4.71 16.8 -5.80
## 5 5 8 3.27 15.0 -6.97
## 6 6 7 4.75 16.9 -9.85
## 7 7 9 6.02 18.5 -9.47
## 8 8 20 8.81 22.0 -2.02
## 9 9 41 8.69 21.9 19.1
## 10 10 24 8.06 21.1 2.94
## # ... with 793 more rows
# Case 16: HPMA115S0 vs. Oficial-1h
# Summary statistics of the two columns compared in this case.
df %>%
  select(HPMA115S0, Oficial) %>%
  skim()
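## Data summary
##
## |                          |            |
## |:-------------------------|:-----------|
## | Name                     | Piped data |
## | Number of rows           | 803        |
## | Number of columns        | 2          |
## | _______________________  |            |
## | Column type frequency:   |            |
## | numeric                  | 2          |
## | ________________________ |            |
## | Group variables          | None       |
##
## Variable type: numeric
##
## | skim_variable | n_missing | complete_rate |  mean |    sd |   p0 |   p25 |   p50 |   p75 |  p100 | hist  |
## |:--------------|----------:|--------------:|------:|------:|-----:|------:|------:|------:|------:|:------|
## | HPMA115S0     |         0 |             1 | 12.43 |  7.54 | 2.25 |  7.26 |  10.7 |  15.4 |  64.6 | ▇▂▁▁▁ |
## | Oficial       |         0 |             1 | 25.16 | 14.37 | 0.00 | 16.00 |  22.0 |  32.0 | 124.0 | ▇▅▁▁▁ |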
# Pearson correlation coefficient, original: HPMA115S0 vs. Oficial
get_correlation(df, formula = HPMA115S0 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.654
# Scatter plot of Oficial against HPMA115S0 with a fitted straight line on top.
ggplot(data = df, mapping = aes(x = HPMA115S0, y = Oficial)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between HPMA115S0 and Oficial-1hour",
    x = "PM25 HPMA115S0",
    y = "PM25 Oficial"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit regression model original: Oficial as a linear function of HPMA115S0.
score_model <- lm(Oficial ~ HPMA115S0, data = df)
# Get regression table original (estimates, standard errors, confidence bounds).
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 9.65 0.741 13.0 0 8.20 11.1
## 2 HPMA115S0 1.25 0.051 24.5 0 1.15 1.35
# Observed value, fitted value and residual for each row used by the model.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 803 x 5
## ID Oficial HPMA115S0 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 15 4.75 15.6 -0.575
## 2 2 19 6 17.1 1.87
## 3 3 17 6.64 17.9 -0.931
## 4 4 11 4.91 15.8 -4.77
## 5 5 8 3.13 13.6 -5.56
## 6 6 7 5.24 16.2 -9.19
## 7 7 9 5.74 16.8 -7.81
## 8 8 20 8.41 20.1 -0.139
## 9 9 41 7.45 18.9 22.1
## 10 10 24 6.96 18.3 5.67
## # ... with 793 more rows
# Case 17: PMSA003 vs. Oficial-1h
# Summary statistics of the two columns compared in this case.
df %>%
  select(PMSA003, Oficial) %>%
  skim()
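## Data summary
##
## |                          |            |
## |:-------------------------|:-----------|
## | Name                     | Piped data |
## | Number of rows           | 803        |
## | Number of columns        | 2          |
## | _______________________  |            |
## | Column type frequency:   |            |
## | numeric                  | 2          |
## | ________________________ |            |
## | Group variables          | None       |
##
## Variable type: numeric
##
## | skim_variable | n_missing | complete_rate |  mean |    sd |   p0 |   p25 |   p50 |   p75 |  p100 | hist  |
## |:--------------|----------:|--------------:|------:|------:|-----:|------:|------:|------:|------:|:------|
## | PMSA003       |         0 |             1 | 24.25 | 17.10 | 0.84 |  11.7 |  20.9 |  32.9 |   138 | ▇▃▁▁▁ |
## | Oficial       |         0 |             1 | 25.16 | 14.37 | 0.00 |  16.0 |  22.0 |  32.0 |   124 | ▇▅▁▁▁ |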
# Pearson correlation coefficient, original: PMSA003 vs. Oficial
get_correlation(df, formula = PMSA003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.647
# Scatter plot of Oficial against PMSA003 with a fitted straight line on top.
ggplot(data = df, mapping = aes(x = PMSA003, y = Oficial)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between PMSA003 and Oficial-1hour",
    x = "PM25 PMSA003",
    y = "PM25 Oficial"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit regression model original: Oficial as a linear function of PMSA003.
score_model <- lm(Oficial ~ PMSA003, data = df)
# Get regression table original (estimates, standard errors, confidence bounds).
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 12.0 0.671 17.8 0 10.6 13.3
## 2 PMSA003 0.544 0.023 24.0 0 0.5 0.588
# Observed value, fitted value and residual for each row used by the model.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 803 x 5
## ID Oficial PMSA003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 15 6.91 15.7 -0.725
## 2 2 19 9.69 17.2 1.76
## 3 3 17 11.9 18.4 -1.44
## 4 4 11 7.8 16.2 -5.21
## 5 5 8 4.55 14.4 -6.44
## 6 6 7 7.64 16.1 -9.12
## 7 7 9 11.9 18.4 -9.44
## 8 8 20 18.1 21.8 -1.81
## 9 9 41 18.5 22.0 19.0
## 10 10 24 16.8 21.1 2.90
## # ... with 793 more rows
# Case 18: PMS7003 vs. Oficial-1h
# Summary statistics of the two columns compared in this case.
df %>%
  select(PMS7003, Oficial) %>%
  skim()
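## Data summary
##
## |                          |            |
## |:-------------------------|:-----------|
## | Name                     | Piped data |
## | Number of rows           | 803        |
## | Number of columns        | 2          |
## | _______________________  |            |
## | Column type frequency:   |            |
## | numeric                  | 2          |
## | ________________________ |            |
## | Group variables          | None       |
##
## Variable type: numeric
##
## | skim_variable | n_missing | complete_rate |  mean |    sd |   p0 |   p25 |   p50 |   p75 |  p100 | hist  |
## |:--------------|----------:|--------------:|------:|------:|-----:|------:|------:|------:|------:|:------|
## | PMS7003       |         0 |             1 | 20.51 | 14.00 |    1 |  10.5 |  17.7 |  27.2 |   116 | ▇▃▁▁▁ |
## | Oficial       |         0 |             1 | 25.16 | 14.37 |    0 |  16.0 |  22.0 |  32.0 |   124 | ▇▅▁▁▁ |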
# Pearson correlation coefficient, original: PMS7003 vs. Oficial
get_correlation(df, formula = PMS7003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.657
# Scatter plot of Oficial against PMS7003 with a fitted straight line on top.
ggplot(data = df, mapping = aes(x = PMS7003, y = Oficial)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between PMS7003 and Oficial-1hour",
    x = "PM25 PMS7003",
    y = "PM25 Oficial"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit regression model original: Oficial as a linear function of PMS7003.
score_model <- lm(Oficial ~ PMS7003, data = df)
# Get regression table original; this case requests digits = 11 rather than
# the default used by the other cases.
score_model %>%
  get_regression_table(digits = 11)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 11.3 0.679 16.7 0 9.99 12.7
## 2 PMS7003 0.674 0.0273 24.7 0 0.621 0.728
# Observed value, fitted value and residual for each row used by the model.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 803 x 5
## ID Oficial PMS7003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 15 6.51 15.7 -0.712
## 2 2 19 8.59 17.1 1.88
## 3 3 17 10.4 18.3 -1.34
## 4 4 11 7.11 16.1 -5.12
## 5 5 8 4.76 14.5 -6.53
## 6 6 7 7.36 16.3 -9.28
## 7 7 9 11.4 19.0 -10.0
## 8 8 20 15.4 21.7 -1.71
## 9 9 41 15.9 22.0 19.0
## 10 10 24 14.2 20.9 3.10
## # ... with 793 more rows
# **KENNEDY STATION -2 hours VS CANAIRIOS**
# **Test using the Kennedy station values delayed by 2 hours**
# **5 different sensors: PMS7003 & PMSA003 & HPMA115S0 & SPS30 & SNGCJA5**
# **Comparisons with the official station**
df <- read_excel("C:/Mediciones/KENNEDY_CANAIRIOS_2h.xlsx")
# View() opens a data viewer window, which is only useful in an interactive
# session; skip it when the script is run non-interactively.
if (interactive()) {
  View(df)
}
# Column names, types and first values of the imported sheet.
glimpse(df)
## Rows: 803
## Columns: 8
## $ Num <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17...
## $ Fecha <chr> "10-11-2020 24:00", "11-11-2020 01:00", "11-11-2020 02:00...
## $ Oficial <dbl> 19.0, 17.0, 11.0, 8.0, 7.0, 9.0, 20.0, 41.0, 24.0, 23.0, ...
## $ PMS7003 <dbl> 6.51, 8.59, 10.40, 7.11, 4.76, 7.36, 11.40, 15.40, 15.90,...
## $ PMSA003 <dbl> 6.91, 9.69, 11.90, 7.80, 4.55, 7.64, 11.90, 18.10, 18.50,...
## $ HPMA115S0 <dbl> 4.75, 6.00, 6.64, 4.91, 3.13, 5.24, 5.74, 8.41, 7.45, 6.9...
## $ SPS30 <dbl> 4.51, 5.69, 6.51, 4.71, 3.27, 4.75, 6.02, 8.81, 8.69, 8.0...
## $ SNGCJA5 <dbl> 3.31, 4.46, 5.20, 3.45, 2.24, 3.84, 4.91, 7.37, 7.11, 6.5...
# Show a random sample of 10 rows of the imported data.
# slice_sample() is the current dplyr verb; sample_n() is superseded.
df %>%
  slice_sample(n = 10)
## # A tibble: 10 x 8
## Num Fecha Oficial PMS7003 PMSA003 HPMA115S0 SPS30 SNGCJA5
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 325 24-11-2020 12:00 21 10.3 11.9 7.88 6.47 5.05
## 2 301 23-11-2020 12:00 43 47.1 59 24.3 26.9 22.9
## 3 338 25-11-2020 01:00 25 17.1 19.8 9.98 9.3 7.66
## 4 507 02-12-2020 02:00 13 4.36 4.13 4.34 3.05 2.13
## 5 518 02-12-2020 13:00 10 1.97 1.47 3.14 1.76 0.93
## 6 273 22-11-2020 08:00 36 29.2 35.3 16.3 15.8 13.6
## 7 576 04-12-2020 23:00 24 24.4 28.7 16.3 12.6 11.2
## 8 66 13-11-2020 17:00 20 8.34 7.84 4.86 4.39 3.39
## 9 304 23-11-2020 15:00 14 27.4 33.7 14.1 14.8 12.4
## 10 390 27-11-2020 05:00 27 16.3 19.8 9.78 9.71 7.66
# Interactive chart with one lines+markers trace per sensor plus the official
# station, all plotted against Num; the figure is displayed at the end.
fig <- plot_ly(
  df,
  x = ~Num,
  y = ~PMS7003,
  name = "PM2.5 PMS7003",
  type = "scatter",
  mode = "lines+markers"
) %>%
  add_trace(y = ~PMSA003, name = "PM2.5 PMSA003", mode = "lines+markers") %>%
  add_trace(y = ~HPMA115S0, name = "PM2.5 HPMA115S0", mode = "lines+markers") %>%
  add_trace(y = ~SPS30, name = "PM2.5 SPS30", mode = "lines+markers") %>%
  add_trace(y = ~SNGCJA5, name = "PM2.5 SNGCJA5", mode = "lines+markers") %>%
  add_trace(y = ~Oficial, name = "PM2.5 Oficial", mode = "lines+markers")
fig
# Case 19: SNGCJA5 vs. Oficial-2h
# Summary statistics of the two columns compared in this case.
df %>%
  select(SNGCJA5, Oficial) %>%
  skim()
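## Data summary
##
## |                          |            |
## |:-------------------------|:-----------|
## | Name                     | Piped data |
## | Number of rows           | 803        |
## | Number of columns        | 2          |
## | _______________________  |            |
## | Column type frequency:   |            |
## | numeric                  | 2          |
## | ________________________ |            |
## | Group variables          | None       |
##
## Variable type: numeric
##
## | skim_variable | n_missing | complete_rate |  mean |    sd |   p0 |   p25 |   p50 |   p75 |  p100 | hist  |
## |:--------------|----------:|--------------:|------:|------:|-----:|------:|------:|------:|------:|:------|
## | SNGCJA5       |         0 |             1 |  9.64 |  6.51 | 0.43 |  5.04 |  8.36 |  12.7 |  54.8 | ▇▃▁▁▁ |
## | Oficial       |         0 |             1 | 25.18 | 14.37 | 0.00 | 16.00 | 22.00 |  32.5 | 124.0 | ▇▅▁▁▁ |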
# Pearson correlation coefficient, original: SNGCJA5 vs. Oficial
get_correlation(df, formula = SNGCJA5 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.786
# Scatter plot of Oficial against SNGCJA5 with a fitted straight line on top.
ggplot(data = df, mapping = aes(x = SNGCJA5, y = Oficial)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SNGCJA5 and Oficial-2hour",
    x = "PM25 SNGCJA5",
    y = "PM25 Oficial"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit regression model original: Oficial as a linear function of SNGCJA5.
score_model <- lm(Oficial ~ SNGCJA5, data = df)
# Get regression table original (estimates, standard errors, confidence bounds).
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 8.45 0.561 15.1 0 7.35 9.55
## 2 SNGCJA5 1.74 0.048 36.0 0 1.64 1.83
# Observed value, fitted value and residual for each row used by the model.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 803 x 5
## ID Oficial SNGCJA5 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 19 3.31 14.2 4.81
## 2 2 17 4.46 16.2 0.811
## 3 3 11 5.2 17.5 -6.47
## 4 4 8 3.45 14.4 -6.44
## 5 5 7 2.24 12.3 -5.34
## 6 6 9 3.84 15.1 -6.11
## 7 7 20 4.91 17.0 3.03
## 8 8 41 7.37 21.2 19.8
## 9 9 24 7.11 20.8 3.21
## 10 10 23 6.57 19.8 3.15
## # ... with 793 more rows
# Case 20: SPS30 vs. Oficial-2h
# Summary statistics of the two columns compared in this case.
df %>%
  select(SPS30, Oficial) %>%
  skim()
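## Data summary
##
## |                          |            |
## |:-------------------------|:-----------|
## | Name                     | Piped data |
## | Number of rows           | 803        |
## | Number of columns        | 2          |
## | _______________________  |            |
## | Column type frequency:   |            |
## | numeric                  | 2          |
## | ________________________ |            |
## | Group variables          | None       |
##
## Variable type: numeric
##
## | skim_variable | n_missing | complete_rate |  mean |    sd |   p0 |   p25 |   p50 |   p75 |  p100 | hist  |
## |:--------------|----------:|--------------:|------:|------:|-----:|------:|------:|------:|------:|:------|
## | SPS30         |         0 |             1 | 11.28 |  7.11 | 1.35 |  6.25 |  9.89 | 14.65 |  62.7 | ▇▃▁▁▁ |
## | Oficial       |         0 |             1 | 25.18 | 14.37 | 0.00 | 16.00 | 22.00 | 32.50 | 124.0 | ▇▅▁▁▁ |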
# Pearson correlation coefficient, original: SPS30 vs. Oficial
get_correlation(df, formula = SPS30 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.766
# Scatter plot of Oficial against SPS30 with a fitted straight line on top.
ggplot(data = df, mapping = aes(x = SPS30, y = Oficial)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SPS30 and Oficial-2hour",
    x = "PM25 SPS30",
    y = "PM25 Oficial"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit regression model original: Oficial as a linear function of SPS30.
score_model <- lm(Oficial ~ SPS30, data = df)
# Get regression table original (estimates, standard errors, confidence bounds).
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 7.71 0.612 12.6 0 6.51 8.91
## 2 SPS30 1.55 0.046 33.8 0 1.46 1.64
# Observed value, fitted value and residual for each row used by the model.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 803 x 5
## ID Oficial SPS30 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 19 4.51 14.7 4.30
## 2 2 17 5.69 16.5 0.475
## 3 3 11 6.51 17.8 -6.80
## 4 4 8 4.71 15.0 -7.01
## 5 5 7 3.27 12.8 -5.78
## 6 6 9 4.75 15.1 -6.07
## 7 7 20 6.02 17.0 2.96
## 8 8 41 8.81 21.4 19.6
## 9 9 24 8.69 21.2 2.83
## 10 10 23 8.06 20.2 2.80
## # ... with 793 more rows
# Case 21: HPMA115S0 vs. Oficial-2h
# Summary statistics of the two columns compared in this case.
df %>%
  select(HPMA115S0, Oficial) %>%
  skim()
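## Data summary
##
## |                          |            |
## |:-------------------------|:-----------|
## | Name                     | Piped data |
## | Number of rows           | 803        |
## | Number of columns        | 2          |
## | _______________________  |            |
## | Column type frequency:   |            |
## | numeric                  | 2          |
## | ________________________ |            |
## | Group variables          | None       |
##
## Variable type: numeric
##
## | skim_variable | n_missing | complete_rate |  mean |    sd |   p0 |   p25 |   p50 |   p75 |  p100 | hist  |
## |:--------------|----------:|--------------:|------:|------:|-----:|------:|------:|------:|------:|:------|
## | HPMA115S0     |         0 |             1 | 12.43 |  7.54 | 2.25 |  7.26 |  10.7 |  15.4 |  64.6 | ▇▂▁▁▁ |
## | Oficial       |         0 |             1 | 25.18 | 14.37 | 0.00 | 16.00 |  22.0 |  32.5 | 124.0 | ▇▅▁▁▁ |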
# Pearson correlation coefficient, original: HPMA115S0 vs. Oficial
get_correlation(df, formula = HPMA115S0 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.813
# Scatter plot of Oficial against HPMA115S0 with a fitted straight line on top.
ggplot(data = df, mapping = aes(x = HPMA115S0, y = Oficial)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between HPMA115S0 and Oficial-2hour",
    x = "PM25 HPMA115S0",
    y = "PM25 Oficial"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit regression model original: Oficial as a linear function of HPMA115S0.
score_model <- lm(Oficial ~ HPMA115S0, data = df)
# Get regression table original (estimates, standard errors, confidence bounds).
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 5.91 0.571 10.4 0 4.79 7.03
## 2 HPMA115S0 1.55 0.039 39.5 0 1.47 1.63
# Observed value, fitted value and residual for each row used by the model.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 803 x 5
## ID Oficial HPMA115S0 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 19 4.75 13.3 5.72
## 2 2 17 6 15.2 1.79
## 3 3 11 6.64 16.2 -5.20
## 4 4 8 4.91 13.5 -5.52
## 5 5 7 3.13 10.8 -3.76
## 6 6 9 5.24 14.0 -5.04
## 7 7 20 5.74 14.8 5.19
## 8 8 41 8.41 18.9 22.1
## 9 9 24 7.45 17.5 6.54
## 10 10 23 6.96 16.7 6.30
## # ... with 793 more rows
# Case 22: PMSA003 vs. Oficial-2h
# Summary statistics of the two columns compared in this case.
df %>%
  select(PMSA003, Oficial) %>%
  skim()
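## Data summary
##
## |                          |            |
## |:-------------------------|:-----------|
## | Name                     | Piped data |
## | Number of rows           | 803        |
## | Number of columns        | 2          |
## | _______________________  |            |
## | Column type frequency:   |            |
## | numeric                  | 2          |
## | ________________________ |            |
## | Group variables          | None       |
##
## Variable type: numeric
##
## | skim_variable | n_missing | complete_rate |  mean |    sd |   p0 |   p25 |   p50 |   p75 |  p100 | hist  |
## |:--------------|----------:|--------------:|------:|------:|-----:|------:|------:|------:|------:|:------|
## | PMSA003       |         0 |             1 | 24.25 | 17.10 | 0.84 |  11.7 |  20.9 |  32.9 |   138 | ▇▃▁▁▁ |
## | Oficial       |         0 |             1 | 25.18 | 14.37 | 0.00 |  16.0 |  22.0 |  32.5 |   124 | ▇▅▁▁▁ |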
# Pearson correlation coefficient, original: PMSA003 vs. Oficial
get_correlation(df, formula = PMSA003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.785
# Scatter plot of Oficial against PMSA003 with a fitted straight line on top.
ggplot(data = df, mapping = aes(x = PMSA003, y = Oficial)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between PMSA003 and Oficial-2hour",
    x = "PM25 PMSA003",
    y = "PM25 Oficial"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit regression model original: Oficial as a linear function of PMSA003.
score_model <- lm(Oficial ~ PMSA003, data = df)
# Get regression table original (estimates, standard errors, confidence bounds).
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 9.19 0.546 16.8 0 8.12 10.3
## 2 PMSA003 0.66 0.018 35.9 0 0.624 0.696
# Observed value, fitted value and residual for each row used by the model.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 803 x 5
## ID Oficial PMSA003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 19 6.91 13.8 5.25
## 2 2 17 9.69 15.6 1.42
## 3 3 11 11.9 17.0 -6.04
## 4 4 8 7.8 14.3 -6.34
## 5 5 7 4.55 12.2 -5.19
## 6 6 9 7.64 14.2 -5.23
## 7 7 20 11.9 17.0 2.96
## 8 8 41 18.1 21.1 19.9
## 9 9 24 18.5 21.4 2.60
## 10 10 23 16.8 20.3 2.73
## # ... with 793 more rows
# Case 23: PMS7003 vs. Oficial-2h
# Summary statistics of the two columns compared in this case.
df %>%
  select(PMS7003, Oficial) %>%
  skim()
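## Data summary
##
## |                          |            |
## |:-------------------------|:-----------|
## | Name                     | Piped data |
## | Number of rows           | 803        |
## | Number of columns        | 2          |
## | _______________________  |            |
## | Column type frequency:   |            |
## | numeric                  | 2          |
## | ________________________ |            |
## | Group variables          | None       |
##
## Variable type: numeric
##
## | skim_variable | n_missing | complete_rate |  mean |    sd |   p0 |   p25 |   p50 |   p75 |  p100 | hist  |
## |:--------------|----------:|--------------:|------:|------:|-----:|------:|------:|------:|------:|:------|
## | PMS7003       |         0 |             1 | 20.51 | 14.00 |    1 |  10.5 |  17.7 |  27.2 |   116 | ▇▃▁▁▁ |
## | Oficial       |         0 |             1 | 25.18 | 14.37 |    0 |  16.0 |  22.0 |  32.5 |   124 | ▇▅▁▁▁ |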
# Pearson correlation coefficient, original: PMS7003 vs. Oficial
get_correlation(df, formula = PMS7003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.800
# Scatter plot of Oficial against PMS7003 with a fitted straight line on top.
ggplot(data = df, mapping = aes(x = PMS7003, y = Oficial)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between PMS7003 and Oficial-2hour",
    x = "PM25 PMS7003",
    y = "PM25 Oficial"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit regression model original: Oficial as a linear function of PMS7003.
score_model <- lm(Oficial ~ PMS7003, data = df)
# Get regression table original; this case requests digits = 11 rather than
# the default used by the other cases.
score_model %>%
  get_regression_table(digits = 11)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 8.33 0.540 15.4 0 7.27 9.40
## 2 PMS7003 0.822 0.0218 37.8 0 0.779 0.864
# Observed value, fitted value and residual for each row used by the model.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 803 x 5
## ID Oficial PMS7003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 19 6.51 13.7 5.32
## 2 2 17 8.59 15.4 1.61
## 3 3 11 10.4 16.9 -5.88
## 4 4 8 7.11 14.2 -6.18
## 5 5 7 4.76 12.2 -5.24
## 6 6 9 7.36 14.4 -5.38
## 7 7 20 11.4 17.7 2.3
## 8 8 41 15.4 21.0 20.0
## 9 9 24 15.9 21.4 2.60
## 10 10 23 14.2 20.0 3.00
## # ... with 793 more rows