library(readxl)
library(tidyverse)
## -- Attaching packages --------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v dplyr 1.0.2
## v tidyr 1.1.1 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(moderndive)
library(skimr)
# **2 hours late station ESTACION TUNAL VS CANAIRIOS**
# **5 different sensors: PMS7003 & PMSA003 & HPMA115S0 & SPS30 & SNGCJA5**
# Load the measurement workbook into a tibble.
df <- read_excel("C:/Mediciones/TUNAL_ESTACION_CANAIRIOS_2h.xlsx")
# View() opens a data viewer and is only meaningful in an interactive session;
# skip it when the script is run non-interactively (Rscript, knitting).
if (interactive()) {
  View(df)
}
# Print column names, types and the leading values of each column.
glimpse(df)
## Rows: 2,213
## Columns: 8
## $ Num <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17...
## $ Fecha <chr> "13-11-2020 24:00", "14-11-2020 01:00", "14-11-2020 02:00...
## $ Oficial <dbl> 18, 26, 24, 30, 38, 53, 54, 57, 60, 61, 41, 26, 18, 17, 2...
## $ PMS7003 <dbl> 26.70, 19.50, 25.60, 27.30, 32.80, 36.40, 44.20, 47.90, 5...
## $ PMSA003 <dbl> 30.70, 21.60, 29.00, 31.60, 38.10, 41.70, 50.30, 54.40, 6...
## $ HPMA115S0 <dbl> 17.40, 14.00, 17.20, 18.60, 23.50, 25.10, 30.00, 32.50, 3...
## $ SPS30 <dbl> 15.90, 12.40, 15.70, 17.00, 21.00, 22.40, 27.40, 29.70, 3...
## $ SNGCJA5 <dbl> 14.40, 10.70, 13.80, 14.70, 18.50, 20.30, 24.80, 26.60, 2...
# Peek at 10 randomly chosen rows. slice_sample() is the current dplyr verb;
# sample_n() is superseded.
df %>%
  slice_sample(n = 10)
## # A tibble: 10 x 8
## Num Fecha Oficial PMS7003 PMSA003 HPMA115S0 SPS30 SNGCJA5
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1597 20-01-2021 02:00 8 2.39 1.82 0 2.55 1.98
## 2 257 25-11-2020 06:00 12 4.18 4.03 3.89 3.19 2.51
## 3 60 16-11-2020 11:00 17 6.68 6.95 5.75 5.24 3.72
## 4 2228 15-02-2021 09:00 35 33 38.5 3.92 21.7 17.7
## 5 2089 09-02-2021 14:00 39 40.6 48.6 0 26 20.6
## 6 875 20-12-2020 24:00 7 3.64 2.75 4.05 3.12 2.29
## 7 591 09-12-2020 04:00 8 4.74 4.62 4.43 3.91 3.25
## 8 1304 07-01-2021 21:00 13 5.05 5.15 0 4.11 3.36
## 9 2248 16-02-2021 05:00 53 30.2 34.2 0 17.1 16.7
## 10 1781 27-01-2021 18:00 12 7.95 8.16 0 5.7 4.84
# Overlay the PM2.5 series of the five sensors and the official station,
# all plotted against Num, as one interactive line+marker chart.
fig <- plot_ly(
  df,
  x = ~Num,
  y = ~PMS7003,
  name = "PM2.5 PMS7003",
  type = "scatter",
  mode = "lines+markers"
) %>%
  add_trace(y = ~PMSA003, name = "PM2.5 PMSA003", mode = "lines+markers") %>%
  add_trace(y = ~HPMA115S0, name = "PM2.5 HPMA115S0", mode = "lines+markers") %>%
  add_trace(y = ~SPS30, name = "PM2.5 SPS30", mode = "lines+markers") %>%
  add_trace(y = ~SNGCJA5, name = "PM2.5 SNGCJA5", mode = "lines+markers") %>%
  add_trace(y = ~Oficial, name = "PM2.5 Oficial", mode = "lines+markers")
fig
#Caso 1: SNGCJA5 VS SPS30
# Summary statistics for the two series compared in this case.
df %>%
  select(SNGCJA5, SPS30) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.97 |
6.35 |
0.00 |
2.08 |
4.93 |
10.3 |
38.6 |
▇▃▁▁▁ |
| SPS30 |
0 |
1 |
8.46 |
7.30 |
0.01 |
2.98 |
5.94 |
12.0 |
43.4 |
▇▃▁▁▁ |
# Pearson correlation coefficient (original data)
get_correlation(df, formula = SNGCJA5 ~ SPS30)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.995
# Scatter plot of SPS30 against SNGCJA5 with a fitted straight line.
ggplot(data = df, mapping = aes(x = SNGCJA5, y = SPS30)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SNGCJA5 and SPS30",
    x = "PM25 SNGCJA5",
    y = "PM25 SPS30"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the linear model SPS30 ~ SNGCJA5 on the original data.
score_model <- lm(formula = SPS30 ~ SNGCJA5, data = df)
# Coefficient table for the fitted model.
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 0.478 0.023 21.2 0 0.434 0.522
## 2 SNGCJA5 1.14 0.002 479. 0 1.14 1.15
# Observed values, fitted values and residuals from score_model.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 2,213 x 5
## ID SPS30 SNGCJA5 SPS30_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 15.9 14.4 17.0 -1.06
## 2 2 12.4 10.7 12.7 -0.324
## 3 3 15.7 13.8 16.3 -0.572
## 4 4 17 14.7 17.3 -0.302
## 5 5 21 18.5 21.7 -0.651
## 6 6 22.4 20.3 23.7 -1.31
## 7 7 27.4 24.8 28.9 -1.46
## 8 8 29.7 26.6 30.9 -1.22
## 9 9 34.9 28.7 33.3 1.58
## 10 10 39.9 32.4 37.6 2.34
## # ... with 2,203 more rows
#Caso 2: SNGCJA5 VS HPMA115S0
# Summary statistics for the two series compared in this case.
df %>%
  select(SNGCJA5, HPMA115S0) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.97 |
6.35 |
0 |
2.08 |
4.93 |
10.30 |
38.6 |
▇▃▁▁▁ |
| HPMA115S0 |
0 |
1 |
4.47 |
7.39 |
0 |
0.00 |
0.00 |
5.58 |
43.4 |
▇▁▁▁▁ |
# Pearson correlation coefficient (original data)
get_correlation(df, formula = SNGCJA5 ~ HPMA115S0)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.607
# Scatter plot of HPMA115S0 against SNGCJA5 with a fitted straight line.
ggplot(data = df, mapping = aes(x = SNGCJA5, y = HPMA115S0)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SNGCJA5 and HPMA115S0",
    x = "PM25 SNGCJA5",
    y = "PM25 HPMA115S0"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the linear model HPMA115S0 ~ SNGCJA5 on the original data.
score_model <- lm(formula = HPMA115S0 ~ SNGCJA5, data = df)
# Coefficient table for the fitted model.
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -0.448 0.186 -2.41 0.016 -0.812 -0.084
## 2 SNGCJA5 0.706 0.02 35.9 0 0.667 0.745
# Observed values, fitted values and residuals from score_model.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 2,213 x 5
## ID HPMA115S0 SNGCJA5 HPMA115S0_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 17.4 14.4 9.72 7.68
## 2 2 14 10.7 7.11 6.89
## 3 3 17.2 13.8 9.30 7.90
## 4 4 18.6 14.7 9.93 8.67
## 5 5 23.5 18.5 12.6 10.9
## 6 6 25.1 20.3 13.9 11.2
## 7 7 30 24.8 17.1 12.9
## 8 8 32.5 26.6 18.3 14.2
## 9 9 37.1 28.7 19.8 17.3
## 10 10 42.1 32.4 22.4 19.7
## # ... with 2,203 more rows
#Caso 3: SNGCJA5 VS PMSA003
# Summary statistics for the two series compared in this case.
df %>%
  select(SNGCJA5, PMSA003) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.97 |
6.35 |
0 |
2.08 |
4.93 |
10.3 |
38.6 |
▇▃▁▁▁ |
| PMSA003 |
0 |
1 |
14.61 |
15.18 |
0 |
2.81 |
9.11 |
22.5 |
89.3 |
▇▂▁▁▁ |
# Pearson correlation coefficient (original data)
get_correlation(df, formula = SNGCJA5 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.995
# Scatter plot of PMSA003 against SNGCJA5 with a fitted straight line.
ggplot(data = df, mapping = aes(x = SNGCJA5, y = PMSA003)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SNGCJA5 and PMSA003",
    x = "PM25 SNGCJA5",
    y = "PM25 PMSA003"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the linear model PMSA003 ~ SNGCJA5 on the original data.
score_model <- lm(formula = PMSA003 ~ SNGCJA5, data = df)
# Coefficient table for the fitted model.
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -1.97 0.048 -40.8 0 -2.07 -1.88
## 2 SNGCJA5 2.38 0.005 464. 0 2.37 2.39
# Observed values, fitted values and residuals from score_model.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 2,213 x 5
## ID PMSA003 SNGCJA5 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 30.7 14.4 32.3 -1.58
## 2 2 21.6 10.7 23.5 -1.88
## 3 3 29 13.8 30.8 -1.85
## 4 4 31.6 14.7 33.0 -1.39
## 5 5 38.1 18.5 42.0 -3.93
## 6 6 41.7 20.3 46.3 -4.61
## 7 7 50.3 24.8 57.0 -6.71
## 8 8 54.4 26.6 61.3 -6.89
## 9 9 62.3 28.7 66.3 -3.99
## 10 10 69.8 32.4 75.1 -5.29
## # ... with 2,203 more rows
#Caso 4: SNGCJA5 VS PMS7003
# Summary statistics for the two series compared in this case.
df %>%
  select(SNGCJA5, PMS7003) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.97 |
6.35 |
0 |
2.08 |
4.93 |
10.3 |
38.6 |
▇▃▁▁▁ |
| PMS7003 |
0 |
1 |
12.96 |
12.73 |
0 |
3.20 |
8.55 |
19.3 |
75.7 |
▇▂▁▁▁ |
# Pearson correlation coefficient (original data)
get_correlation(df, formula = SNGCJA5 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.996
# Scatter plot of PMS7003 against SNGCJA5 with a fitted straight line.
ggplot(data = df, mapping = aes(x = SNGCJA5, y = PMS7003)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SNGCJA5 and PMS7003",
    x = "PM25 SNGCJA5",
    y = "PM25 PMS7003"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the linear model PMS7003 ~ SNGCJA5 on the original data.
score_model <- lm(formula = PMS7003 ~ SNGCJA5, data = df)
# Coefficient table for the fitted model.
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -0.96 0.036 -26.5 0 -1.03 -0.889
## 2 SNGCJA5 2.00 0.004 521. 0 1.99 2.00
# Observed values, fitted values and residuals from score_model.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 2,213 x 5
## ID PMS7003 SNGCJA5 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 26.7 14.4 27.8 -1.09
## 2 2 19.5 10.7 20.4 -0.901
## 3 3 25.6 13.8 26.6 -0.989
## 4 4 27.3 14.7 28.4 -1.09
## 5 5 32.8 18.5 36.0 -3.17
## 6 6 36.4 20.3 39.6 -3.16
## 7 7 44.2 24.8 48.5 -4.35
## 8 8 47.9 26.6 52.1 -4.24
## 9 9 54.9 28.7 56.3 -1.43
## 10 10 60.5 32.4 63.7 -3.22
## # ... with 2,203 more rows
#Caso 5: SNGCJA5 VS Oficial
# Summary statistics for the two series compared in this case.
df %>%
  select(SNGCJA5, Oficial) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.97 |
6.35 |
0 |
2.08 |
4.93 |
10.3 |
38.6 |
▇▃▁▁▁ |
| Oficial |
0 |
1 |
15.66 |
11.85 |
0 |
7.00 |
13.00 |
22.0 |
78.0 |
▇▅▁▁▁ |
# Pearson correlation coefficient (original data)
get_correlation(df, formula = SNGCJA5 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.704
# Scatter plot of Oficial against SNGCJA5 with a fitted straight line.
ggplot(data = df, mapping = aes(x = SNGCJA5, y = Oficial)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SNGCJA5 and Oficial",
    x = "PM25 SNGCJA5",
    y = "PM25 Oficial"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the linear model Oficial ~ SNGCJA5 on the original data.
score_model <- lm(formula = Oficial ~ SNGCJA5, data = df)
# Coefficient table for the fitted model.
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 6.50 0.266 24.4 0 5.98 7.02
## 2 SNGCJA5 1.31 0.028 46.6 0 1.26 1.37
# Observed values, fitted values and residuals from score_model.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 2,213 x 5
## ID Oficial SNGCJA5 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 18 14.4 25.4 -7.41
## 2 2 26 10.7 20.6 5.45
## 3 3 24 13.8 24.6 -0.625
## 4 4 30 14.7 25.8 4.19
## 5 5 38 18.5 30.8 7.20
## 6 6 53 20.3 33.2 19.8
## 7 7 54 24.8 39.1 14.9
## 8 8 57 26.6 41.4 15.6
## 9 9 60 28.7 44.2 15.8
## 10 10 61 32.4 49.1 11.9
## # ... with 2,203 more rows
#Caso 6: SPS30 VS HPMA115S0
# Summary statistics for the two series compared in this case.
df %>%
  select(SPS30, HPMA115S0) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
8.46 |
7.30 |
0.01 |
2.98 |
5.94 |
12.00 |
43.4 |
▇▃▁▁▁ |
| HPMA115S0 |
0 |
1 |
4.47 |
7.39 |
0.00 |
0.00 |
0.00 |
5.58 |
43.4 |
▇▁▁▁▁ |
# Pearson correlation coefficient (original data)
get_correlation(df, formula = SPS30 ~ HPMA115S0)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.608
# Scatter plot of HPMA115S0 against SPS30 with a fitted straight line.
ggplot(data = df, mapping = aes(x = SPS30, y = HPMA115S0)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SPS30 and HPMA115S0",
    x = "PM25 SPS30",
    y = "PM25 HPMA115S0"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the linear model HPMA115S0 ~ SPS30 on the original data.
score_model <- lm(formula = HPMA115S0 ~ SPS30, data = df)
# Coefficient table for the fitted model.
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -0.731 0.191 -3.83 0 -1.10 -0.356
## 2 SPS30 0.615 0.017 36.0 0 0.582 0.649
# Observed values, fitted values and residuals from score_model.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 2,213 x 5
## ID HPMA115S0 SPS30 HPMA115S0_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 17.4 15.9 9.06 8.35
## 2 2 14 12.4 6.90 7.10
## 3 3 17.2 15.7 8.93 8.27
## 4 4 18.6 17 9.73 8.87
## 5 5 23.5 21 12.2 11.3
## 6 6 25.1 22.4 13.1 12.0
## 7 7 30 27.4 16.1 13.9
## 8 8 32.5 29.7 17.5 15.0
## 9 9 37.1 34.9 20.7 16.4
## 10 10 42.1 39.9 23.8 18.3
## # ... with 2,203 more rows
#Caso 7: SPS30 VS PMSA003
# Summary statistics for the two series compared in this case.
df %>%
  select(SPS30, PMSA003) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
8.46 |
7.30 |
0.01 |
2.98 |
5.94 |
12.0 |
43.4 |
▇▃▁▁▁ |
| PMSA003 |
0 |
1 |
14.61 |
15.18 |
0.00 |
2.81 |
9.11 |
22.5 |
89.3 |
▇▂▁▁▁ |
# Pearson correlation coefficient (original data)
get_correlation(df, formula = SPS30 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.995
# Scatter plot of PMSA003 against SPS30 with a fitted straight line.
ggplot(data = df, mapping = aes(x = SPS30, y = PMSA003)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SPS30 and PMSA003",
    x = "PM25 SPS30",
    y = "PM25 PMSA003"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the linear model PMSA003 ~ SPS30 on the original data.
score_model <- lm(formula = PMSA003 ~ SPS30, data = df)
# Coefficient table for the fitted model.
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -2.88 0.05 -57.6 0 -2.98 -2.78
## 2 SPS30 2.07 0.004 462. 0 2.06 2.08
# Observed values, fitted values and residuals from score_model.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 2,213 x 5
## ID PMSA003 SPS30 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 30.7 15.9 30.0 0.697
## 2 2 21.6 12.4 22.8 -1.16
## 3 3 29 15.7 29.6 -0.589
## 4 4 31.6 17 32.3 -0.678
## 5 5 38.1 21 40.6 -2.45
## 6 6 41.7 22.4 43.4 -1.75
## 7 7 50.3 27.4 53.8 -3.49
## 8 8 54.4 29.7 58.5 -4.14
## 9 9 62.3 34.9 69.3 -7.00
## 10 10 69.8 39.9 79.6 -9.84
## # ... with 2,203 more rows
#Caso 8: SPS30 VS PMS7003
# Summary statistics for the two series compared in this case.
df %>%
  select(SPS30, PMS7003) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
8.46 |
7.30 |
0.01 |
2.98 |
5.94 |
12.0 |
43.4 |
▇▃▁▁▁ |
| PMS7003 |
0 |
1 |
12.96 |
12.73 |
0.00 |
3.20 |
8.55 |
19.3 |
75.7 |
▇▂▁▁▁ |
# Pearson correlation coefficient (original data)
get_correlation(df, formula = SPS30 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.995
# Scatter plot of PMS7003 against SPS30 with a fitted straight line.
ggplot(data = df, mapping = aes(x = SPS30, y = PMS7003)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SPS30 and PMS7003",
    x = "PM25 SPS30",
    y = "PM25 PMS7003"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the linear model PMS7003 ~ SPS30 on the original data.
score_model <- lm(formula = PMS7003 ~ SPS30, data = df)
# Coefficient table for the fitted model.
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -1.71 0.041 -41.6 0 -1.79 -1.63
## 2 SPS30 1.73 0.004 471. 0 1.73 1.74
# Observed values, fitted values and residuals from score_model.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 2,213 x 5
## ID PMS7003 SPS30 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 26.7 15.9 25.9 0.833
## 2 2 19.5 12.4 19.8 -0.297
## 3 3 25.6 15.7 25.5 0.08
## 4 4 27.3 17 27.8 -0.475
## 5 5 32.8 21 34.7 -1.91
## 6 6 36.4 22.4 37.1 -0.741
## 7 7 44.2 27.4 45.8 -1.61
## 8 8 47.9 29.7 49.8 -1.90
## 9 9 54.9 34.9 58.8 -3.92
## 10 10 60.5 39.9 67.5 -6.99
## # ... with 2,203 more rows
#Caso 9: SPS30 VS Oficial
# Summary statistics for the two series compared in this case.
df %>%
  select(SPS30, Oficial) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
8.46 |
7.30 |
0.01 |
2.98 |
5.94 |
12 |
43.4 |
▇▃▁▁▁ |
| Oficial |
0 |
1 |
15.66 |
11.85 |
0.00 |
7.00 |
13.00 |
22 |
78.0 |
▇▅▁▁▁ |
# Pearson correlation coefficient (original data)
get_correlation(df, formula = SPS30 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.683
# Scatter plot of Oficial against SPS30 with a fitted straight line.
ggplot(data = df, mapping = aes(x = SPS30, y = Oficial)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SPS30 and Oficial",
    x = "PM25 SPS30",
    y = "PM25 Oficial"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the linear model Oficial ~ SPS30 on the original data.
score_model <- lm(formula = Oficial ~ SPS30, data = df)
# Coefficient table for the fitted model.
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 6.29 0.282 22.3 0 5.74 6.84
## 2 SPS30 1.11 0.025 43.9 0 1.06 1.16
# Observed values, fitted values and residuals from score_model.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 2,213 x 5
## ID Oficial SPS30 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 18 15.9 23.9 -5.90
## 2 2 26 12.4 20.0 5.98
## 3 3 24 15.7 23.7 0.323
## 4 4 30 17 25.1 4.88
## 5 5 38 21 29.5 8.45
## 6 6 53 22.4 31.1 21.9
## 7 7 54 27.4 36.6 17.4
## 8 8 57 29.7 39.2 17.8
## 9 9 60 34.9 44.9 15.1
## 10 10 61 39.9 50.5 10.5
## # ... with 2,203 more rows
#Caso 10: HPMA115S0 VS PMSA003
# Summary statistics for the two series compared in this case.
df %>%
  select(HPMA115S0, PMSA003) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
4.47 |
7.39 |
0 |
0.00 |
0.00 |
5.58 |
43.4 |
▇▁▁▁▁ |
| PMSA003 |
0 |
1 |
14.61 |
15.18 |
0 |
2.81 |
9.11 |
22.50 |
89.3 |
▇▂▁▁▁ |
# Pearson correlation coefficient (original data)
get_correlation(df, formula = HPMA115S0 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.602
# Scatter plot of PMSA003 against HPMA115S0 with a fitted straight line.
ggplot(data = df, mapping = aes(x = HPMA115S0, y = PMSA003)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between HPMA115S0 and PMSA003",
    x = "PM25 HPMA115S0",
    y = "PM25 PMSA003"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the linear model PMSA003 ~ HPMA115S0 on the original data.
score_model <- lm(formula = PMSA003 ~ HPMA115S0, data = df)
# Coefficient table for the fitted model.
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 9.08 0.301 30.1 0 8.49 9.67
## 2 HPMA115S0 1.24 0.035 35.4 0 1.17 1.30
# Observed values, fitted values and residuals from score_model.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 2,213 x 5
## ID PMSA003 HPMA115S0 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 30.7 17.4 30.6 0.113
## 2 2 21.6 14 26.4 -4.78
## 3 3 29 17.2 30.3 -1.34
## 4 4 31.6 18.6 32.1 -0.47
## 5 5 38.1 23.5 38.1 -0.026
## 6 6 41.7 25.1 40.1 1.60
## 7 7 50.3 30 46.2 4.14
## 8 8 54.4 32.5 49.2 5.15
## 9 9 62.3 37.1 54.9 7.36
## 10 10 69.8 42.1 61.1 8.68
## # ... with 2,203 more rows
#Caso 11: HPMA115S0 VS PMS7003
# Summary statistics for the two series compared in this case.
df %>%
  select(HPMA115S0, PMS7003) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
4.47 |
7.39 |
0 |
0.0 |
0.00 |
5.58 |
43.4 |
▇▁▁▁▁ |
| PMS7003 |
0 |
1 |
12.96 |
12.73 |
0 |
3.2 |
8.55 |
19.30 |
75.7 |
▇▂▁▁▁ |
# Pearson correlation coefficient (original data)
get_correlation(df, formula = HPMA115S0 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.615
# Scatter plot of PMS7003 against HPMA115S0 with a fitted straight line.
ggplot(data = df, mapping = aes(x = HPMA115S0, y = PMS7003)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between HPMA115S0 and PMS7003",
    x = "PM25 HPMA115S0",
    y = "PM25 PMS7003"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the linear model PMS7003 ~ HPMA115S0 on the original data.
score_model <- lm(formula = PMS7003 ~ HPMA115S0, data = df)
# Coefficient table for the fitted model.
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 8.22 0.25 32.9 0 7.74 8.71
## 2 HPMA115S0 1.06 0.029 36.6 0 1.00 1.12
# Observed values, fitted values and residuals from score_model.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 2,213 x 5
## ID PMS7003 HPMA115S0 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 26.7 17.4 26.6 0.062
## 2 2 19.5 14 23.0 -3.54
## 3 3 25.6 17.2 26.4 -0.826
## 4 4 27.3 18.6 27.9 -0.608
## 5 5 32.8 23.5 33.1 -0.293
## 6 6 36.4 25.1 34.8 1.61
## 7 7 44.2 30 40.0 4.23
## 8 8 47.9 32.5 42.6 5.28
## 9 9 54.9 37.1 47.5 7.42
## 10 10 60.5 42.1 52.8 7.72
## # ... with 2,203 more rows
#Caso 12: HPMA115S0 VS Oficial
# Summary statistics for the two series compared in this case.
df %>%
  select(HPMA115S0, Oficial) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
4.47 |
7.39 |
0 |
0 |
0 |
5.58 |
43.4 |
▇▁▁▁▁ |
| Oficial |
0 |
1 |
15.66 |
11.85 |
0 |
7 |
13 |
22.00 |
78.0 |
▇▅▁▁▁ |
# Pearson correlation coefficient (original data)
get_correlation(df, formula = HPMA115S0 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.355
# Scatter plot of Oficial against HPMA115S0 with a fitted straight line.
ggplot(data = df, mapping = aes(x = HPMA115S0, y = Oficial)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between HPMA115S0 and Oficial",
    x = "PM25 HPMA115S0",
    y = "PM25 Oficial"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the linear model Oficial ~ HPMA115S0 on the original data.
score_model <- lm(formula = Oficial ~ HPMA115S0, data = df)
# Coefficient table for the fitted model.
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 13.1 0.275 47.6 0 12.6 13.6
## 2 HPMA115S0 0.569 0.032 17.9 0 0.507 0.632
# Observed values, fitted values and residuals from score_model.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 2,213 x 5
## ID Oficial HPMA115S0 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 18 17.4 23.0 -5.01
## 2 2 26 14 21.1 4.92
## 3 3 24 17.2 22.9 1.10
## 4 4 30 18.6 23.7 6.30
## 5 5 38 23.5 26.5 11.5
## 6 6 53 25.1 27.4 25.6
## 7 7 54 30 30.2 23.8
## 8 8 57 32.5 31.6 25.4
## 9 9 60 37.1 34.2 25.8
## 10 10 61 42.1 37.1 23.9
## # ... with 2,203 more rows
#Caso 13: PMSA003 VS PMS7003
# Summary statistics for PMSA003 and PMS7003. This section is headed
# "PMSA003 VS PMS7003", so select PMS7003 rather than Oficial (which only
# duplicated the PMSA003 vs Oficial section).
# NOTE: the rendered output below predates this fix; re-run to refresh it.
df %>%
  select(PMSA003, PMS7003) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMSA003 |
0 |
1 |
14.61 |
15.18 |
0 |
2.81 |
9.11 |
22.5 |
89.3 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
15.66 |
11.85 |
0 |
7.00 |
13.00 |
22.0 |
78.0 |
▇▅▁▁▁ |
# Pearson correlation coefficient (original data). This section is headed
# "PMSA003 VS PMS7003", so correlate against PMS7003 rather than Oficial.
# NOTE: the rendered output below predates this fix; re-run to refresh it.
df %>%
  get_correlation(formula = PMSA003 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.691
# Scatter plot of PMS7003 against PMSA003 with a fitted straight line.
# This section is headed "PMSA003 VS PMS7003", so the y variable, its axis
# label and the title use PMS7003 rather than Oficial.
ggplot(df, aes(x = PMSA003, y = PMS7003)) +
  geom_point(alpha = 0.2) +
  labs(
    x = "PM25 PMSA003", y = "PM25 PMS7003",
    title = "Relationship between PMSA003 and PMS7003"
  ) +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

# Fit the linear model on the original data. This section is headed
# "PMSA003 VS PMS7003", so the response is PMS7003 rather than Oficial.
# NOTE: the rendered output below predates this fix; re-run to refresh it.
score_model <- lm(PMS7003 ~ PMSA003, data = df)
# Coefficient table for the fitted model.
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 7.77 0.253 30.8 0 7.28 8.27
## 2 PMSA003 0.539 0.012 45.0 0 0.516 0.563
# Observed values, fitted values and residuals from score_model.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 2,213 x 5
## ID Oficial PMSA003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 18 30.7 24.3 -6.34
## 2 2 26 21.6 19.4 6.57
## 3 3 24 29 23.4 0.581
## 4 4 30 31.6 24.8 5.18
## 5 5 38 38.1 28.3 9.67
## 6 6 53 41.7 30.3 22.7
## 7 7 54 50.3 34.9 19.1
## 8 8 57 54.4 37.1 19.9
## 9 9 60 62.3 41.4 18.6
## 10 10 61 69.8 45.4 15.6
## # ... with 2,203 more rows
#Caso 14: PMSA003 VS Oficial
# Summary statistics for the two series compared in this case.
df %>%
  select(PMSA003, Oficial) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMSA003 |
0 |
1 |
14.61 |
15.18 |
0 |
2.81 |
9.11 |
22.5 |
89.3 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
15.66 |
11.85 |
0 |
7.00 |
13.00 |
22.0 |
78.0 |
▇▅▁▁▁ |
# Pearson correlation coefficient (original data)
get_correlation(df, formula = PMSA003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.691
# Scatter plot of Oficial against PMSA003 with a fitted straight line.
ggplot(data = df, mapping = aes(x = PMSA003, y = Oficial)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between PMSA003 and Oficial",
    x = "PM25 PMSA003",
    y = "PM25 Oficial"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the linear model Oficial ~ PMSA003 on the original data.
score_model <- lm(formula = Oficial ~ PMSA003, data = df)
# Coefficient table for the fitted model.
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 7.77 0.253 30.8 0 7.28 8.27
## 2 PMSA003 0.539 0.012 45.0 0 0.516 0.563
# Observed values, fitted values and residuals from score_model.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 2,213 x 5
## ID Oficial PMSA003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 18 30.7 24.3 -6.34
## 2 2 26 21.6 19.4 6.57
## 3 3 24 29 23.4 0.581
## 4 4 30 31.6 24.8 5.18
## 5 5 38 38.1 28.3 9.67
## 6 6 53 41.7 30.3 22.7
## 7 7 54 50.3 34.9 19.1
## 8 8 57 54.4 37.1 19.9
## 9 9 60 62.3 41.4 18.6
## 10 10 61 69.8 45.4 15.6
## # ... with 2,203 more rows
#Caso 15: PMS7003 VS Oficial
# Summary statistics for the two series compared in this case.
df %>%
  select(PMS7003, Oficial) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMS7003 |
0 |
1 |
12.96 |
12.73 |
0 |
3.2 |
8.55 |
19.3 |
75.7 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
15.66 |
11.85 |
0 |
7.0 |
13.00 |
22.0 |
78.0 |
▇▅▁▁▁ |
# Pearson correlation coefficient (original data)
get_correlation(df, formula = PMS7003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.694
# Scatter plot of Oficial against PMS7003 with a fitted straight line.
# The title names PMS7003, the variable actually plotted on the x axis
# (it previously said PMSA003).
ggplot(df, aes(x = PMS7003, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(
    x = "PM25 PMS7003", y = "PM25 Oficial",
    title = "Relationship between PMS7003 and Oficial"
  ) +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

# Fit the linear model Oficial ~ PMS7003 on the original data.
score_model <- lm(formula = Oficial ~ PMS7003, data = df)
# Coefficient table for the fitted model.
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 7.28 0.259 28.1 0 6.77 7.79
## 2 PMS7003 0.646 0.014 45.4 0 0.618 0.674
# Observed values, fitted values and residuals from score_model.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 2,213 x 5
## ID Oficial PMS7003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 18 26.7 24.5 -6.54
## 2 2 26 19.5 19.9 6.12
## 3 3 24 25.6 23.8 0.174
## 4 4 30 27.3 24.9 5.08
## 5 5 38 32.8 28.5 9.52
## 6 6 53 36.4 30.8 22.2
## 7 7 54 44.2 35.8 18.2
## 8 8 57 47.9 38.2 18.8
## 9 9 60 54.9 42.8 17.2
## 10 10 61 60.5 46.4 14.6
## # ... with 2,203 more rows