library(readxl)
library(tidyverse)
## -- Attaching packages --------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v dplyr 1.0.2
## v tidyr 1.1.1 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(moderndive)
library(skimr)
# **1 hour late station ESTACION TUNAL VS CANAIRIOS**
# **5 different sensors: PMS7003 & PMSA003 & HPMA115S0 & SPS30 & SNGCJA5**
# Read the measurement workbook into `df`.
# NOTE(review): this is an absolute, machine-specific path; the script only
# runs where this exact file exists -- consider a project-relative path.
df <- read_excel("C:/Mediciones/TUNAL_ESTACION_CANAIRIOS_1h.xlsx")
# View() opens a GUI data viewer, so only call it when a user is present;
# in a non-interactive run (e.g. when rendering) it is skipped.
if (interactive()) {
  View(df)
}
# Compact overview of the column names, types and first values.
glimpse(df)
## Rows: 2,213
## Columns: 8
## $ Num <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17...
## $ Fecha <chr> "13-11-2020 24:00", "14-11-2020 01:00", "14-11-2020 02:00...
## $ Oficial <dbl> 23, 18, 26, 24, 30, 38, 53, 54, 57, 60, 61, 41, 26, 18, 1...
## $ PMS7003 <dbl> 26.70, 19.50, 25.60, 27.30, 32.80, 36.40, 44.20, 47.90, 5...
## $ PMSA003 <dbl> 30.70, 21.60, 29.00, 31.60, 38.10, 41.70, 50.30, 54.40, 6...
## $ HPMA115S0 <dbl> 17.40, 14.00, 17.20, 18.60, 23.50, 25.10, 30.00, 32.50, 3...
## $ SPS30 <dbl> 15.90, 12.40, 15.70, 17.00, 21.00, 22.40, 27.40, 29.70, 3...
## $ SNGCJA5 <dbl> 14.40, 10.70, 13.80, 14.70, 18.50, 20.30, 24.80, 26.60, 2...
# Show 10 randomly chosen rows as a quick sanity check of the data.
# slice_sample() is the current replacement for the superseded sample_n().
df %>%
  slice_sample(n = 10)
## # A tibble: 10 x 8
## Num Fecha Oficial PMS7003 PMSA003 HPMA115S0 SPS30 SNGCJA5
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1390 11-01-2021 11:00 2 0.0219 0.0146 0 0.7 0.0146
## 2 276 26-11-2020 01:00 23 38.9 44.7 25 23.3 20.8
## 3 6 14-11-2020 05:00 38 36.4 41.7 25.1 22.4 20.3
## 4 983 25-12-2020 12:00 14 15.5 17.9 0 10 7.54
## 5 480 04-12-2020 13:00 18 5.91 6.36 5.35 3.59 2.71
## 6 230 23-11-2020 13:00 23 25.5 29.7 17.3 15.9 13.4
## 7 1481 15-01-2021 06:00 21 9.81 10.5 0 6.02 5.67
## 8 1905 01-02-2021 22:00 10 2.55 2.03 0 2.78 2.01
## 9 182 21-11-2020 13:00 0 0.589 0.489 1.54 1.28 0.362
## 10 1761 26-01-2021 22:00 11 6.27 6.57 0 5.07 3.97
# Interactive line+marker chart of every PM2.5 series against the row
# number `Num`; one trace per sensor plus the official series.
fig <- df %>%
  plot_ly(
    x = ~Num, y = ~PMS7003, name = "PM2.5 PMS7003",
    type = "scatter", mode = "lines+markers"
  ) %>%
  add_trace(y = ~PMSA003, name = "PM2.5 PMSA003", mode = "lines+markers") %>%
  add_trace(y = ~HPMA115S0, name = "PM2.5 HPMA115S0", mode = "lines+markers") %>%
  add_trace(y = ~SPS30, name = "PM2.5 SPS30", mode = "lines+markers") %>%
  add_trace(y = ~SNGCJA5, name = "PM2.5 SNGCJA5", mode = "lines+markers") %>%
  add_trace(y = ~Oficial, name = "PM2.5 Oficial", mode = "lines+markers")
fig
# Case 1: SNGCJA5 vs SPS30 -- summary statistics of the two columns.
df %>%
  select(SNGCJA5, SPS30) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.97 |
6.35 |
0.00 |
2.08 |
4.93 |
10.3 |
38.6 |
▇▃▁▁▁ |
| SPS30 |
0 |
1 |
8.46 |
7.30 |
0.01 |
2.98 |
5.94 |
12.0 |
43.4 |
▇▃▁▁▁ |
# Pearson correlation coefficient between the two series (original data).
get_correlation(df, formula = SNGCJA5 ~ SPS30)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.995
# Scatter plot of SPS30 against SNGCJA5 with a fitted straight line.
df %>%
  ggplot(aes(x = SNGCJA5, y = SPS30)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    x = "PM25 SNGCJA5", y = "PM25 SPS30",
    title = "Relationship between SNGCJA5 and SPS30"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression of SPS30 on SNGCJA5 (original data).
score_model <- lm(SPS30 ~ SNGCJA5, data = df)
# Coefficient table of the fitted model (original data).
score_model %>% get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 0.478 0.023 21.2 0 0.434 0.522
## 2 SNGCJA5 1.14 0.002 479. 0 1.14 1.15
# Observed values, fitted values and residuals for every row.
regression_points <- score_model %>% get_regression_points()
regression_points
## # A tibble: 2,213 x 5
## ID SPS30 SNGCJA5 SPS30_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 15.9 14.4 17.0 -1.06
## 2 2 12.4 10.7 12.7 -0.324
## 3 3 15.7 13.8 16.3 -0.572
## 4 4 17 14.7 17.3 -0.302
## 5 5 21 18.5 21.7 -0.651
## 6 6 22.4 20.3 23.7 -1.31
## 7 7 27.4 24.8 28.9 -1.46
## 8 8 29.7 26.6 30.9 -1.22
## 9 9 34.9 28.7 33.3 1.58
## 10 10 39.9 32.4 37.6 2.34
## # ... with 2,203 more rows
# Case 2: SNGCJA5 vs HPMA115S0 -- summary statistics of the two columns.
df %>%
  select(SNGCJA5, HPMA115S0) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.97 |
6.35 |
0 |
2.08 |
4.93 |
10.30 |
38.6 |
▇▃▁▁▁ |
| HPMA115S0 |
0 |
1 |
4.47 |
7.39 |
0 |
0.00 |
0.00 |
5.58 |
43.4 |
▇▁▁▁▁ |
# Pearson correlation coefficient between the two series (original data).
# NOTE(review): the summary printed just above reports a minimum, first
# quartile and median of 0 for HPMA115S0, i.e. at least half of its readings
# are zero -- presumably missing readings stored as 0; confirm before
# interpreting this correlation or the regression below.
get_correlation(df, formula = SNGCJA5 ~ HPMA115S0)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.607
# Scatter plot of HPMA115S0 against SNGCJA5 with a fitted straight line.
df %>%
  ggplot(aes(x = SNGCJA5, y = HPMA115S0)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    x = "PM25 SNGCJA5", y = "PM25 HPMA115S0",
    title = "Relationship between SNGCJA5 and HPMA115S0"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression of HPMA115S0 on SNGCJA5 (original data).
score_model <- lm(HPMA115S0 ~ SNGCJA5, data = df)
# Coefficient table of the fitted model (original data).
score_model %>% get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -0.448 0.186 -2.41 0.016 -0.812 -0.084
## 2 SNGCJA5 0.706 0.02 35.9 0 0.667 0.745
# Observed values, fitted values and residuals for every row.
regression_points <- score_model %>% get_regression_points()
regression_points
## # A tibble: 2,213 x 5
## ID HPMA115S0 SNGCJA5 HPMA115S0_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 17.4 14.4 9.72 7.68
## 2 2 14 10.7 7.11 6.89
## 3 3 17.2 13.8 9.30 7.90
## 4 4 18.6 14.7 9.93 8.67
## 5 5 23.5 18.5 12.6 10.9
## 6 6 25.1 20.3 13.9 11.2
## 7 7 30 24.8 17.1 12.9
## 8 8 32.5 26.6 18.3 14.2
## 9 9 37.1 28.7 19.8 17.3
## 10 10 42.1 32.4 22.4 19.7
## # ... with 2,203 more rows
# Case 3: SNGCJA5 vs PMSA003 -- summary statistics of the two columns.
df %>%
  select(SNGCJA5, PMSA003) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.97 |
6.35 |
0 |
2.08 |
4.93 |
10.3 |
38.6 |
▇▃▁▁▁ |
| PMSA003 |
0 |
1 |
14.61 |
15.18 |
0 |
2.81 |
9.11 |
22.5 |
89.3 |
▇▂▁▁▁ |
# Pearson correlation coefficient between the two series (original data).
get_correlation(df, formula = SNGCJA5 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.995
# Scatter plot of PMSA003 against SNGCJA5 with a fitted straight line.
df %>%
  ggplot(aes(x = SNGCJA5, y = PMSA003)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    x = "PM25 SNGCJA5", y = "PM25 PMSA003",
    title = "Relationship between SNGCJA5 and PMSA003"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression of PMSA003 on SNGCJA5 (original data).
score_model <- lm(PMSA003 ~ SNGCJA5, data = df)
# Coefficient table of the fitted model (original data).
score_model %>% get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -1.97 0.048 -40.8 0 -2.07 -1.88
## 2 SNGCJA5 2.38 0.005 464. 0 2.37 2.39
# Observed values, fitted values and residuals for every row.
regression_points <- score_model %>% get_regression_points()
regression_points
## # A tibble: 2,213 x 5
## ID PMSA003 SNGCJA5 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 30.7 14.4 32.3 -1.58
## 2 2 21.6 10.7 23.5 -1.88
## 3 3 29 13.8 30.8 -1.85
## 4 4 31.6 14.7 33.0 -1.39
## 5 5 38.1 18.5 42.0 -3.93
## 6 6 41.7 20.3 46.3 -4.61
## 7 7 50.3 24.8 57.0 -6.71
## 8 8 54.4 26.6 61.3 -6.89
## 9 9 62.3 28.7 66.3 -3.99
## 10 10 69.8 32.4 75.1 -5.29
## # ... with 2,203 more rows
# Case 4: SNGCJA5 vs PMS7003 -- summary statistics of the two columns.
df %>%
  select(SNGCJA5, PMS7003) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.97 |
6.35 |
0 |
2.08 |
4.93 |
10.3 |
38.6 |
▇▃▁▁▁ |
| PMS7003 |
0 |
1 |
12.96 |
12.73 |
0 |
3.20 |
8.55 |
19.3 |
75.7 |
▇▂▁▁▁ |
# Pearson correlation coefficient between the two series (original data).
get_correlation(df, formula = SNGCJA5 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.996
# Scatter plot of PMS7003 against SNGCJA5 with a fitted straight line.
df %>%
  ggplot(aes(x = SNGCJA5, y = PMS7003)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    x = "PM25 SNGCJA5", y = "PM25 PMS7003",
    title = "Relationship between SNGCJA5 and PMS7003"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression of PMS7003 on SNGCJA5 (original data).
score_model <- lm(PMS7003 ~ SNGCJA5, data = df)
# Coefficient table of the fitted model (original data).
score_model %>% get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -0.96 0.036 -26.5 0 -1.03 -0.889
## 2 SNGCJA5 2.00 0.004 521. 0 1.99 2.00
# Observed values, fitted values and residuals for every row.
regression_points <- score_model %>% get_regression_points()
regression_points
## # A tibble: 2,213 x 5
## ID PMS7003 SNGCJA5 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 26.7 14.4 27.8 -1.09
## 2 2 19.5 10.7 20.4 -0.901
## 3 3 25.6 13.8 26.6 -0.989
## 4 4 27.3 14.7 28.4 -1.09
## 5 5 32.8 18.5 36.0 -3.17
## 6 6 36.4 20.3 39.6 -3.16
## 7 7 44.2 24.8 48.5 -4.35
## 8 8 47.9 26.6 52.1 -4.24
## 9 9 54.9 28.7 56.3 -1.43
## 10 10 60.5 32.4 63.7 -3.22
## # ... with 2,203 more rows
# Case 5: SNGCJA5 vs Oficial -- summary statistics of the two columns.
df %>%
  select(SNGCJA5, Oficial) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.97 |
6.35 |
0 |
2.08 |
4.93 |
10.3 |
38.6 |
▇▃▁▁▁ |
| Oficial |
0 |
1 |
15.66 |
11.85 |
0 |
7.00 |
13.00 |
22.0 |
78.0 |
▇▅▁▁▁ |
# Pearson correlation coefficient between the two series (original data).
get_correlation(df, formula = SNGCJA5 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.896
# Scatter plot of Oficial against SNGCJA5 with a fitted straight line.
df %>%
  ggplot(aes(x = SNGCJA5, y = Oficial)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    x = "PM25 SNGCJA5", y = "PM25 Oficial",
    title = "Relationship between SNGCJA5 and Oficial"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression of Oficial on SNGCJA5 (original data).
score_model <- lm(Oficial ~ SNGCJA5, data = df)
# Coefficient table of the fitted model (original data).
score_model %>% get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 4.01 0.167 24.1 0 3.68 4.34
## 2 SNGCJA5 1.67 0.018 94.6 0 1.64 1.71
# Observed values, fitted values and residuals for every row.
regression_points <- score_model %>% get_regression_points()
regression_points
## # A tibble: 2,213 x 5
## ID Oficial SNGCJA5 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 23 14.4 28.1 -5.07
## 2 2 18 10.7 21.9 -3.89
## 3 3 26 13.8 27.1 -1.07
## 4 4 24 14.7 28.6 -4.57
## 5 5 30 18.5 34.9 -4.92
## 6 6 38 20.3 37.9 0.069
## 7 7 53 24.8 45.5 7.55
## 8 8 54 26.6 48.5 5.54
## 9 9 57 28.7 52.0 5.03
## 10 10 60 32.4 58.2 1.85
## # ... with 2,203 more rows
# Case 6: SPS30 vs HPMA115S0 -- summary statistics of the two columns.
df %>%
  select(SPS30, HPMA115S0) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
8.46 |
7.30 |
0.01 |
2.98 |
5.94 |
12.00 |
43.4 |
▇▃▁▁▁ |
| HPMA115S0 |
0 |
1 |
4.47 |
7.39 |
0.00 |
0.00 |
0.00 |
5.58 |
43.4 |
▇▁▁▁▁ |
# Pearson correlation coefficient between the two series (original data).
get_correlation(df, formula = SPS30 ~ HPMA115S0)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.608
# Scatter plot of HPMA115S0 against SPS30 with a fitted straight line.
df %>%
  ggplot(aes(x = SPS30, y = HPMA115S0)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    x = "PM25 SPS30", y = "PM25 HPMA115S0",
    title = "Relationship between SPS30 and HPMA115S0"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression of HPMA115S0 on SPS30 (original data).
score_model <- lm(HPMA115S0 ~ SPS30, data = df)
# Coefficient table of the fitted model (original data).
score_model %>% get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -0.731 0.191 -3.83 0 -1.10 -0.356
## 2 SPS30 0.615 0.017 36.0 0 0.582 0.649
# Observed values, fitted values and residuals for every row.
regression_points <- score_model %>% get_regression_points()
regression_points
## # A tibble: 2,213 x 5
## ID HPMA115S0 SPS30 HPMA115S0_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 17.4 15.9 9.06 8.35
## 2 2 14 12.4 6.90 7.10
## 3 3 17.2 15.7 8.93 8.27
## 4 4 18.6 17 9.73 8.87
## 5 5 23.5 21 12.2 11.3
## 6 6 25.1 22.4 13.1 12.0
## 7 7 30 27.4 16.1 13.9
## 8 8 32.5 29.7 17.5 15.0
## 9 9 37.1 34.9 20.7 16.4
## 10 10 42.1 39.9 23.8 18.3
## # ... with 2,203 more rows
# Case 7: SPS30 vs PMSA003 -- summary statistics of the two columns.
df %>%
  select(SPS30, PMSA003) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
8.46 |
7.30 |
0.01 |
2.98 |
5.94 |
12.0 |
43.4 |
▇▃▁▁▁ |
| PMSA003 |
0 |
1 |
14.61 |
15.18 |
0.00 |
2.81 |
9.11 |
22.5 |
89.3 |
▇▂▁▁▁ |
# Pearson correlation coefficient between the two series (original data).
get_correlation(df, formula = SPS30 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.995
# Scatter plot of PMSA003 against SPS30 with a fitted straight line.
df %>%
  ggplot(aes(x = SPS30, y = PMSA003)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    x = "PM25 SPS30", y = "PM25 PMSA003",
    title = "Relationship between SPS30 and PMSA003"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression of PMSA003 on SPS30 (original data).
score_model <- lm(PMSA003 ~ SPS30, data = df)
# Coefficient table of the fitted model (original data).
score_model %>% get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -2.88 0.05 -57.6 0 -2.98 -2.78
## 2 SPS30 2.07 0.004 462. 0 2.06 2.08
# Observed values, fitted values and residuals for every row.
regression_points <- score_model %>% get_regression_points()
regression_points
## # A tibble: 2,213 x 5
## ID PMSA003 SPS30 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 30.7 15.9 30.0 0.697
## 2 2 21.6 12.4 22.8 -1.16
## 3 3 29 15.7 29.6 -0.589
## 4 4 31.6 17 32.3 -0.678
## 5 5 38.1 21 40.6 -2.45
## 6 6 41.7 22.4 43.4 -1.75
## 7 7 50.3 27.4 53.8 -3.49
## 8 8 54.4 29.7 58.5 -4.14
## 9 9 62.3 34.9 69.3 -7.00
## 10 10 69.8 39.9 79.6 -9.84
## # ... with 2,203 more rows
# Case 8: SPS30 vs PMS7003 -- summary statistics of the two columns.
df %>%
  select(SPS30, PMS7003) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
8.46 |
7.30 |
0.01 |
2.98 |
5.94 |
12.0 |
43.4 |
▇▃▁▁▁ |
| PMS7003 |
0 |
1 |
12.96 |
12.73 |
0.00 |
3.20 |
8.55 |
19.3 |
75.7 |
▇▂▁▁▁ |
# Pearson correlation coefficient between the two series (original data).
get_correlation(df, formula = SPS30 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.995
# Scatter plot of PMS7003 against SPS30 with a fitted straight line.
df %>%
  ggplot(aes(x = SPS30, y = PMS7003)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    x = "PM25 SPS30", y = "PM25 PMS7003",
    title = "Relationship between SPS30 and PMS7003"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression of PMS7003 on SPS30 (original data).
score_model <- lm(PMS7003 ~ SPS30, data = df)
# Coefficient table of the fitted model (original data).
score_model %>% get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -1.71 0.041 -41.6 0 -1.79 -1.63
## 2 SPS30 1.73 0.004 471. 0 1.73 1.74
# Observed values, fitted values and residuals for every row.
regression_points <- score_model %>% get_regression_points()
regression_points
## # A tibble: 2,213 x 5
## ID PMS7003 SPS30 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 26.7 15.9 25.9 0.833
## 2 2 19.5 12.4 19.8 -0.297
## 3 3 25.6 15.7 25.5 0.08
## 4 4 27.3 17 27.8 -0.475
## 5 5 32.8 21 34.7 -1.91
## 6 6 36.4 22.4 37.1 -0.741
## 7 7 44.2 27.4 45.8 -1.61
## 8 8 47.9 29.7 49.8 -1.90
## 9 9 54.9 34.9 58.8 -3.92
## 10 10 60.5 39.9 67.5 -6.99
## # ... with 2,203 more rows
# Case 9: SPS30 vs Oficial -- summary statistics of the two columns.
df %>%
  select(SPS30, Oficial) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
8.46 |
7.30 |
0.01 |
2.98 |
5.94 |
12 |
43.4 |
▇▃▁▁▁ |
| Oficial |
0 |
1 |
15.66 |
11.85 |
0.00 |
7.00 |
13.00 |
22 |
78.0 |
▇▅▁▁▁ |
# Pearson correlation coefficient between the two series (original data).
get_correlation(df, formula = SPS30 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.873
# Scatter plot of Oficial against SPS30 with a fitted straight line.
df %>%
  ggplot(aes(x = SPS30, y = Oficial)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    x = "PM25 SPS30", y = "PM25 Oficial",
    title = "Relationship between SPS30 and Oficial"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression of Oficial on SPS30 (original data).
score_model <- lm(Oficial ~ SPS30, data = df)
# Coefficient table of the fitted model (original data).
score_model %>% get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 3.68 0.188 19.6 0 3.31 4.05
## 2 SPS30 1.42 0.017 84.0 0 1.38 1.45
# Observed values, fitted values and residuals for every row.
regression_points <- score_model %>% get_regression_points()
regression_points
## # A tibble: 2,213 x 5
## ID Oficial SPS30 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 23 15.9 26.2 -3.20
## 2 2 18 12.4 21.2 -3.24
## 3 3 26 15.7 25.9 0.084
## 4 4 24 17 27.8 -3.76
## 5 5 30 21 33.4 -3.42
## 6 6 38 22.4 35.4 2.60
## 7 7 53 27.4 42.5 10.5
## 8 8 54 29.7 45.7 8.26
## 9 9 57 34.9 53.1 3.89
## 10 10 60 39.9 60.2 -0.187
## # ... with 2,203 more rows
# Case 10: HPMA115S0 vs PMSA003 -- summary statistics of the two columns.
df %>%
  select(HPMA115S0, PMSA003) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
4.47 |
7.39 |
0 |
0.00 |
0.00 |
5.58 |
43.4 |
▇▁▁▁▁ |
| PMSA003 |
0 |
1 |
14.61 |
15.18 |
0 |
2.81 |
9.11 |
22.50 |
89.3 |
▇▂▁▁▁ |
# Pearson correlation coefficient between the two series (original data).
get_correlation(df, formula = HPMA115S0 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.602
# Scatter plot of PMSA003 against HPMA115S0 with a fitted straight line.
df %>%
  ggplot(aes(x = HPMA115S0, y = PMSA003)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    x = "PM25 HPMA115S0", y = "PM25 PMSA003",
    title = "Relationship between HPMA115S0 and PMSA003"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression of PMSA003 on HPMA115S0 (original data).
score_model <- lm(PMSA003 ~ HPMA115S0, data = df)
# Coefficient table of the fitted model (original data).
score_model %>% get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 9.08 0.301 30.1 0 8.49 9.67
## 2 HPMA115S0 1.24 0.035 35.4 0 1.17 1.30
# Observed values, fitted values and residuals for every row.
regression_points <- score_model %>% get_regression_points()
regression_points
## # A tibble: 2,213 x 5
## ID PMSA003 HPMA115S0 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 30.7 17.4 30.6 0.113
## 2 2 21.6 14 26.4 -4.78
## 3 3 29 17.2 30.3 -1.34
## 4 4 31.6 18.6 32.1 -0.47
## 5 5 38.1 23.5 38.1 -0.026
## 6 6 41.7 25.1 40.1 1.60
## 7 7 50.3 30 46.2 4.14
## 8 8 54.4 32.5 49.2 5.15
## 9 9 62.3 37.1 54.9 7.36
## 10 10 69.8 42.1 61.1 8.68
## # ... with 2,203 more rows
# Case 11: HPMA115S0 vs PMS7003 -- summary statistics of the two columns.
df %>%
  select(HPMA115S0, PMS7003) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
4.47 |
7.39 |
0 |
0.0 |
0.00 |
5.58 |
43.4 |
▇▁▁▁▁ |
| PMS7003 |
0 |
1 |
12.96 |
12.73 |
0 |
3.2 |
8.55 |
19.30 |
75.7 |
▇▂▁▁▁ |
# Pearson correlation coefficient between the two series (original data).
get_correlation(df, formula = HPMA115S0 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.615
# Scatter plot of PMS7003 against HPMA115S0 with a fitted straight line.
df %>%
  ggplot(aes(x = HPMA115S0, y = PMS7003)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    x = "PM25 HPMA115S0", y = "PM25 PMS7003",
    title = "Relationship between HPMA115S0 and PMS7003"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression of PMS7003 on HPMA115S0 (original data).
score_model <- lm(PMS7003 ~ HPMA115S0, data = df)
# Coefficient table of the fitted model (original data).
score_model %>% get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 8.22 0.25 32.9 0 7.74 8.71
## 2 HPMA115S0 1.06 0.029 36.6 0 1.00 1.12
# Observed values, fitted values and residuals for every row.
regression_points <- score_model %>% get_regression_points()
regression_points
## # A tibble: 2,213 x 5
## ID PMS7003 HPMA115S0 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 26.7 17.4 26.6 0.062
## 2 2 19.5 14 23.0 -3.54
## 3 3 25.6 17.2 26.4 -0.826
## 4 4 27.3 18.6 27.9 -0.608
## 5 5 32.8 23.5 33.1 -0.293
## 6 6 36.4 25.1 34.8 1.61
## 7 7 44.2 30 40.0 4.23
## 8 8 47.9 32.5 42.6 5.28
## 9 9 54.9 37.1 47.5 7.42
## 10 10 60.5 42.1 52.8 7.72
## # ... with 2,203 more rows
# Case 12: HPMA115S0 vs Oficial -- summary statistics of the two columns.
# (Heading renumbered: it read "Caso 10", a number already used by the
# HPMA115S0 vs PMSA003 comparison; this is the 12th pairwise comparison.)
df %>%
  select(HPMA115S0, Oficial) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
4.47 |
7.39 |
0 |
0 |
0 |
5.58 |
43.4 |
▇▁▁▁▁ |
| Oficial |
0 |
1 |
15.66 |
11.85 |
0 |
7 |
13 |
22.00 |
78.0 |
▇▅▁▁▁ |
# Pearson correlation coefficient between the two series (original data).
get_correlation(df, formula = HPMA115S0 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.460
# Scatter plot of Oficial against HPMA115S0 with a fitted straight line.
df %>%
  ggplot(aes(x = HPMA115S0, y = Oficial)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    x = "PM25 HPMA115S0", y = "PM25 Oficial",
    title = "Relationship between HPMA115S0 and Oficial"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression of Oficial on HPMA115S0 (original data).
score_model <- lm(Oficial ~ HPMA115S0, data = df)
# Coefficient table of the fitted model (original data).
score_model %>% get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 12.4 0.262 47.3 0 11.8 12.9
## 2 HPMA115S0 0.738 0.03 24.4 0 0.679 0.797
# Observed values, fitted values and residuals for every row.
regression_points <- score_model %>% get_regression_points()
regression_points
## # A tibble: 2,213 x 5
## ID Oficial HPMA115S0 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 23 17.4 25.2 -2.20
## 2 2 18 14 22.7 -4.69
## 3 3 26 17.2 25.1 0.948
## 4 4 24 18.6 26.1 -2.08
## 5 5 30 23.5 29.7 0.299
## 6 6 38 25.1 30.9 7.12
## 7 7 53 30 34.5 18.5
## 8 8 54 32.5 36.3 17.7
## 9 9 57 37.1 39.7 17.3
## 10 10 60 42.1 43.4 16.6
## # ... with 2,203 more rows
# Case 13: PMSA003 vs PMS7003
# NOTE(review): this section was headed "PMSA003 VS PMS7003" but every
# statement used `Oficial`, so it repeated the PMSA003 vs Oficial analysis
# that follows and the PMSA003 / PMS7003 pair was never compared. The
# statements below now use PMS7003. The output previously recorded here
# belonged to the Oficial comparison (it is still shown in the next section),
# so it has been removed; re-run this section to regenerate its output.

# Summary statistics of the two columns.
df %>%
  select(PMSA003, PMS7003) %>%
  skim()

# Pearson correlation coefficient between the two series (original data).
df %>%
  get_correlation(formula = PMSA003 ~ PMS7003)

# Scatter plot of PMS7003 against PMSA003 with a fitted straight line.
ggplot(df, aes(x = PMSA003, y = PMS7003)) +
  geom_point(alpha = 0.2) +
  labs(
    x = "PM25 PMSA003", y = "PM25 PMS7003",
    title = "Relationship between PMSA003 and PMS7003"
  ) +
  geom_smooth(method = "lm", se = FALSE)

# Fit the simple linear regression of PMS7003 on PMSA003 (original data).
score_model <- lm(PMS7003 ~ PMSA003, data = df)
# Coefficient table of the fitted model (original data).
get_regression_table(score_model)
# Observed values, fitted values and residuals for every row.
regression_points <- get_regression_points(score_model)
regression_points
# Case 14: PMSA003 vs Oficial -- summary statistics of the two columns.
# (Heading renumbered: it read "Caso 12", but this is the 14th pairwise
# comparison in the document.)
df %>%
  select(PMSA003, Oficial) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMSA003 |
0 |
1 |
14.61 |
15.18 |
0 |
2.81 |
9.11 |
22.5 |
89.3 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
15.66 |
11.85 |
0 |
7.00 |
13.00 |
22.0 |
78.0 |
▇▅▁▁▁ |
# Pearson correlation coefficient between the two series (original data).
get_correlation(df, formula = PMSA003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.891
# Scatter plot of Oficial against PMSA003 with a fitted straight line.
df %>%
  ggplot(aes(x = PMSA003, y = Oficial)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    x = "PM25 PMSA003", y = "PM25 Oficial",
    title = "Relationship between PMSA003 and Oficial"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression of Oficial on PMSA003 (original data).
score_model <- lm(Oficial ~ PMSA003, data = df)
# Coefficient table of the fitted model (original data).
score_model %>% get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 5.50 0.159 34.6 0 5.18 5.81
## 2 PMSA003 0.696 0.008 92.3 0 0.681 0.71
# Observed values, fitted values and residuals for every row.
regression_points <- score_model %>% get_regression_points()
regression_points
## # A tibble: 2,213 x 5
## ID Oficial PMSA003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 23 30.7 26.9 -3.85
## 2 2 18 21.6 20.5 -2.52
## 3 3 26 29 25.7 0.331
## 4 4 24 31.6 27.5 -3.48
## 5 5 30 38.1 32.0 -2.00
## 6 6 38 41.7 34.5 3.50
## 7 7 53 50.3 40.5 12.5
## 8 8 54 54.4 43.3 10.7
## 9 9 57 62.3 48.8 8.17
## 10 10 60 69.8 54.0 5.95
## # ... with 2,203 more rows
# Case 15: PMS7003 vs Oficial -- summary statistics of the two columns.
# (Heading renumbered: it read "Caso 13", but this is the 15th pairwise
# comparison in the document.)
df %>%
  select(PMS7003, Oficial) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMS7003 |
0 |
1 |
12.96 |
12.73 |
0 |
3.2 |
8.55 |
19.3 |
75.7 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
15.66 |
11.85 |
0 |
7.0 |
13.00 |
22.0 |
78.0 |
▇▅▁▁▁ |
# Pearson correlation coefficient between the two series (original data).
df %>%
  get_correlation(formula = PMS7003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.894
# Scatter plot of Oficial against PMS7003 with a fitted straight line.
# The title now names PMS7003, the sensor actually plotted on the x axis;
# it previously read "PMSA003", left over from the preceding section.
ggplot(df, aes(x = PMS7003, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(
    x = "PM25 PMS7003", y = "PM25 Oficial",
    title = "Relationship between PMS7003 and Oficial"
  ) +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression of Oficial on PMS7003 (original data).
score_model <- lm(Oficial ~ PMS7003, data = df)
# Coefficient table of the fitted model (original data).
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 4.87 0.161 30.2 0 4.56 5.19
## 2 PMS7003 0.832 0.009 93.8 0 0.815 0.85
# Observed values, fitted values and residuals for every row.
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID Oficial PMS7003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 23 26.7 27.1 -4.10
## 2 2 18 19.5 21.1 -3.10
## 3 3 26 25.6 26.2 -0.18
## 4 4 24 27.3 27.6 -3.60
## 5 5 30 32.8 32.2 -2.17
## 6 6 38 36.4 35.2 2.83
## 7 7 53 44.2 41.7 11.3
## 8 8 54 47.9 44.7 9.26
## 9 9 57 54.9 50.6 6.43
## 10 10 60 60.5 55.2 4.77
## # ... with 2,203 more rows