library(readxl)
library(tidyverse)
## -- Attaching packages --------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v dplyr 1.0.2
## v tidyr 1.1.1 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(moderndive)
library(skimr)
# **ESTACION TUNAL VS CANAIRIOS**
# **5 different sensors: PMS7003 & PMSA003 & HPMA115S0 & SPS30 & SNGCJA5**
df <- read_excel("C:/Mediciones/TUNAL_ESTACION_CANAIRIOS.xlsx")
View(df)
glimpse(df)
## Rows: 2,213
## Columns: 8
## $ Num <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17...
## $ Fecha <chr> "13-11-2020 24:00", "14-11-2020 01:00", "14-11-2020 02:00...
## $ Oficial <dbl> 30, 23, 18, 26, 24, 30, 38, 53, 54, 57, 60, 61, 41, 26, 1...
## $ PMS7003 <dbl> 26.70, 19.50, 25.60, 27.30, 32.80, 36.40, 44.20, 47.90, 5...
## $ PMSA003 <dbl> 30.70, 21.60, 29.00, 31.60, 38.10, 41.70, 50.30, 54.40, 6...
## $ HPMA115S0 <dbl> 17.40, 14.00, 17.20, 18.60, 23.50, 25.10, 30.00, 32.50, 3...
## $ SPS30 <dbl> 15.90, 12.40, 15.70, 17.00, 21.00, 22.40, 27.40, 29.70, 3...
## $ SNGCJA5 <dbl> 14.40, 10.70, 13.80, 14.70, 18.50, 20.30, 24.80, 26.60, 2...
df %>%
sample_n(size = 10)
## # A tibble: 10 x 8
## Num Fecha Oficial PMS7003 PMSA003 HPMA115S0 SPS30 SNGCJA5
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1390 11-01-2021 11:00 4 0.0219 0.0146 0 0.7 0.0146
## 2 463 03-12-2020 20:00 37 47.7 54.8 30.9 28.4 24.9
## 3 167 20-11-2020 22:00 14 4.64 4.56 4.81 3.75 3.27
## 4 1115 30-12-2020 24:00 22 15 17.1 0 10.4 8.24
## 5 1689 23-01-2021 22:00 6 2.48 2.87 0 2.65 1.93
## 6 1431 13-01-2021 04:00 1 4.64 4.27 0 3.61 3.01
## 7 82 17-11-2020 09:00 36 27.4 32.2 17.2 16.5 13.4
## 8 423 02-12-2020 04:00 13 10.4 10.9 7.61 6.44 5.95
## 9 1303 07-01-2021 20:00 6 3.67 3.4 0 3.13 2.35
## 10 532 06-12-2020 17:00 1 2.17 1.62 2.67 2.25 1.51
fig <- plot_ly(df, x = ~Num, y = ~PMS7003, name = 'PM2.5 PMS7003', type = 'scatter', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~PMSA003, name = 'PM2.5 PMSA003', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~HPMA115S0, name = 'PM2.5 HPMA115S0', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~SPS30, name = 'PM2.5 SPS30', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~SNGCJA5, name = 'PM2.5 SNGCJA5', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~Oficial, name = 'PM2.5 Oficial', mode = 'lines+markers')
fig
#Caso 1: SNGCJA5 VS SPS30
df %>% select(SNGCJA5, SPS30) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.97 |
6.35 |
0.00 |
2.08 |
4.93 |
10.3 |
38.6 |
▇▃▁▁▁ |
| SPS30 |
0 |
1 |
8.46 |
7.30 |
0.01 |
2.98 |
5.94 |
12.0 |
43.4 |
▇▃▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ SPS30)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.995
ggplot(df, aes(x = SNGCJA5, y = SPS30)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 SPS30",
title = "Relationship between SNGCJA5 and SPS30") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(SPS30 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 0.478 0.023 21.2 0 0.434 0.522
## 2 SNGCJA5 1.14 0.002 479. 0 1.14 1.15
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID SPS30 SNGCJA5 SPS30_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 15.9 14.4 17.0 -1.06
## 2 2 12.4 10.7 12.7 -0.324
## 3 3 15.7 13.8 16.3 -0.572
## 4 4 17 14.7 17.3 -0.302
## 5 5 21 18.5 21.7 -0.651
## 6 6 22.4 20.3 23.7 -1.31
## 7 7 27.4 24.8 28.9 -1.46
## 8 8 29.7 26.6 30.9 -1.22
## 9 9 34.9 28.7 33.3 1.58
## 10 10 39.9 32.4 37.6 2.34
## # ... with 2,203 more rows
#Caso 2: SNGCJA5 VS HPMA115S0
df %>% select(SNGCJA5, HPMA115S0) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.97 |
6.35 |
0 |
2.08 |
4.93 |
10.30 |
38.6 |
▇▃▁▁▁ |
| HPMA115S0 |
0 |
1 |
4.47 |
7.39 |
0 |
0.00 |
0.00 |
5.58 |
43.4 |
▇▁▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ HPMA115S0)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.607
ggplot(df, aes(x = SNGCJA5, y = HPMA115S0)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 HPMA115S0",
title = "Relationship between SNGCJA5 and HPMA115S0") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(HPMA115S0 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -0.448 0.186 -2.41 0.016 -0.812 -0.084
## 2 SNGCJA5 0.706 0.02 35.9 0 0.667 0.745
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID HPMA115S0 SNGCJA5 HPMA115S0_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 17.4 14.4 9.72 7.68
## 2 2 14 10.7 7.11 6.89
## 3 3 17.2 13.8 9.30 7.90
## 4 4 18.6 14.7 9.93 8.67
## 5 5 23.5 18.5 12.6 10.9
## 6 6 25.1 20.3 13.9 11.2
## 7 7 30 24.8 17.1 12.9
## 8 8 32.5 26.6 18.3 14.2
## 9 9 37.1 28.7 19.8 17.3
## 10 10 42.1 32.4 22.4 19.7
## # ... with 2,203 more rows
#Caso 3: SNGCJA5 VS PMSA003
df %>% select(SNGCJA5, PMSA003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.97 |
6.35 |
0 |
2.08 |
4.93 |
10.3 |
38.6 |
▇▃▁▁▁ |
| PMSA003 |
0 |
1 |
14.61 |
15.18 |
0 |
2.81 |
9.11 |
22.5 |
89.3 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.995
ggplot(df, aes(x = SNGCJA5, y = PMSA003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 PMSA003",
title = "Relationship between SNGCJA5 and PMSA003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -1.97 0.048 -40.8 0 -2.07 -1.88
## 2 SNGCJA5 2.38 0.005 464. 0 2.37 2.39
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID PMSA003 SNGCJA5 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 30.7 14.4 32.3 -1.58
## 2 2 21.6 10.7 23.5 -1.88
## 3 3 29 13.8 30.8 -1.85
## 4 4 31.6 14.7 33.0 -1.39
## 5 5 38.1 18.5 42.0 -3.93
## 6 6 41.7 20.3 46.3 -4.61
## 7 7 50.3 24.8 57.0 -6.71
## 8 8 54.4 26.6 61.3 -6.89
## 9 9 62.3 28.7 66.3 -3.99
## 10 10 69.8 32.4 75.1 -5.29
## # ... with 2,203 more rows
#Caso 4: SNGCJA5 VS PMS7003
df %>% select(SNGCJA5, PMS7003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.97 |
6.35 |
0 |
2.08 |
4.93 |
10.3 |
38.6 |
▇▃▁▁▁ |
| PMS7003 |
0 |
1 |
12.96 |
12.73 |
0 |
3.20 |
8.55 |
19.3 |
75.7 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.996
ggplot(df, aes(x = SNGCJA5, y = PMS7003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 PMS7003",
title = "Relationship between SNGCJA5 and PMS7003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -0.96 0.036 -26.5 0 -1.03 -0.889
## 2 SNGCJA5 2.00 0.004 521. 0 1.99 2.00
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID PMS7003 SNGCJA5 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 26.7 14.4 27.8 -1.09
## 2 2 19.5 10.7 20.4 -0.901
## 3 3 25.6 13.8 26.6 -0.989
## 4 4 27.3 14.7 28.4 -1.09
## 5 5 32.8 18.5 36.0 -3.17
## 6 6 36.4 20.3 39.6 -3.16
## 7 7 44.2 24.8 48.5 -4.35
## 8 8 47.9 26.6 52.1 -4.24
## 9 9 54.9 28.7 56.3 -1.43
## 10 10 60.5 32.4 63.7 -3.22
## # ... with 2,203 more rows
#Caso 5: SNGCJA5 VS Oficial
df %>% select(SNGCJA5, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.97 |
6.35 |
0 |
2.08 |
4.93 |
10.3 |
38.6 |
▇▃▁▁▁ |
| Oficial |
0 |
1 |
15.66 |
11.85 |
0 |
7.00 |
13.00 |
22.0 |
78.0 |
▇▅▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.740
ggplot(df, aes(x = SNGCJA5, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 Oficial",
title = "Relationship between SNGCJA5 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 6.04 0.252 24.0 0 5.54 6.53
## 2 SNGCJA5 1.38 0.027 51.7 0 1.33 1.43
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID Oficial SNGCJA5 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 30 14.4 25.9 4.09
## 2 2 23 10.7 20.8 2.20
## 3 3 18 13.8 25.1 -7.08
## 4 4 26 14.7 26.3 -0.324
## 5 5 24 18.5 31.6 -7.57
## 6 6 30 20.3 34.1 -4.05
## 7 7 38 24.8 40.3 -2.26
## 8 8 53 26.6 42.7 10.3
## 9 9 54 28.7 45.6 8.35
## 10 10 57 32.4 50.8 6.25
## # ... with 2,203 more rows
#Caso 6: SPS30 VS HPMA115S0
df %>% select(SPS30, HPMA115S0) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
8.46 |
7.30 |
0.01 |
2.98 |
5.94 |
12.00 |
43.4 |
▇▃▁▁▁ |
| HPMA115S0 |
0 |
1 |
4.47 |
7.39 |
0.00 |
0.00 |
0.00 |
5.58 |
43.4 |
▇▁▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ HPMA115S0)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.608
ggplot(df, aes(x = SPS30, y = HPMA115S0)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 HPMA115S0",
title = "Relationship between SPS30 and HPMA115S0") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(HPMA115S0 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -0.731 0.191 -3.83 0 -1.10 -0.356
## 2 SPS30 0.615 0.017 36.0 0 0.582 0.649
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID HPMA115S0 SPS30 HPMA115S0_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 17.4 15.9 9.06 8.35
## 2 2 14 12.4 6.90 7.10
## 3 3 17.2 15.7 8.93 8.27
## 4 4 18.6 17 9.73 8.87
## 5 5 23.5 21 12.2 11.3
## 6 6 25.1 22.4 13.1 12.0
## 7 7 30 27.4 16.1 13.9
## 8 8 32.5 29.7 17.5 15.0
## 9 9 37.1 34.9 20.7 16.4
## 10 10 42.1 39.9 23.8 18.3
## # ... with 2,203 more rows
#Caso 7: SPS30 VS PMSA003
df %>% select(SPS30, PMSA003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
8.46 |
7.30 |
0.01 |
2.98 |
5.94 |
12.0 |
43.4 |
▇▃▁▁▁ |
| PMSA003 |
0 |
1 |
14.61 |
15.18 |
0.00 |
2.81 |
9.11 |
22.5 |
89.3 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.995
ggplot(df, aes(x = SPS30, y = PMSA003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 PMSA003",
title = "Relationship between SPS30 and PMSA003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -2.88 0.05 -57.6 0 -2.98 -2.78
## 2 SPS30 2.07 0.004 462. 0 2.06 2.08
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID PMSA003 SPS30 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 30.7 15.9 30.0 0.697
## 2 2 21.6 12.4 22.8 -1.16
## 3 3 29 15.7 29.6 -0.589
## 4 4 31.6 17 32.3 -0.678
## 5 5 38.1 21 40.6 -2.45
## 6 6 41.7 22.4 43.4 -1.75
## 7 7 50.3 27.4 53.8 -3.49
## 8 8 54.4 29.7 58.5 -4.14
## 9 9 62.3 34.9 69.3 -7.00
## 10 10 69.8 39.9 79.6 -9.84
## # ... with 2,203 more rows
#Caso 8: SPS30 VS PMS7003
df %>% select(SPS30, PMS7003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
8.46 |
7.30 |
0.01 |
2.98 |
5.94 |
12.0 |
43.4 |
▇▃▁▁▁ |
| PMS7003 |
0 |
1 |
12.96 |
12.73 |
0.00 |
3.20 |
8.55 |
19.3 |
75.7 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.995
ggplot(df, aes(x = SPS30, y = PMS7003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 PMS7003",
title = "Relationship between SPS30 and PMS7003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -1.71 0.041 -41.6 0 -1.79 -1.63
## 2 SPS30 1.73 0.004 471. 0 1.73 1.74
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID PMS7003 SPS30 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 26.7 15.9 25.9 0.833
## 2 2 19.5 12.4 19.8 -0.297
## 3 3 25.6 15.7 25.5 0.08
## 4 4 27.3 17 27.8 -0.475
## 5 5 32.8 21 34.7 -1.91
## 6 6 36.4 22.4 37.1 -0.741
## 7 7 44.2 27.4 45.8 -1.61
## 8 8 47.9 29.7 49.8 -1.90
## 9 9 54.9 34.9 58.8 -3.92
## 10 10 60.5 39.9 67.5 -6.99
## # ... with 2,203 more rows
#Caso 9: SPS30 VS Oficial
df %>% select(SPS30, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
8.46 |
7.30 |
0.01 |
2.98 |
5.94 |
12 |
43.4 |
▇▃▁▁▁ |
| Oficial |
0 |
1 |
15.66 |
11.85 |
0.00 |
7.00 |
13.00 |
22 |
78.0 |
▇▅▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.730
ggplot(df, aes(x = SPS30, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 Oficial",
title = "Relationship between SPS30 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 5.64 0.263 21.4 0 5.12 6.15
## 2 SPS30 1.18 0.024 50.3 0 1.14 1.23
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID Oficial SPS30 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 30 15.9 24.5 5.52
## 2 2 23 12.4 20.3 2.67
## 3 3 18 15.7 24.2 -6.24
## 4 4 26 17 25.8 0.216
## 5 5 24 21 30.5 -6.52
## 6 6 30 22.4 32.2 -2.18
## 7 7 38 27.4 38.1 -0.11
## 8 8 53 29.7 40.8 12.2
## 9 9 54 34.9 47.0 7.00
## 10 10 57 39.9 52.9 4.08
## # ... with 2,203 more rows
#Caso 10: HPMA115S0 VS PMSA003
df %>% select(HPMA115S0, PMSA003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
4.47 |
7.39 |
0 |
0.00 |
0.00 |
5.58 |
43.4 |
▇▁▁▁▁ |
| PMSA003 |
0 |
1 |
14.61 |
15.18 |
0 |
2.81 |
9.11 |
22.50 |
89.3 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = HPMA115S0 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.602
ggplot(df, aes(x = HPMA115S0, y = PMSA003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 HPMA115S0", y = "PM25 PMSA003",
title = "Relationship between HPMA115S0 and PMSA003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 9.08 0.301 30.1 0 8.49 9.67
## 2 HPMA115S0 1.24 0.035 35.4 0 1.17 1.30
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID PMSA003 HPMA115S0 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 30.7 17.4 30.6 0.113
## 2 2 21.6 14 26.4 -4.78
## 3 3 29 17.2 30.3 -1.34
## 4 4 31.6 18.6 32.1 -0.47
## 5 5 38.1 23.5 38.1 -0.026
## 6 6 41.7 25.1 40.1 1.60
## 7 7 50.3 30 46.2 4.14
## 8 8 54.4 32.5 49.2 5.15
## 9 9 62.3 37.1 54.9 7.36
## 10 10 69.8 42.1 61.1 8.68
## # ... with 2,203 more rows
#Caso 11: HPMA115S0 VS PMS7003
df %>% select(HPMA115S0, PMS7003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
4.47 |
7.39 |
0 |
0.0 |
0.00 |
5.58 |
43.4 |
▇▁▁▁▁ |
| PMS7003 |
0 |
1 |
12.96 |
12.73 |
0 |
3.2 |
8.55 |
19.30 |
75.7 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = HPMA115S0 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.615
ggplot(df, aes(x = HPMA115S0, y = PMS7003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 HPMA115S0", y = "PM25 PMS7003",
title = "Relationship between HPMA115S0 and PMS7003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 8.22 0.25 32.9 0 7.74 8.71
## 2 HPMA115S0 1.06 0.029 36.6 0 1.00 1.12
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID PMS7003 HPMA115S0 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 26.7 17.4 26.6 0.062
## 2 2 19.5 14 23.0 -3.54
## 3 3 25.6 17.2 26.4 -0.826
## 4 4 27.3 18.6 27.9 -0.608
## 5 5 32.8 23.5 33.1 -0.293
## 6 6 36.4 25.1 34.8 1.61
## 7 7 44.2 30 40.0 4.23
## 8 8 47.9 32.5 42.6 5.28
## 9 9 54.9 37.1 47.5 7.42
## 10 10 60.5 42.1 52.8 7.72
## # ... with 2,203 more rows
#Caso 10: HPMA115S0 VS Oficial
df %>% select(HPMA115S0, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
4.47 |
7.39 |
0 |
0 |
0 |
5.58 |
43.4 |
▇▁▁▁▁ |
| Oficial |
0 |
1 |
15.66 |
11.85 |
0 |
7 |
13 |
22.00 |
78.0 |
▇▅▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = HPMA115S0 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.388
ggplot(df, aes(x = HPMA115S0, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 HPMA115S0", y = "PM25 Oficial",
title = "Relationship between HPMA115S0 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 12.9 0.272 47.4 0 12.3 13.4
## 2 HPMA115S0 0.621 0.031 19.8 0 0.56 0.683
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID Oficial HPMA115S0 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 30 17.4 23.7 6.31
## 2 2 23 14 21.6 1.42
## 3 3 18 17.2 23.6 -5.57
## 4 4 26 18.6 24.4 1.56
## 5 5 24 23.5 27.5 -3.48
## 6 6 30 25.1 28.5 1.52
## 7 7 38 30 31.5 6.48
## 8 8 53 32.5 33.1 19.9
## 9 9 54 37.1 35.9 18.1
## 10 10 57 42.1 39.0 18.0
## # ... with 2,203 more rows
#Caso 11: PMSA003 VS PMS7003
df %>% select(PMSA003, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMSA003 |
0 |
1 |
14.61 |
15.18 |
0 |
2.81 |
9.11 |
22.5 |
89.3 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
15.66 |
11.85 |
0 |
7.00 |
13.00 |
22.0 |
78.0 |
▇▅▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = PMSA003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.743
ggplot(df, aes(x = PMSA003, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 PMSA003", y = "PM25 Oficial",
title = "Relationship between PMSA003 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMSA003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 7.18 0.234 30.7 0 6.72 7.64
## 2 PMSA003 0.580 0.011 52.3 0 0.559 0.602
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID Oficial PMSA003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 30 30.7 25.0 5.00
## 2 2 23 21.6 19.7 3.28
## 3 3 18 29 24.0 -6.01
## 4 4 26 31.6 25.5 0.481
## 5 5 24 38.1 29.3 -5.29
## 6 6 30 41.7 31.4 -1.38
## 7 7 38 50.3 36.4 1.63
## 8 8 53 54.4 38.8 14.2
## 9 9 54 62.3 43.3 10.7
## 10 10 57 69.8 47.7 9.31
## # ... with 2,203 more rows
#Caso 12: PMSA003 VS Oficial
df %>% select(PMSA003, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMSA003 |
0 |
1 |
14.61 |
15.18 |
0 |
2.81 |
9.11 |
22.5 |
89.3 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
15.66 |
11.85 |
0 |
7.00 |
13.00 |
22.0 |
78.0 |
▇▅▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = PMSA003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.743
ggplot(df, aes(x = PMSA003, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 PMSA003", y = "PM25 Oficial",
title = "Relationship between PMSA003 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMSA003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 7.18 0.234 30.7 0 6.72 7.64
## 2 PMSA003 0.580 0.011 52.3 0 0.559 0.602
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID Oficial PMSA003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 30 30.7 25.0 5.00
## 2 2 23 21.6 19.7 3.28
## 3 3 18 29 24.0 -6.01
## 4 4 26 31.6 25.5 0.481
## 5 5 24 38.1 29.3 -5.29
## 6 6 30 41.7 31.4 -1.38
## 7 7 38 50.3 36.4 1.63
## 8 8 53 54.4 38.8 14.2
## 9 9 54 62.3 43.3 10.7
## 10 10 57 69.8 47.7 9.31
## # ... with 2,203 more rows
#Caso 13: PMS7003 VS Oficial
df %>% select(PMS7003, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
2213 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMS7003 |
0 |
1 |
12.96 |
12.73 |
0 |
3.2 |
8.55 |
19.3 |
75.7 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
15.66 |
11.85 |
0 |
7.0 |
13.00 |
22.0 |
78.0 |
▇▅▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = PMS7003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.743
ggplot(df, aes(x = PMS7003, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 PMS7003", y = "PM25 Oficial",
title = "Relationship between PMSA003 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMS7003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 6.69 0.241 27.8 0 6.22 7.17
## 2 PMS7003 0.692 0.013 52.2 0 0.666 0.718
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 2,213 x 5
## ID Oficial PMS7003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 30 26.7 25.2 4.84
## 2 2 23 19.5 20.2 2.82
## 3 3 18 25.6 24.4 -6.40
## 4 4 26 27.3 25.6 0.42
## 5 5 24 32.8 29.4 -5.38
## 6 6 30 36.4 31.9 -1.88
## 7 7 38 44.2 37.3 0.729
## 8 8 53 47.9 39.8 13.2
## 9 9 54 54.9 44.7 9.33
## 10 10 57 60.5 48.5 8.45
## # ... with 2,203 more rows