library(readxl)
library(tidyverse)
## -- Attaching packages --------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v dplyr 1.0.2
## v tidyr 1.1.1 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(moderndive)
library(skimr)
# **KENNEDY STATION VS CANAIRIOS**
# **5 different sensors: PMS7003 & PMSA003 & HPMA115S0 & SPS30 & SNGCJA5**
# Load the consolidated readings from the Excel workbook.
df <- read_excel("C:/Mediciones/KENNEDY_CONSOLIDADO_final.xlsx")
# View() opens the interactive data viewer; only call it in an interactive
# session so that Rscript / knitting runs do not try to open a viewer.
if (interactive()) {
  View(df)
}
# Print a compact column-by-column overview of the data.
glimpse(df)
## Rows: 803
## Columns: 8
## $ Num <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17...
## $ Fecha <chr> "10-11-2020 24:00", "11-11-2020 01:00", "11-11-2020 02:00...
## $ Oficial <dbl> 22.0, 15.0, 19.0, 17.0, 11.0, 8.0, 7.0, 9.0, 20.0, 41.0, ...
## $ PMS7003 <dbl> 6.51, 8.59, 10.40, 7.11, 4.76, 7.36, 11.40, 15.40, 15.90,...
## $ PMSA003 <dbl> 6.91, 9.69, 11.90, 7.80, 4.55, 7.64, 11.90, 18.10, 18.50,...
## $ HPMA115S0 <dbl> 4.75, 6.00, 6.64, 4.91, 3.13, 5.24, 5.74, 8.41, 7.45, 6.9...
## $ SPS30 <dbl> 4.51, 5.69, 6.51, 4.71, 3.27, 4.75, 6.02, 8.81, 8.69, 8.0...
## $ SNGCJA5 <dbl> 3.31, 4.46, 5.20, 3.45, 2.24, 3.84, 4.91, 7.37, 7.11, 6.5...
# Show 10 randomly chosen rows as a quick sanity check of the data.
# slice_sample() replaces the superseded sample_n().
df %>%
  slice_sample(n = 10)
## # A tibble: 10 x 8
## Num Fecha Oficial PMS7003 PMSA003 HPMA115S0 SPS30 SNGCJA5
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 223 20-11-2020 06:00 47 35.5 42.7 23.7 18.7 17.1
## 2 374 26-11-2020 13:00 20 9.22 11 9.76 6.48 5.21
## 3 382 26-11-2020 21:00 12 28.4 34.3 16.9 15.3 13.9
## 4 207 19-11-2020 14:00 24 15.7 18.8 9.45 9.2 7.57
## 5 603 06-12-2020 02:00 15 11.1 12.2 8.97 6.34 5.33
## 6 385 26-11-2020 24:00 19 22.3 27.2 11.8 12.2 10.5
## 7 791 13-12-2020 22:00 24 6.35 6.87 6.72 4.15 3.15
## 8 178 18-11-2020 09:00 20 11.3 13.2 7.38 7.23 5.79
## 9 291 23-11-2020 02:00 33 35.7 43.6 19.6 19.5 17
## 10 145 16-11-2020 24:00 7 3.43 3.31 2.64 2.88 1.59
# Overlay the PM2.5 series of every sensor column and the Oficial column on a
# single interactive plot, with the observation number (Num) on the x axis.
fig <- plot_ly(
  df,
  x = ~Num, y = ~PMS7003, name = 'PM2.5 PMS7003',
  type = 'scatter', mode = 'lines+markers'
) %>%
  add_trace(y = ~PMSA003, name = 'PM2.5 PMSA003', mode = 'lines+markers') %>%
  add_trace(y = ~HPMA115S0, name = 'PM2.5 HPMA115S0', mode = 'lines+markers') %>%
  add_trace(y = ~SPS30, name = 'PM2.5 SPS30', mode = 'lines+markers') %>%
  add_trace(y = ~SNGCJA5, name = 'PM2.5 SNGCJA5', mode = 'lines+markers') %>%
  add_trace(y = ~Oficial, name = 'PM2.5 Oficial', mode = 'lines+markers')
# Print the finished figure.
fig
#Caso 1: SNGCJA5 VS SPS30
# Summary statistics for the two columns compared in this case.
df %>%
  select(SNGCJA5, SPS30) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
9.64 |
6.51 |
0.43 |
5.04 |
8.36 |
12.70 |
54.8 |
▇▃▁▁▁ |
| SPS30 |
0 |
1 |
11.28 |
7.11 |
1.35 |
6.25 |
9.89 |
14.65 |
62.7 |
▇▃▁▁▁ |
# Pearson correlation coefficient between SNGCJA5 and SPS30 (original data).
get_correlation(df, formula = SNGCJA5 ~ SPS30)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.998
# Scatter plot of SPS30 against SNGCJA5 with a fitted straight line
# (method = "lm") and no confidence band.
ggplot(data = df, mapping = aes(x = SNGCJA5, y = SPS30)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SNGCJA5 and SPS30",
    x = "PM25 SNGCJA5",
    y = "PM25 SPS30"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit a simple linear regression of SPS30 on SNGCJA5 using the original data.
score_model <- lm(SPS30 ~ SNGCJA5, data = df)
# Tabulate the fitted coefficients with their standard errors, p-values and
# confidence intervals.
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 0.772 0.029 26.7 0 0.715 0.829
## 2 SNGCJA5 1.09 0.002 439. 0 1.08 1.09
# Per-observation observed value, fitted value and residual for score_model.
regression_points <- get_regression_points(score_model)
# Print the table.
regression_points
## # A tibble: 803 x 5
## ID SPS30 SNGCJA5 SPS30_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 4.51 3.31 4.38 0.132
## 2 2 5.69 4.46 5.63 0.06
## 3 3 6.51 5.2 6.44 0.074
## 4 4 4.71 3.45 4.53 0.18
## 5 5 3.27 2.24 3.21 0.058
## 6 6 4.75 3.84 4.96 -0.205
## 7 7 6.02 4.91 6.12 -0.101
## 8 8 8.81 7.37 8.8 0.01
## 9 9 8.69 7.11 8.52 0.173
## 10 10 8.06 6.57 7.93 0.131
## # ... with 793 more rows
#Caso 2: SNGCJA5 VS HPMA115S0
df %>% select(SNGCJA5, HPMA115S0) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
9.64 |
6.51 |
0.43 |
5.04 |
8.36 |
12.7 |
54.8 |
▇▃▁▁▁ |
| HPMA115S0 |
0 |
1 |
12.43 |
7.54 |
2.25 |
7.26 |
10.70 |
15.4 |
64.6 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ HPMA115S0)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.968
ggplot(df, aes(x = SNGCJA5, y = HPMA115S0)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 HPMA115S0",
title = "Relationship between SNGCJA5 and HPMA115S0") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(HPMA115S0 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 1.63 0.119 13.7 0 1.39 1.86
## 2 SNGCJA5 1.12 0.01 109. 0 1.1 1.14
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 803 x 5
## ID HPMA115S0 SNGCJA5 HPMA115S0_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 4.75 3.31 5.34 -0.586
## 2 2 6 4.46 6.62 -0.624
## 3 3 6.64 5.2 7.45 -0.813
## 4 4 4.91 3.45 5.49 -0.582
## 5 5 3.13 2.24 4.14 -1.01
## 6 6 5.24 3.84 5.93 -0.689
## 7 7 5.74 4.91 7.13 -1.39
## 8 8 8.41 7.37 9.88 -1.48
## 9 9 7.45 7.11 9.59 -2.14
## 10 10 6.96 6.57 8.99 -2.03
## # ... with 793 more rows
#Caso 3: SNGCJA5 VS PMSA003
df %>% select(SNGCJA5, PMSA003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
9.64 |
6.51 |
0.43 |
5.04 |
8.36 |
12.7 |
54.8 |
▇▃▁▁▁ |
| PMSA003 |
0 |
1 |
24.25 |
17.10 |
0.84 |
11.70 |
20.90 |
32.9 |
138.0 |
▇▃▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.997
ggplot(df, aes(x = SNGCJA5, y = PMSA003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 PMSA003",
title = "Relationship between SNGCJA5 and PMSA003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -0.997 0.089 -11.2 0 -1.17 -0.822
## 2 SNGCJA5 2.62 0.008 342. 0 2.60 2.63
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 803 x 5
## ID PMSA003 SNGCJA5 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 6.91 3.31 7.67 -0.756
## 2 2 9.69 4.46 10.7 -0.986
## 3 3 11.9 5.2 12.6 -0.713
## 4 4 7.8 3.45 8.03 -0.232
## 5 5 4.55 2.24 4.87 -0.316
## 6 6 7.64 3.84 9.05 -1.41
## 7 7 11.9 4.91 11.9 0.046
## 8 8 18.1 7.37 18.3 -0.192
## 9 9 18.5 7.11 17.6 0.888
## 10 10 16.8 6.57 16.2 0.602
## # ... with 793 more rows
#Caso 4: SNGCJA5 VS PMS7003
df %>% select(SNGCJA5, PMS7003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
9.64 |
6.51 |
0.43 |
5.04 |
8.36 |
12.7 |
54.8 |
▇▃▁▁▁ |
| PMS7003 |
0 |
1 |
20.51 |
14.00 |
1.00 |
10.50 |
17.70 |
27.2 |
116.0 |
▇▃▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.995
ggplot(df, aes(x = SNGCJA5, y = PMS7003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 PMS7003",
title = "Relationship between SNGCJA5 and PMS7003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -0.116 0.089 -1.30 0.193 -0.291 0.059
## 2 SNGCJA5 2.14 0.008 279. 0 2.12 2.15
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 803 x 5
## ID PMS7003 SNGCJA5 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 6.51 3.31 6.96 -0.453
## 2 2 8.59 4.46 9.42 -0.832
## 3 3 10.4 5.2 11.0 -0.605
## 4 4 7.11 3.45 7.26 -0.152
## 5 5 4.76 2.24 4.68 0.085
## 6 6 7.36 3.84 8.10 -0.737
## 7 7 11.4 4.91 10.4 1.01
## 8 8 15.4 7.37 15.6 -0.246
## 9 9 15.9 7.11 15.1 0.81
## 10 10 14.2 6.57 13.9 0.265
## # ... with 793 more rows
#Caso 5: SNGCJA5 VS Oficial
df %>% select(SNGCJA5, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
9.64 |
6.51 |
0.43 |
5.04 |
8.36 |
12.7 |
54.8 |
▇▃▁▁▁ |
| Oficial |
0 |
1 |
25.14 |
14.37 |
0.00 |
16.00 |
22.00 |
32.0 |
124.0 |
▇▅▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.519
ggplot(df, aes(x = SNGCJA5, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 Oficial",
title = "Relationship between SNGCJA5 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 14.1 0.775 18.2 0 12.6 15.6
## 2 SNGCJA5 1.15 0.067 17.2 0 1.01 1.28
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 803 x 5
## ID Oficial SNGCJA5 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 22 3.31 17.9 4.11
## 2 2 15 4.46 19.2 -4.20
## 3 3 19 5.2 20.1 -1.05
## 4 4 17 3.45 18.0 -1.05
## 5 5 11 2.24 16.7 -5.66
## 6 6 8 3.84 18.5 -10.5
## 7 7 7 4.91 19.7 -12.7
## 8 8 9 7.37 22.5 -13.5
## 9 9 20 7.11 22.2 -2.24
## 10 10 41 6.57 21.6 19.4
## # ... with 793 more rows
#Caso 6: SPS30 VS HPMA115S0
df %>% select(SPS30, HPMA115S0) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
11.28 |
7.11 |
1.35 |
6.25 |
9.89 |
14.65 |
62.7 |
▇▃▁▁▁ |
| HPMA115S0 |
0 |
1 |
12.43 |
7.54 |
2.25 |
7.26 |
10.70 |
15.40 |
64.6 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ HPMA115S0)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.957
ggplot(df, aes(x = SPS30, y = HPMA115S0)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 HPMA115S0",
title = "Relationship between SPS30 and HPMA115S0") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(HPMA115S0 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 0.992 0.145 6.84 0 0.707 1.28
## 2 SPS30 1.01 0.011 93.2 0 0.993 1.04
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 803 x 5
## ID HPMA115S0 SPS30 HPMA115S0_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 4.75 4.51 5.57 -0.817
## 2 2 6 5.69 6.76 -0.764
## 3 3 6.64 6.51 7.60 -0.956
## 4 4 4.91 4.71 5.77 -0.86
## 5 5 3.13 3.27 4.31 -1.18
## 6 6 5.24 4.75 5.81 -0.571
## 7 7 5.74 6.02 7.10 -1.36
## 8 8 8.41 8.81 9.93 -1.52
## 9 9 7.45 8.69 9.81 -2.36
## 10 10 6.96 8.06 9.17 -2.21
## # ... with 793 more rows
#Caso 7: SPS30 VS PMSA003
df %>% select(SPS30, PMSA003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
11.28 |
7.11 |
1.35 |
6.25 |
9.89 |
14.65 |
62.7 |
▇▃▁▁▁ |
| PMSA003 |
0 |
1 |
24.25 |
17.10 |
0.84 |
11.70 |
20.90 |
32.90 |
138.0 |
▇▃▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.996
ggplot(df, aes(x = SPS30, y = PMSA003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 PMSA003",
title = "Relationship between SPS30 and PMSA003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -2.79 0.097 -28.8 0 -2.98 -2.60
## 2 SPS30 2.40 0.007 330. 0 2.38 2.41
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 803 x 5
## ID PMSA003 SPS30 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 6.91 4.51 8.02 -1.11
## 2 2 9.69 5.69 10.8 -1.16
## 3 3 11.9 6.51 12.8 -0.916
## 4 4 7.8 4.71 8.50 -0.701
## 5 5 4.55 3.27 5.05 -0.499
## 6 6 7.64 4.75 8.60 -0.957
## 7 7 11.9 6.02 11.6 0.259
## 8 8 18.1 8.81 18.3 -0.229
## 9 9 18.5 8.69 18.0 0.458
## 10 10 16.8 8.06 16.5 0.269
## # ... with 793 more rows
#Caso 8: SPS30 VS PMS7003
df %>% select(SPS30, PMS7003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
11.28 |
7.11 |
1.35 |
6.25 |
9.89 |
14.65 |
62.7 |
▇▃▁▁▁ |
| PMS7003 |
0 |
1 |
20.51 |
14.00 |
1.00 |
10.50 |
17.70 |
27.20 |
116.0 |
▇▃▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.992
ggplot(df, aes(x = SPS30, y = PMS7003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 PMS7003",
title = "Relationship between SPS30 and PMS7003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -1.53 0.114 -13.4 0 -1.75 -1.30
## 2 SPS30 1.95 0.009 228. 0 1.94 1.97
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 803 x 5
## ID PMS7003 SPS30 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 6.51 4.51 7.28 -0.774
## 2 2 8.59 5.69 9.59 -1
## 3 3 10.4 6.51 11.2 -0.793
## 4 4 7.11 4.71 7.68 -0.565
## 5 5 4.76 3.27 4.86 -0.101
## 6 6 7.36 4.75 7.75 -0.393
## 7 7 11.4 6.02 10.2 1.16
## 8 8 15.4 8.81 15.7 -0.287
## 9 9 15.9 8.69 15.5 0.447
## 10 10 14.2 8.06 14.2 -0.022
## # ... with 793 more rows
#Caso 9: SPS30 VS Oficial
df %>% select(SPS30, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
11.28 |
7.11 |
1.35 |
6.25 |
9.89 |
14.65 |
62.7 |
▇▃▁▁▁ |
| Oficial |
0 |
1 |
25.14 |
14.37 |
0.00 |
16.00 |
22.00 |
32.00 |
124.0 |
▇▅▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.509
ggplot(df, aes(x = SPS30, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 Oficial",
title = "Relationship between SPS30 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 13.5 0.819 16.5 0 11.9 15.2
## 2 SPS30 1.03 0.061 16.7 0 0.908 1.15
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 803 x 5
## ID Oficial SPS30 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 22 4.51 18.2 3.82
## 2 2 15 5.69 19.4 -4.40
## 3 3 19 6.51 20.2 -1.24
## 4 4 17 4.71 18.4 -1.39
## 5 5 11 3.27 16.9 -5.91
## 6 6 8 4.75 18.4 -10.4
## 7 7 7 6.02 19.7 -12.7
## 8 8 9 8.81 22.6 -13.6
## 9 9 20 8.69 22.5 -2.48
## 10 10 41 8.06 21.8 19.2
## # ... with 793 more rows
#Caso 10: HPMA115S0 VS PMSA003
df %>% select(HPMA115S0, PMSA003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
12.43 |
7.54 |
2.25 |
7.26 |
10.7 |
15.4 |
64.6 |
▇▂▁▁▁ |
| PMSA003 |
0 |
1 |
24.25 |
17.10 |
0.84 |
11.70 |
20.9 |
32.9 |
138.0 |
▇▃▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = HPMA115S0 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.957
ggplot(df, aes(x = HPMA115S0, y = PMSA003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 HPMA115S0", y = "PM25 PMSA003",
title = "Relationship between HPMA115S0 and PMSA003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -2.76 0.337 -8.2 0 -3.43 -2.10
## 2 HPMA115S0 2.17 0.023 93.7 0 2.13 2.22
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 803 x 5
## ID PMSA003 HPMA115S0 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 6.91 4.75 7.56 -0.645
## 2 2 9.69 6 10.3 -0.580
## 3 3 11.9 6.64 11.7 0.239
## 4 4 7.8 4.91 7.90 -0.102
## 5 5 4.55 3.13 4.04 0.514
## 6 6 7.64 5.24 8.62 -0.979
## 7 7 11.9 5.74 9.70 2.19
## 8 8 18.1 8.41 15.5 2.59
## 9 9 18.5 7.45 13.4 5.08
## 10 10 16.8 6.96 12.4 4.44
## # ... with 793 more rows
#Caso 11: HPMA115S0 VS PMS7003
df %>% select(HPMA115S0, PMS7003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
12.43 |
7.54 |
2.25 |
7.26 |
10.7 |
15.4 |
64.6 |
▇▂▁▁▁ |
| PMS7003 |
0 |
1 |
20.51 |
14.00 |
1.00 |
10.50 |
17.7 |
27.2 |
116.0 |
▇▃▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = HPMA115S0 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.963
ggplot(df, aes(x = HPMA115S0, y = PMS7003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 HPMA115S0", y = "PM25 PMS7003",
title = "Relationship between HPMA115S0 and PMS7003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -1.72 0.258 -6.68 0 -2.23 -1.22
## 2 HPMA115S0 1.79 0.018 101. 0 1.75 1.82
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 803 x 5
## ID PMS7003 HPMA115S0 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 6.51 4.75 6.77 -0.261
## 2 2 8.59 6 9.01 -0.416
## 3 3 10.4 6.64 10.2 0.249
## 4 4 7.11 4.91 7.06 0.053
## 5 5 4.76 3.13 3.87 0.886
## 6 6 7.36 5.24 7.65 -0.287
## 7 7 11.4 5.74 8.54 2.86
## 8 8 15.4 8.41 13.3 2.08
## 9 9 15.9 7.45 11.6 4.30
## 10 10 14.2 6.96 10.7 3.48
## # ... with 793 more rows
#Caso 12: HPMA115S0 VS Oficial
df %>% select(HPMA115S0, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
12.43 |
7.54 |
2.25 |
7.26 |
10.7 |
15.4 |
64.6 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
25.14 |
14.37 |
0.00 |
16.00 |
22.0 |
32.0 |
124.0 |
▇▅▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = HPMA115S0 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.514
ggplot(df, aes(x = HPMA115S0, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 HPMA115S0", y = "PM25 Oficial",
title = "Relationship between HPMA115S0 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 13.0 0.84 15.4 0 11.3 14.6
## 2 HPMA115S0 0.98 0.058 17.0 0 0.867 1.09
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 803 x 5
## ID Oficial HPMA115S0 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 22 4.75 17.6 4.39
## 2 2 15 6 18.8 -3.84
## 3 3 19 6.64 19.5 -0.463
## 4 4 17 4.91 17.8 -0.767
## 5 5 11 3.13 16.0 -5.02
## 6 6 8 5.24 18.1 -10.1
## 7 7 7 5.74 18.6 -11.6
## 8 8 9 8.41 21.2 -12.2
## 9 9 20 7.45 20.3 -0.257
## 10 10 41 6.96 19.8 21.2
## # ... with 793 more rows
#Caso 13: PMSA003 VS PMS7003
# Summary statistics for the PMSA003 vs PMS7003 comparison. The second column
# was previously Oficial (copy-paste slip), which duplicated the following
# case.
# NOTE(review): the rendered output below predates this fix; re-knit to refresh.
df %>% select(PMSA003, PMS7003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMSA003 |
0 |
1 |
24.25 |
17.10 |
0.84 |
11.7 |
20.9 |
32.9 |
138 |
▇▃▁▁▁ |
| Oficial |
0 |
1 |
25.14 |
14.37 |
0.00 |
16.0 |
22.0 |
32.0 |
124 |
▇▅▁▁▁ |
# Pearson correlation coefficient between PMSA003 and PMS7003 (original data).
# The formula previously used Oficial, which did not match this case's header.
# NOTE(review): the rendered output below predates this fix; re-knit to refresh.
df %>%
  get_correlation(formula = PMSA003 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.520
# Scatter plot of PMS7003 against PMSA003 with a fitted straight line and no
# confidence band. The y aesthetic, axis label and title previously referred
# to Oficial, which did not match this case's header.
ggplot(df, aes(x = PMSA003, y = PMS7003)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 PMSA003", y = "PM25 PMS7003",
       title = "Relationship between PMSA003 and PMS7003") +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

# Fit a simple linear regression of PMS7003 on PMSA003 using the original data.
# The response was previously Oficial, which did not match this case's header.
# NOTE(review): the rendered output below predates this fix; re-knit to refresh.
score_model <- lm(PMS7003 ~ PMSA003, data = df)
# Tabulate the fitted coefficients with their standard errors, p-values and
# confidence intervals.
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 14.6 0.752 19.4 0 13.1 16.0
## 2 PMSA003 0.436 0.025 17.2 0 0.387 0.486
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 803 x 5
## ID Oficial PMSA003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 22 6.91 17.6 4.42
## 2 2 15 9.69 18.8 -3.79
## 3 3 19 11.9 19.8 -0.754
## 4 4 17 7.8 18.0 -0.964
## 5 5 11 4.55 16.5 -5.55
## 6 6 8 7.64 17.9 -9.90
## 7 7 7 11.9 19.8 -12.8
## 8 8 9 18.1 22.5 -13.5
## 9 9 20 18.5 22.6 -2.64
## 10 10 41 16.8 21.9 19.1
## # ... with 793 more rows
#Caso 14: PMSA003 VS Oficial
df %>% select(PMSA003, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMSA003 |
0 |
1 |
24.25 |
17.10 |
0.84 |
11.7 |
20.9 |
32.9 |
138 |
▇▃▁▁▁ |
| Oficial |
0 |
1 |
25.14 |
14.37 |
0.00 |
16.0 |
22.0 |
32.0 |
124 |
▇▅▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = PMSA003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.520
ggplot(df, aes(x = PMSA003, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 PMSA003", y = "PM25 Oficial",
title = "Relationship between PMSA003 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMSA003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 14.6 0.752 19.4 0 13.1 16.0
## 2 PMSA003 0.436 0.025 17.2 0 0.387 0.486
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 803 x 5
## ID Oficial PMSA003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 22 6.91 17.6 4.42
## 2 2 15 9.69 18.8 -3.79
## 3 3 19 11.9 19.8 -0.754
## 4 4 17 7.8 18.0 -0.964
## 5 5 11 4.55 16.5 -5.55
## 6 6 8 7.64 17.9 -9.90
## 7 7 7 11.9 19.8 -12.8
## 8 8 9 18.1 22.5 -13.5
## 9 9 20 18.5 22.6 -2.64
## 10 10 41 16.8 21.9 19.1
## # ... with 793 more rows
#Caso 15: PMS7003 VS Oficial
df %>% select(PMS7003, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMS7003 |
0 |
1 |
20.51 |
14.00 |
1 |
10.5 |
17.7 |
27.2 |
116 |
▇▃▁▁▁ |
| Oficial |
0 |
1 |
25.14 |
14.37 |
0 |
16.0 |
22.0 |
32.0 |
124 |
▇▅▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = PMS7003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.525
# Scatter plot of Oficial against PMS7003 with a fitted straight line and no
# confidence band. The title previously named PMSA003 although the plotted
# x variable is PMS7003.
ggplot(df, aes(x = PMS7003, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 PMS7003", y = "PM25 Oficial",
       title = "Relationship between PMS7003 and Oficial") +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMS7003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 14.1 0.766 18.4 0 12.6 15.6
## 2 PMS7003 0.539 0.031 17.4 0 0.478 0.599
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 803 x 5
## ID Oficial PMS7003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 22 6.51 17.6 4.40
## 2 2 15 8.59 18.7 -3.72
## 3 3 19 10.4 19.7 -0.697
## 4 4 17 7.11 17.9 -0.925
## 5 5 11 4.76 16.7 -5.66
## 6 6 8 7.36 18.1 -10.1
## 7 7 7 11.4 20.2 -13.2
## 8 8 9 15.4 22.4 -13.4
## 9 9 20 15.9 22.7 -2.66
## 10 10 41 14.2 21.7 19.3
## # ... with 793 more rows