library(readxl)
library(tidyverse)
## -- Attaching packages --------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v dplyr 1.0.2
## v tidyr 1.1.1 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(moderndive)
library(skimr)
# **1 hour late station ESTACION KENNEDY VS CANAIRIOS**
# **5 different sensors: PMS7003 & PMSA003 & HPMA115S0 & SPS30 & SNGCJA5**
# Read the consolidated 1h spreadsheet into `df`.
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
df <- read_excel("C:/Mediciones/KENNEDY_CONSOLIDADO_final_1h.xlsx")
# View() opens a data viewer window; only call it in an interactive session so
# that knitting or batch execution is not affected.
if (interactive()) {
  View(df)
}
# Compact overview of column names, types and first values.
glimpse(df)
## Rows: 803
## Columns: 8
## $ Num <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17...
## $ Fecha <chr> "10-11-2020 24:00", "11-11-2020 01:00", "11-11-2020 02:00...
## $ Oficial <dbl> 15.0, 19.0, 17.0, 11.0, 8.0, 7.0, 9.0, 20.0, 41.0, 24.0, ...
## $ PMS7003 <dbl> 6.51, 8.59, 10.40, 7.11, 4.76, 7.36, 11.40, 15.40, 15.90,...
## $ PMSA003 <dbl> 6.91, 9.69, 11.90, 7.80, 4.55, 7.64, 11.90, 18.10, 18.50,...
## $ HPMA115S0 <dbl> 4.75, 6.00, 6.64, 4.91, 3.13, 5.24, 5.74, 8.41, 7.45, 6.9...
## $ SPS30 <dbl> 4.51, 5.69, 6.51, 4.71, 3.27, 4.75, 6.02, 8.81, 8.69, 8.0...
## $ SNGCJA5 <dbl> 3.31, 4.46, 5.20, 3.45, 2.24, 3.84, 4.91, 7.37, 7.11, 6.5...
# Show 10 randomly chosen rows as a quick sanity check of the data.
# slice_sample() replaces the superseded sample_n().
df %>%
  slice_sample(n = 10)
## # A tibble: 10 x 8
## Num Fecha Oficial PMS7003 PMSA003 HPMA115S0 SPS30 SNGCJA5
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 734 11-12-2020 13:00 41 49 61.8 24.7 29.5 24.8
## 2 72 13-11-2020 23:00 32 22.7 27.3 14.5 12.2 10.7
## 3 306 23-11-2020 17:00 13 30.1 36.6 15.3 15.7 13.3
## 4 477 30-11-2020 20:00 11 15.1 18.1 7.45 8.33 6.94
## 5 353 25-11-2020 16:00 10 7.05 7.67 5.72 4.56 3.58
## 6 574 04-12-2020 21:00 27 11.3 11.5 9.3 5.74 5.22
## 7 135 16-11-2020 14:00 24 20.9 26.3 10.5 12.5 10.4
## 8 596 05-12-2020 19:00 27 19.2 22.5 11.9 10.5 8.91
## 9 736 11-12-2020 15:00 30 36.5 45.1 18.7 20.6 17.8
## 10 324 24-11-2020 11:00 19 12.7 15 8.91 7.7 6.44
# Interactive overlay of all six PM2.5 series plotted against the row index
# (Num), each drawn as lines with markers.
fig <- plot_ly(
  df,
  x = ~Num,
  y = ~PMS7003,
  name = "PM2.5 PMS7003",
  type = "scatter",
  mode = "lines+markers"
) %>%
  add_trace(y = ~PMSA003, name = "PM2.5 PMSA003", mode = "lines+markers") %>%
  add_trace(y = ~HPMA115S0, name = "PM2.5 HPMA115S0", mode = "lines+markers") %>%
  add_trace(y = ~SPS30, name = "PM2.5 SPS30", mode = "lines+markers") %>%
  add_trace(y = ~SNGCJA5, name = "PM2.5 SNGCJA5", mode = "lines+markers") %>%
  add_trace(y = ~Oficial, name = "PM2.5 Oficial", mode = "lines+markers")
fig
#Caso 1: SNGCJA5 VS SPS30
# Summary statistics for the SNGCJA5 and SPS30 columns.
select(df, SNGCJA5, SPS30) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
9.64 |
6.51 |
0.43 |
5.04 |
8.36 |
12.70 |
54.8 |
▇▃▁▁▁ |
| SPS30 |
0 |
1 |
11.28 |
7.11 |
1.35 |
6.25 |
9.89 |
14.65 |
62.7 |
▇▃▁▁▁ |
# Pearson correlation coefficient between SNGCJA5 and SPS30.
get_correlation(df, formula = SNGCJA5 ~ SPS30)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.998
# Scatter plot of SNGCJA5 (x) against SPS30 (y) with a fitted straight line.
ggplot(data = df, mapping = aes(x = SNGCJA5, y = SPS30)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SNGCJA5 and SPS30",
    x = "PM25 SNGCJA5",
    y = "PM25 SPS30"
  )
## `geom_smooth()` using formula 'y ~ x'

# Simple linear regression of SPS30 on SNGCJA5.
score_model <- lm(formula = SPS30 ~ SNGCJA5, data = df)
# Coefficient table: estimates, standard errors, p-values and confidence bounds.
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 0.772 0.029 26.7 0 0.715 0.829
## 2 SNGCJA5 1.09 0.002 439. 0 1.08 1.09
# Observed values, fitted values and residuals for every row of the model.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 803 x 5
## ID SPS30 SNGCJA5 SPS30_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 4.51 3.31 4.38 0.132
## 2 2 5.69 4.46 5.63 0.06
## 3 3 6.51 5.2 6.44 0.074
## 4 4 4.71 3.45 4.53 0.18
## 5 5 3.27 2.24 3.21 0.058
## 6 6 4.75 3.84 4.96 -0.205
## 7 7 6.02 4.91 6.12 -0.101
## 8 8 8.81 7.37 8.8 0.01
## 9 9 8.69 7.11 8.52 0.173
## 10 10 8.06 6.57 7.93 0.131
## # ... with 793 more rows
#Caso 2: SNGCJA5 VS HPMA115S0
# Summary statistics for the SNGCJA5 and HPMA115S0 columns.
select(df, SNGCJA5, HPMA115S0) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
9.64 |
6.51 |
0.43 |
5.04 |
8.36 |
12.7 |
54.8 |
▇▃▁▁▁ |
| HPMA115S0 |
0 |
1 |
12.43 |
7.54 |
2.25 |
7.26 |
10.70 |
15.4 |
64.6 |
▇▂▁▁▁ |
# Pearson correlation coefficient between SNGCJA5 and HPMA115S0.
get_correlation(df, formula = SNGCJA5 ~ HPMA115S0)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.968
# Scatter plot of SNGCJA5 (x) against HPMA115S0 (y) with a fitted straight line.
ggplot(data = df, mapping = aes(x = SNGCJA5, y = HPMA115S0)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SNGCJA5 and HPMA115S0",
    x = "PM25 SNGCJA5",
    y = "PM25 HPMA115S0"
  )
## `geom_smooth()` using formula 'y ~ x'

# Simple linear regression of HPMA115S0 on SNGCJA5.
score_model <- lm(formula = HPMA115S0 ~ SNGCJA5, data = df)
# Coefficient table: estimates, standard errors, p-values and confidence bounds.
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 1.63 0.119 13.7 0 1.39 1.86
## 2 SNGCJA5 1.12 0.01 109. 0 1.1 1.14
# Observed values, fitted values and residuals for every row of the model.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 803 x 5
## ID HPMA115S0 SNGCJA5 HPMA115S0_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 4.75 3.31 5.34 -0.586
## 2 2 6 4.46 6.62 -0.624
## 3 3 6.64 5.2 7.45 -0.813
## 4 4 4.91 3.45 5.49 -0.582
## 5 5 3.13 2.24 4.14 -1.01
## 6 6 5.24 3.84 5.93 -0.689
## 7 7 5.74 4.91 7.13 -1.39
## 8 8 8.41 7.37 9.88 -1.48
## 9 9 7.45 7.11 9.59 -2.14
## 10 10 6.96 6.57 8.99 -2.03
## # ... with 793 more rows
#Caso 3: SNGCJA5 VS PMSA003
# Summary statistics for the SNGCJA5 and PMSA003 columns.
select(df, SNGCJA5, PMSA003) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
9.64 |
6.51 |
0.43 |
5.04 |
8.36 |
12.7 |
54.8 |
▇▃▁▁▁ |
| PMSA003 |
0 |
1 |
24.25 |
17.10 |
0.84 |
11.70 |
20.90 |
32.9 |
138.0 |
▇▃▁▁▁ |
# Pearson correlation coefficient between SNGCJA5 and PMSA003.
get_correlation(df, formula = SNGCJA5 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.997
# Scatter plot of SNGCJA5 (x) against PMSA003 (y) with a fitted straight line.
ggplot(data = df, mapping = aes(x = SNGCJA5, y = PMSA003)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SNGCJA5 and PMSA003",
    x = "PM25 SNGCJA5",
    y = "PM25 PMSA003"
  )
## `geom_smooth()` using formula 'y ~ x'

# Simple linear regression of PMSA003 on SNGCJA5.
score_model <- lm(formula = PMSA003 ~ SNGCJA5, data = df)
# Coefficient table: estimates, standard errors, p-values and confidence bounds.
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -0.997 0.089 -11.2 0 -1.17 -0.822
## 2 SNGCJA5 2.62 0.008 342. 0 2.60 2.63
# Observed values, fitted values and residuals for every row of the model.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 803 x 5
## ID PMSA003 SNGCJA5 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 6.91 3.31 7.67 -0.756
## 2 2 9.69 4.46 10.7 -0.986
## 3 3 11.9 5.2 12.6 -0.713
## 4 4 7.8 3.45 8.03 -0.232
## 5 5 4.55 2.24 4.87 -0.316
## 6 6 7.64 3.84 9.05 -1.41
## 7 7 11.9 4.91 11.9 0.046
## 8 8 18.1 7.37 18.3 -0.192
## 9 9 18.5 7.11 17.6 0.888
## 10 10 16.8 6.57 16.2 0.602
## # ... with 793 more rows
#Caso 4: SNGCJA5 VS PMS7003
# Summary statistics for the SNGCJA5 and PMS7003 columns.
select(df, SNGCJA5, PMS7003) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
9.64 |
6.51 |
0.43 |
5.04 |
8.36 |
12.7 |
54.8 |
▇▃▁▁▁ |
| PMS7003 |
0 |
1 |
20.51 |
14.00 |
1.00 |
10.50 |
17.70 |
27.2 |
116.0 |
▇▃▁▁▁ |
# Pearson correlation coefficient between SNGCJA5 and PMS7003.
get_correlation(df, formula = SNGCJA5 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.995
# Scatter plot of SNGCJA5 (x) against PMS7003 (y) with a fitted straight line.
ggplot(data = df, mapping = aes(x = SNGCJA5, y = PMS7003)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SNGCJA5 and PMS7003",
    x = "PM25 SNGCJA5",
    y = "PM25 PMS7003"
  )
## `geom_smooth()` using formula 'y ~ x'

# Simple linear regression of PMS7003 on SNGCJA5.
score_model <- lm(formula = PMS7003 ~ SNGCJA5, data = df)
# Coefficient table: estimates, standard errors, p-values and confidence bounds.
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -0.116 0.089 -1.30 0.193 -0.291 0.059
## 2 SNGCJA5 2.14 0.008 279. 0 2.12 2.15
# Observed values, fitted values and residuals for every row of the model.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 803 x 5
## ID PMS7003 SNGCJA5 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 6.51 3.31 6.96 -0.453
## 2 2 8.59 4.46 9.42 -0.832
## 3 3 10.4 5.2 11.0 -0.605
## 4 4 7.11 3.45 7.26 -0.152
## 5 5 4.76 2.24 4.68 0.085
## 6 6 7.36 3.84 8.10 -0.737
## 7 7 11.4 4.91 10.4 1.01
## 8 8 15.4 7.37 15.6 -0.246
## 9 9 15.9 7.11 15.1 0.81
## 10 10 14.2 6.57 13.9 0.265
## # ... with 793 more rows
#Caso 5: SNGCJA5 VS Oficial
# Summary statistics for the SNGCJA5 and Oficial columns.
select(df, SNGCJA5, Oficial) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
9.64 |
6.51 |
0.43 |
5.04 |
8.36 |
12.7 |
54.8 |
▇▃▁▁▁ |
| Oficial |
0 |
1 |
25.16 |
14.37 |
0.00 |
16.00 |
22.00 |
32.0 |
124.0 |
▇▅▁▁▁ |
# Pearson correlation coefficient between SNGCJA5 and Oficial.
get_correlation(df, formula = SNGCJA5 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.646
# Scatter plot of SNGCJA5 (x) against Oficial (y) with a fitted straight line.
ggplot(data = df, mapping = aes(x = SNGCJA5, y = Oficial)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SNGCJA5 and Oficial",
    x = "PM25 SNGCJA5",
    y = "PM25 Oficial"
  )
## `geom_smooth()` using formula 'y ~ x'

# Simple linear regression of Oficial on SNGCJA5.
score_model <- lm(formula = Oficial ~ SNGCJA5, data = df)
# Coefficient table: estimates, standard errors, p-values and confidence bounds.
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 11.4 0.692 16.5 0 10.0 12.8
## 2 SNGCJA5 1.43 0.059 24.0 0 1.31 1.54
# Observed values, fitted values and residuals for every row of the model.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 803 x 5
## ID Oficial SNGCJA5 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 15 3.31 16.1 -1.12
## 2 2 19 4.46 17.8 1.24
## 3 3 17 5.2 18.8 -1.82
## 4 4 11 3.45 16.3 -5.32
## 5 5 8 2.24 14.6 -6.60
## 6 6 7 3.84 16.9 -9.88
## 7 7 9 4.91 18.4 -9.40
## 8 8 20 7.37 21.9 -1.91
## 9 9 41 7.11 21.5 19.5
## 10 10 24 6.57 20.8 3.23
## # ... with 793 more rows
#Caso 6: SPS30 VS HPMA115S0
# Summary statistics for the SPS30 and HPMA115S0 columns.
select(df, SPS30, HPMA115S0) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
11.28 |
7.11 |
1.35 |
6.25 |
9.89 |
14.65 |
62.7 |
▇▃▁▁▁ |
| HPMA115S0 |
0 |
1 |
12.43 |
7.54 |
2.25 |
7.26 |
10.70 |
15.40 |
64.6 |
▇▂▁▁▁ |
# Pearson correlation coefficient between SPS30 and HPMA115S0.
get_correlation(df, formula = SPS30 ~ HPMA115S0)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.957
# Scatter plot of SPS30 (x) against HPMA115S0 (y) with a fitted straight line.
ggplot(data = df, mapping = aes(x = SPS30, y = HPMA115S0)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SPS30 and HPMA115S0",
    x = "PM25 SPS30",
    y = "PM25 HPMA115S0"
  )
## `geom_smooth()` using formula 'y ~ x'

# Simple linear regression of HPMA115S0 on SPS30.
score_model <- lm(formula = HPMA115S0 ~ SPS30, data = df)
# Coefficient table: estimates, standard errors, p-values and confidence bounds.
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 0.992 0.145 6.84 0 0.707 1.28
## 2 SPS30 1.01 0.011 93.2 0 0.993 1.04
# Observed values, fitted values and residuals for every row of the model.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 803 x 5
## ID HPMA115S0 SPS30 HPMA115S0_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 4.75 4.51 5.57 -0.817
## 2 2 6 5.69 6.76 -0.764
## 3 3 6.64 6.51 7.60 -0.956
## 4 4 4.91 4.71 5.77 -0.86
## 5 5 3.13 3.27 4.31 -1.18
## 6 6 5.24 4.75 5.81 -0.571
## 7 7 5.74 6.02 7.10 -1.36
## 8 8 8.41 8.81 9.93 -1.52
## 9 9 7.45 8.69 9.81 -2.36
## 10 10 6.96 8.06 9.17 -2.21
## # ... with 793 more rows
#Caso 7: SPS30 VS PMSA003
# Summary statistics for the SPS30 and PMSA003 columns.
select(df, SPS30, PMSA003) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
11.28 |
7.11 |
1.35 |
6.25 |
9.89 |
14.65 |
62.7 |
▇▃▁▁▁ |
| PMSA003 |
0 |
1 |
24.25 |
17.10 |
0.84 |
11.70 |
20.90 |
32.90 |
138.0 |
▇▃▁▁▁ |
# Pearson correlation coefficient between SPS30 and PMSA003.
get_correlation(df, formula = SPS30 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.996
# Scatter plot of SPS30 (x) against PMSA003 (y) with a fitted straight line.
ggplot(data = df, mapping = aes(x = SPS30, y = PMSA003)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SPS30 and PMSA003",
    x = "PM25 SPS30",
    y = "PM25 PMSA003"
  )
## `geom_smooth()` using formula 'y ~ x'

# Simple linear regression of PMSA003 on SPS30.
score_model <- lm(formula = PMSA003 ~ SPS30, data = df)
# Coefficient table: estimates, standard errors, p-values and confidence bounds.
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -2.79 0.097 -28.8 0 -2.98 -2.60
## 2 SPS30 2.40 0.007 330. 0 2.38 2.41
# Observed values, fitted values and residuals for every row of the model.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 803 x 5
## ID PMSA003 SPS30 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 6.91 4.51 8.02 -1.11
## 2 2 9.69 5.69 10.8 -1.16
## 3 3 11.9 6.51 12.8 -0.916
## 4 4 7.8 4.71 8.50 -0.701
## 5 5 4.55 3.27 5.05 -0.499
## 6 6 7.64 4.75 8.60 -0.957
## 7 7 11.9 6.02 11.6 0.259
## 8 8 18.1 8.81 18.3 -0.229
## 9 9 18.5 8.69 18.0 0.458
## 10 10 16.8 8.06 16.5 0.269
## # ... with 793 more rows
#Caso 8: SPS30 VS PMS7003
# Summary statistics for the SPS30 and PMS7003 columns.
select(df, SPS30, PMS7003) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
11.28 |
7.11 |
1.35 |
6.25 |
9.89 |
14.65 |
62.7 |
▇▃▁▁▁ |
| PMS7003 |
0 |
1 |
20.51 |
14.00 |
1.00 |
10.50 |
17.70 |
27.20 |
116.0 |
▇▃▁▁▁ |
# Pearson correlation coefficient between SPS30 and PMS7003.
get_correlation(df, formula = SPS30 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.992
# Scatter plot of SPS30 (x) against PMS7003 (y) with a fitted straight line.
ggplot(data = df, mapping = aes(x = SPS30, y = PMS7003)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SPS30 and PMS7003",
    x = "PM25 SPS30",
    y = "PM25 PMS7003"
  )
## `geom_smooth()` using formula 'y ~ x'

# Simple linear regression of PMS7003 on SPS30.
score_model <- lm(formula = PMS7003 ~ SPS30, data = df)
# Coefficient table: estimates, standard errors, p-values and confidence bounds.
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -1.53 0.114 -13.4 0 -1.75 -1.30
## 2 SPS30 1.95 0.009 228. 0 1.94 1.97
# Observed values, fitted values and residuals for every row of the model.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 803 x 5
## ID PMS7003 SPS30 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 6.51 4.51 7.28 -0.774
## 2 2 8.59 5.69 9.59 -1
## 3 3 10.4 6.51 11.2 -0.793
## 4 4 7.11 4.71 7.68 -0.565
## 5 5 4.76 3.27 4.86 -0.101
## 6 6 7.36 4.75 7.75 -0.393
## 7 7 11.4 6.02 10.2 1.16
## 8 8 15.4 8.81 15.7 -0.287
## 9 9 15.9 8.69 15.5 0.447
## 10 10 14.2 8.06 14.2 -0.022
## # ... with 793 more rows
#Caso 9: SPS30 VS Oficial
# Summary statistics for the SPS30 and Oficial columns.
select(df, SPS30, Oficial) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
11.28 |
7.11 |
1.35 |
6.25 |
9.89 |
14.65 |
62.7 |
▇▃▁▁▁ |
| Oficial |
0 |
1 |
25.16 |
14.37 |
0.00 |
16.00 |
22.00 |
32.00 |
124.0 |
▇▅▁▁▁ |
# Pearson correlation coefficient between SPS30 and Oficial.
get_correlation(df, formula = SPS30 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.629
# Scatter plot of SPS30 (x) against Oficial (y) with a fitted straight line.
ggplot(data = df, mapping = aes(x = SPS30, y = Oficial)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SPS30 and Oficial",
    x = "PM25 SPS30",
    y = "PM25 Oficial"
  )
## `geom_smooth()` using formula 'y ~ x'

# Simple linear regression of Oficial on SPS30.
score_model <- lm(formula = Oficial ~ SPS30, data = df)
# Coefficient table: estimates, standard errors, p-values and confidence bounds.
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 10.8 0.74 14.6 0 9.36 12.3
## 2 SPS30 1.27 0.056 22.9 0 1.16 1.38
# Observed values, fitted values and residuals for every row of the model.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 803 x 5
## ID Oficial SPS30 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 15 4.51 16.5 -1.55
## 2 2 19 5.69 18.0 0.953
## 3 3 17 6.51 19.1 -2.09
## 4 4 11 4.71 16.8 -5.80
## 5 5 8 3.27 15.0 -6.97
## 6 6 7 4.75 16.9 -9.85
## 7 7 9 6.02 18.5 -9.47
## 8 8 20 8.81 22.0 -2.02
## 9 9 41 8.69 21.9 19.1
## 10 10 24 8.06 21.1 2.94
## # ... with 793 more rows
#Caso 10: HPMA115S0 VS PMSA003
# Summary statistics for the HPMA115S0 and PMSA003 columns.
select(df, HPMA115S0, PMSA003) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
12.43 |
7.54 |
2.25 |
7.26 |
10.7 |
15.4 |
64.6 |
▇▂▁▁▁ |
| PMSA003 |
0 |
1 |
24.25 |
17.10 |
0.84 |
11.70 |
20.9 |
32.9 |
138.0 |
▇▃▁▁▁ |
# Pearson correlation coefficient between HPMA115S0 and PMSA003.
get_correlation(df, formula = HPMA115S0 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.957
# Scatter plot of HPMA115S0 (x) against PMSA003 (y) with a fitted straight line.
ggplot(data = df, mapping = aes(x = HPMA115S0, y = PMSA003)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between HPMA115S0 and PMSA003",
    x = "PM25 HPMA115S0",
    y = "PM25 PMSA003"
  )
## `geom_smooth()` using formula 'y ~ x'

# Simple linear regression of PMSA003 on HPMA115S0.
score_model <- lm(formula = PMSA003 ~ HPMA115S0, data = df)
# Coefficient table: estimates, standard errors, p-values and confidence bounds.
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -2.76 0.337 -8.2 0 -3.43 -2.10
## 2 HPMA115S0 2.17 0.023 93.7 0 2.13 2.22
# Observed values, fitted values and residuals for every row of the model.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 803 x 5
## ID PMSA003 HPMA115S0 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 6.91 4.75 7.56 -0.645
## 2 2 9.69 6 10.3 -0.580
## 3 3 11.9 6.64 11.7 0.239
## 4 4 7.8 4.91 7.90 -0.102
## 5 5 4.55 3.13 4.04 0.514
## 6 6 7.64 5.24 8.62 -0.979
## 7 7 11.9 5.74 9.70 2.19
## 8 8 18.1 8.41 15.5 2.59
## 9 9 18.5 7.45 13.4 5.08
## 10 10 16.8 6.96 12.4 4.44
## # ... with 793 more rows
#Caso 11: HPMA115S0 VS PMS7003
# Summary statistics for the HPMA115S0 and PMS7003 columns.
select(df, HPMA115S0, PMS7003) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
12.43 |
7.54 |
2.25 |
7.26 |
10.7 |
15.4 |
64.6 |
▇▂▁▁▁ |
| PMS7003 |
0 |
1 |
20.51 |
14.00 |
1.00 |
10.50 |
17.7 |
27.2 |
116.0 |
▇▃▁▁▁ |
# Pearson correlation coefficient between HPMA115S0 and PMS7003.
get_correlation(df, formula = HPMA115S0 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.963
# Scatter plot of HPMA115S0 (x) against PMS7003 (y) with a fitted straight line.
ggplot(data = df, mapping = aes(x = HPMA115S0, y = PMS7003)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between HPMA115S0 and PMS7003",
    x = "PM25 HPMA115S0",
    y = "PM25 PMS7003"
  )
## `geom_smooth()` using formula 'y ~ x'

# Simple linear regression of PMS7003 on HPMA115S0.
score_model <- lm(formula = PMS7003 ~ HPMA115S0, data = df)
# Coefficient table: estimates, standard errors, p-values and confidence bounds.
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -1.72 0.258 -6.68 0 -2.23 -1.22
## 2 HPMA115S0 1.79 0.018 101. 0 1.75 1.82
# Observed values, fitted values and residuals for every row of the model.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 803 x 5
## ID PMS7003 HPMA115S0 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 6.51 4.75 6.77 -0.261
## 2 2 8.59 6 9.01 -0.416
## 3 3 10.4 6.64 10.2 0.249
## 4 4 7.11 4.91 7.06 0.053
## 5 5 4.76 3.13 3.87 0.886
## 6 6 7.36 5.24 7.65 -0.287
## 7 7 11.4 5.74 8.54 2.86
## 8 8 15.4 8.41 13.3 2.08
## 9 9 15.9 7.45 11.6 4.30
## 10 10 14.2 6.96 10.7 3.48
## # ... with 793 more rows
#Caso 12: HPMA115S0 VS Oficial
# Summary statistics for the HPMA115S0 and Oficial columns.
select(df, HPMA115S0, Oficial) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
12.43 |
7.54 |
2.25 |
7.26 |
10.7 |
15.4 |
64.6 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
25.16 |
14.37 |
0.00 |
16.00 |
22.0 |
32.0 |
124.0 |
▇▅▁▁▁ |
# Pearson correlation coefficient between HPMA115S0 and Oficial.
get_correlation(df, formula = HPMA115S0 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.654
# Scatter plot of HPMA115S0 (x) against Oficial (y) with a fitted straight line.
ggplot(data = df, mapping = aes(x = HPMA115S0, y = Oficial)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between HPMA115S0 and Oficial",
    x = "PM25 HPMA115S0",
    y = "PM25 Oficial"
  )
## `geom_smooth()` using formula 'y ~ x'

# Simple linear regression of Oficial on HPMA115S0.
score_model <- lm(formula = Oficial ~ HPMA115S0, data = df)
# Coefficient table: estimates, standard errors, p-values and confidence bounds.
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 9.65 0.741 13.0 0 8.20 11.1
## 2 HPMA115S0 1.25 0.051 24.5 0 1.15 1.35
# Observed values, fitted values and residuals for every row of the model.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 803 x 5
## ID Oficial HPMA115S0 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 15 4.75 15.6 -0.575
## 2 2 19 6 17.1 1.87
## 3 3 17 6.64 17.9 -0.931
## 4 4 11 4.91 15.8 -4.77
## 5 5 8 3.13 13.6 -5.56
## 6 6 7 5.24 16.2 -9.19
## 7 7 9 5.74 16.8 -7.81
## 8 8 20 8.41 20.1 -0.139
## 9 9 41 7.45 18.9 22.1
## 10 10 24 6.96 18.3 5.67
## # ... with 793 more rows
#Caso 13: PMSA003 VS PMS7003
# NOTE(review): this section is headed "PMSA003 VS PMS7003", but its code and
# recorded output were a verbatim copy of the PMSA003-vs-Oficial case that
# follows. The code below now analyses PMSA003 against PMS7003. The stale
# output was removed (it is still present in the next section); re-run this
# section to regenerate the results.

# Summary statistics for the PMSA003 and PMS7003 columns.
df %>% select(PMSA003, PMS7003) %>% skim()

# Pearson correlation coefficient between PMSA003 and PMS7003.
df %>%
  get_correlation(formula = PMSA003 ~ PMS7003)

# Scatter plot of PMSA003 (x) against PMS7003 (y) with a fitted straight line.
ggplot(df, aes(x = PMSA003, y = PMS7003)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 PMSA003", y = "PM25 PMS7003",
       title = "Relationship between PMSA003 and PMS7003") +
  geom_smooth(method = "lm", se = FALSE)

# Simple linear regression of PMS7003 on PMSA003 (response ~ predictor, the
# same orientation as the other cases).
score_model <- lm(PMS7003 ~ PMSA003, data = df)
# Coefficient table for the fitted model.
get_regression_table(score_model)
# Observed values, fitted values and residuals for every row of the model.
regression_points <- get_regression_points(score_model)
regression_points
#Caso 14: PMSA003 VS Oficial
# Summary statistics for the PMSA003 and Oficial columns.
select(df, PMSA003, Oficial) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMSA003 |
0 |
1 |
24.25 |
17.10 |
0.84 |
11.7 |
20.9 |
32.9 |
138 |
▇▃▁▁▁ |
| Oficial |
0 |
1 |
25.16 |
14.37 |
0.00 |
16.0 |
22.0 |
32.0 |
124 |
▇▅▁▁▁ |
# Pearson correlation coefficient between PMSA003 and Oficial.
get_correlation(df, formula = PMSA003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.647
# Scatter plot of PMSA003 (x) against Oficial (y) with a fitted straight line.
ggplot(data = df, mapping = aes(x = PMSA003, y = Oficial)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between PMSA003 and Oficial",
    x = "PM25 PMSA003",
    y = "PM25 Oficial"
  )
## `geom_smooth()` using formula 'y ~ x'

# Simple linear regression of Oficial on PMSA003.
score_model <- lm(formula = Oficial ~ PMSA003, data = df)
# Coefficient table: estimates, standard errors, p-values and confidence bounds.
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 12.0 0.671 17.8 0 10.6 13.3
## 2 PMSA003 0.544 0.023 24.0 0 0.5 0.588
# Observed values, fitted values and residuals for every row of the model.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 803 x 5
## ID Oficial PMSA003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 15 6.91 15.7 -0.725
## 2 2 19 9.69 17.2 1.76
## 3 3 17 11.9 18.4 -1.44
## 4 4 11 7.8 16.2 -5.21
## 5 5 8 4.55 14.4 -6.44
## 6 6 7 7.64 16.1 -9.12
## 7 7 9 11.9 18.4 -9.44
## 8 8 20 18.1 21.8 -1.81
## 9 9 41 18.5 22.0 19.0
## 10 10 24 16.8 21.1 2.90
## # ... with 793 more rows
#Caso 15: PMS7003 VS Oficial
# Summary statistics for the PMS7003 and Oficial columns.
select(df, PMS7003, Oficial) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMS7003 |
0 |
1 |
20.51 |
14.00 |
1 |
10.5 |
17.7 |
27.2 |
116 |
▇▃▁▁▁ |
| Oficial |
0 |
1 |
25.16 |
14.37 |
0 |
16.0 |
22.0 |
32.0 |
124 |
▇▅▁▁▁ |
# Pearson correlation coefficient between PMS7003 and Oficial.
get_correlation(df, formula = PMS7003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.657
# Scatter plot of PMS7003 (x) against Oficial (y) with a fitted straight line.
# The title previously named PMSA003 (copy-paste slip); it now matches the
# plotted PMS7003 series and the x-axis label.
ggplot(df, aes(x = PMS7003, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 PMS7003", y = "PM25 Oficial",
       title = "Relationship between PMS7003 and Oficial") +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

# Simple linear regression of Oficial on PMS7003.
score_model <- lm(formula = Oficial ~ PMS7003, data = df)
# Coefficient table: estimates, standard errors, p-values and confidence bounds.
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 11.3 0.679 16.7 0 9.99 12.7
## 2 PMS7003 0.674 0.027 24.7 0 0.621 0.728
# Observed values, fitted values and residuals for every row of the model.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 803 x 5
## ID Oficial PMS7003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 15 6.51 15.7 -0.712
## 2 2 19 8.59 17.1 1.88
## 3 3 17 10.4 18.3 -1.34
## 4 4 11 7.11 16.1 -5.12
## 5 5 8 4.76 14.5 -6.53
## 6 6 7 7.36 16.3 -9.28
## 7 7 9 11.4 19.0 -10.0
## 8 8 20 15.4 21.7 -1.71
## 9 9 41 15.9 22.0 19.0
## 10 10 24 14.2 20.9 3.10
## # ... with 793 more rows