library(readxl)
library(tidyverse)
## -- Attaching packages --------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v dplyr 1.0.2
## v tidyr 1.1.1 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(moderndive)
library(skimr)
# **2 hours late station ESTACION KENNEDY VS CANAIRIOS**
# **5 different sensors: PMS7003 & PMSA003 & HPMA115S0 & SPS30 & SNGCJA5**
# Load the consolidated measurements workbook into `df`.
df <- read_excel("C:/Mediciones/KENNEDY_CONSOLIDADO_final_2h.xlsx")
# View() opens the interactive data viewer, so only call it from an
# interactive session; it is skipped when the script is run or knitted in
# batch mode.
if (interactive()) {
  View(df)
}
# Print a compact overview of the columns and their types.
glimpse(df)
## Rows: 803
## Columns: 8
## $ Num <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17...
## $ Fecha <chr> "10-11-2020 24:00", "11-11-2020 01:00", "11-11-2020 02:00...
## $ Oficial <dbl> 19.0, 17.0, 11.0, 8.0, 7.0, 9.0, 20.0, 41.0, 24.0, 23.0, ...
## $ PMS7003 <dbl> 6.51, 8.59, 10.40, 7.11, 4.76, 7.36, 11.40, 15.40, 15.90,...
## $ PMSA003 <dbl> 6.91, 9.69, 11.90, 7.80, 4.55, 7.64, 11.90, 18.10, 18.50,...
## $ HPMA115S0 <dbl> 4.75, 6.00, 6.64, 4.91, 3.13, 5.24, 5.74, 8.41, 7.45, 6.9...
## $ SPS30 <dbl> 4.51, 5.69, 6.51, 4.71, 3.27, 4.75, 6.02, 8.81, 8.69, 8.0...
## $ SNGCJA5 <dbl> 3.31, 4.46, 5.20, 3.45, 2.24, 3.84, 4.91, 7.37, 7.11, 6.5...
# Peek at 10 randomly chosen rows. slice_sample() is the current dplyr verb;
# sample_n() is superseded as of dplyr 1.0.0.
df %>%
  slice_sample(n = 10)
## # A tibble: 10 x 8
## Num Fecha Oficial PMS7003 PMSA003 HPMA115S0 SPS30 SNGCJA5
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 676 09-12-2020 03:00 26 21.1 24.7 17 12.2 10.7
## 2 652 08-12-2020 03:00 47 65.3 77.3 33.7 32.5 28.9
## 3 743 11-12-2020 22:00 48 43.1 51.5 24 23 19.7
## 4 733 11-12-2020 12:00 41 49.3 61.7 25.3 28.9 24.8
## 5 250 21-11-2020 09:00 35 24 27.8 13.4 12 10.3
## 6 472 30-11-2020 15:00 25 21.6 26.9 12.1 12.1 10
## 7 661 08-12-2020 12:00 33 11.2 13.7 7.78 6.91 5.41
## 8 654 08-12-2020 05:00 42 45.9 55.3 24.4 23.2 20.3
## 9 398 27-11-2020 13:00 20 27 34.2 14.3 16.3 13.6
## 10 55 13-11-2020 06:00 17 12.3 13.5 5.91 6.47 5.26
# Overlay the PM2.5 series of the five sensors and the official reading on a
# single interactive plotly chart, with the row counter Num on the x axis.
fig <- plot_ly(
  df,
  x = ~Num,
  y = ~PMS7003,
  name = "PM2.5 PMS7003",
  type = "scatter",
  mode = "lines+markers"
) %>%
  add_trace(y = ~PMSA003, name = "PM2.5 PMSA003", mode = "lines+markers") %>%
  add_trace(y = ~HPMA115S0, name = "PM2.5 HPMA115S0", mode = "lines+markers") %>%
  add_trace(y = ~SPS30, name = "PM2.5 SPS30", mode = "lines+markers") %>%
  add_trace(y = ~SNGCJA5, name = "PM2.5 SNGCJA5", mode = "lines+markers") %>%
  add_trace(y = ~Oficial, name = "PM2.5 Oficial", mode = "lines+markers")
# Print the assembled figure.
fig
#Caso 1: SNGCJA5 VS SPS30
# Summary statistics (via skimr) for the two columns compared in this case.
df %>%
  select(SNGCJA5, SPS30) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
9.64 |
6.51 |
0.43 |
5.04 |
8.36 |
12.70 |
54.8 |
▇▃▁▁▁ |
| SPS30 |
0 |
1 |
11.28 |
7.11 |
1.35 |
6.25 |
9.89 |
14.65 |
62.7 |
▇▃▁▁▁ |
# Pearson correlation coefficient between SNGCJA5 and SPS30.
get_correlation(df, formula = SNGCJA5 ~ SPS30)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.998
# Scatter plot of SPS30 against SNGCJA5 with a fitted least-squares line
# (confidence band turned off).
ggplot(data = df, mapping = aes(x = SNGCJA5, y = SPS30)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SNGCJA5 and SPS30",
    x = "PM25 SNGCJA5",
    y = "PM25 SPS30"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit a simple linear regression of SPS30 (response) on SNGCJA5 (predictor).
score_model <- lm(SPS30 ~ SNGCJA5, data = df)
# Print the coefficient table for the fitted model (estimates, standard errors,
# test statistics, p-values and confidence-interval bounds).
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 0.772 0.029 26.7 0 0.715 0.829
## 2 SNGCJA5 1.09 0.002 439. 0 1.08 1.09
# Observed values, fitted values and residuals for every row of the model data;
# the table is stored and then printed.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 803 x 5
## ID SPS30 SNGCJA5 SPS30_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 4.51 3.31 4.38 0.132
## 2 2 5.69 4.46 5.63 0.06
## 3 3 6.51 5.2 6.44 0.074
## 4 4 4.71 3.45 4.53 0.18
## 5 5 3.27 2.24 3.21 0.058
## 6 6 4.75 3.84 4.96 -0.205
## 7 7 6.02 4.91 6.12 -0.101
## 8 8 8.81 7.37 8.8 0.01
## 9 9 8.69 7.11 8.52 0.173
## 10 10 8.06 6.57 7.93 0.131
## # ... with 793 more rows
#Caso 2: SNGCJA5 VS HPMA115S0
df %>% select(SNGCJA5, HPMA115S0) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
9.64 |
6.51 |
0.43 |
5.04 |
8.36 |
12.7 |
54.8 |
▇▃▁▁▁ |
| HPMA115S0 |
0 |
1 |
12.43 |
7.54 |
2.25 |
7.26 |
10.70 |
15.4 |
64.6 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ HPMA115S0)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.968
ggplot(df, aes(x = SNGCJA5, y = HPMA115S0)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 HPMA115S0",
title = "Relationship between SNGCJA5 and HPMA115S0") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(HPMA115S0 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 1.63 0.119 13.7 0 1.39 1.86
## 2 SNGCJA5 1.12 0.01 109. 0 1.1 1.14
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 803 x 5
## ID HPMA115S0 SNGCJA5 HPMA115S0_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 4.75 3.31 5.34 -0.586
## 2 2 6 4.46 6.62 -0.624
## 3 3 6.64 5.2 7.45 -0.813
## 4 4 4.91 3.45 5.49 -0.582
## 5 5 3.13 2.24 4.14 -1.01
## 6 6 5.24 3.84 5.93 -0.689
## 7 7 5.74 4.91 7.13 -1.39
## 8 8 8.41 7.37 9.88 -1.48
## 9 9 7.45 7.11 9.59 -2.14
## 10 10 6.96 6.57 8.99 -2.03
## # ... with 793 more rows
#Caso 3: SNGCJA5 VS PMSA003
df %>% select(SNGCJA5, PMSA003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
9.64 |
6.51 |
0.43 |
5.04 |
8.36 |
12.7 |
54.8 |
▇▃▁▁▁ |
| PMSA003 |
0 |
1 |
24.25 |
17.10 |
0.84 |
11.70 |
20.90 |
32.9 |
138.0 |
▇▃▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.997
ggplot(df, aes(x = SNGCJA5, y = PMSA003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 PMSA003",
title = "Relationship between SNGCJA5 and PMSA003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -0.997 0.089 -11.2 0 -1.17 -0.822
## 2 SNGCJA5 2.62 0.008 342. 0 2.60 2.63
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 803 x 5
## ID PMSA003 SNGCJA5 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 6.91 3.31 7.67 -0.756
## 2 2 9.69 4.46 10.7 -0.986
## 3 3 11.9 5.2 12.6 -0.713
## 4 4 7.8 3.45 8.03 -0.232
## 5 5 4.55 2.24 4.87 -0.316
## 6 6 7.64 3.84 9.05 -1.41
## 7 7 11.9 4.91 11.9 0.046
## 8 8 18.1 7.37 18.3 -0.192
## 9 9 18.5 7.11 17.6 0.888
## 10 10 16.8 6.57 16.2 0.602
## # ... with 793 more rows
#Caso 4: SNGCJA5 VS PMS7003
df %>% select(SNGCJA5, PMS7003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
9.64 |
6.51 |
0.43 |
5.04 |
8.36 |
12.7 |
54.8 |
▇▃▁▁▁ |
| PMS7003 |
0 |
1 |
20.51 |
14.00 |
1.00 |
10.50 |
17.70 |
27.2 |
116.0 |
▇▃▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.995
ggplot(df, aes(x = SNGCJA5, y = PMS7003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 PMS7003",
title = "Relationship between SNGCJA5 and PMS7003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -0.116 0.089 -1.30 0.193 -0.291 0.059
## 2 SNGCJA5 2.14 0.008 279. 0 2.12 2.15
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 803 x 5
## ID PMS7003 SNGCJA5 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 6.51 3.31 6.96 -0.453
## 2 2 8.59 4.46 9.42 -0.832
## 3 3 10.4 5.2 11.0 -0.605
## 4 4 7.11 3.45 7.26 -0.152
## 5 5 4.76 2.24 4.68 0.085
## 6 6 7.36 3.84 8.10 -0.737
## 7 7 11.4 4.91 10.4 1.01
## 8 8 15.4 7.37 15.6 -0.246
## 9 9 15.9 7.11 15.1 0.81
## 10 10 14.2 6.57 13.9 0.265
## # ... with 793 more rows
#Caso 5: SNGCJA5 VS Oficial
df %>% select(SNGCJA5, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
9.64 |
6.51 |
0.43 |
5.04 |
8.36 |
12.7 |
54.8 |
▇▃▁▁▁ |
| Oficial |
0 |
1 |
25.18 |
14.37 |
0.00 |
16.00 |
22.00 |
32.5 |
124.0 |
▇▅▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.786
ggplot(df, aes(x = SNGCJA5, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 Oficial",
title = "Relationship between SNGCJA5 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 8.45 0.561 15.1 0 7.35 9.55
## 2 SNGCJA5 1.74 0.048 36.0 0 1.64 1.83
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 803 x 5
## ID Oficial SNGCJA5 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 19 3.31 14.2 4.81
## 2 2 17 4.46 16.2 0.811
## 3 3 11 5.2 17.5 -6.47
## 4 4 8 3.45 14.4 -6.44
## 5 5 7 2.24 12.3 -5.34
## 6 6 9 3.84 15.1 -6.11
## 7 7 20 4.91 17.0 3.03
## 8 8 41 7.37 21.2 19.8
## 9 9 24 7.11 20.8 3.21
## 10 10 23 6.57 19.8 3.15
## # ... with 793 more rows
#Caso 6: SPS30 VS HPMA115S0
df %>% select(SPS30, HPMA115S0) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
11.28 |
7.11 |
1.35 |
6.25 |
9.89 |
14.65 |
62.7 |
▇▃▁▁▁ |
| HPMA115S0 |
0 |
1 |
12.43 |
7.54 |
2.25 |
7.26 |
10.70 |
15.40 |
64.6 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ HPMA115S0)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.957
ggplot(df, aes(x = SPS30, y = HPMA115S0)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 HPMA115S0",
title = "Relationship between SPS30 and HPMA115S0") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(HPMA115S0 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 0.992 0.145 6.84 0 0.707 1.28
## 2 SPS30 1.01 0.011 93.2 0 0.993 1.04
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 803 x 5
## ID HPMA115S0 SPS30 HPMA115S0_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 4.75 4.51 5.57 -0.817
## 2 2 6 5.69 6.76 -0.764
## 3 3 6.64 6.51 7.60 -0.956
## 4 4 4.91 4.71 5.77 -0.86
## 5 5 3.13 3.27 4.31 -1.18
## 6 6 5.24 4.75 5.81 -0.571
## 7 7 5.74 6.02 7.10 -1.36
## 8 8 8.41 8.81 9.93 -1.52
## 9 9 7.45 8.69 9.81 -2.36
## 10 10 6.96 8.06 9.17 -2.21
## # ... with 793 more rows
#Caso 7: SPS30 VS PMSA003
df %>% select(SPS30, PMSA003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
11.28 |
7.11 |
1.35 |
6.25 |
9.89 |
14.65 |
62.7 |
▇▃▁▁▁ |
| PMSA003 |
0 |
1 |
24.25 |
17.10 |
0.84 |
11.70 |
20.90 |
32.90 |
138.0 |
▇▃▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.996
ggplot(df, aes(x = SPS30, y = PMSA003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 PMSA003",
title = "Relationship between SPS30 and PMSA003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -2.79 0.097 -28.8 0 -2.98 -2.60
## 2 SPS30 2.40 0.007 330. 0 2.38 2.41
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 803 x 5
## ID PMSA003 SPS30 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 6.91 4.51 8.02 -1.11
## 2 2 9.69 5.69 10.8 -1.16
## 3 3 11.9 6.51 12.8 -0.916
## 4 4 7.8 4.71 8.50 -0.701
## 5 5 4.55 3.27 5.05 -0.499
## 6 6 7.64 4.75 8.60 -0.957
## 7 7 11.9 6.02 11.6 0.259
## 8 8 18.1 8.81 18.3 -0.229
## 9 9 18.5 8.69 18.0 0.458
## 10 10 16.8 8.06 16.5 0.269
## # ... with 793 more rows
#Caso 8: SPS30 VS PMS7003
df %>% select(SPS30, PMS7003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
11.28 |
7.11 |
1.35 |
6.25 |
9.89 |
14.65 |
62.7 |
▇▃▁▁▁ |
| PMS7003 |
0 |
1 |
20.51 |
14.00 |
1.00 |
10.50 |
17.70 |
27.20 |
116.0 |
▇▃▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.992
ggplot(df, aes(x = SPS30, y = PMS7003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 PMS7003",
title = "Relationship between SPS30 and PMS7003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -1.53 0.114 -13.4 0 -1.75 -1.30
## 2 SPS30 1.95 0.009 228. 0 1.94 1.97
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 803 x 5
## ID PMS7003 SPS30 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 6.51 4.51 7.28 -0.774
## 2 2 8.59 5.69 9.59 -1
## 3 3 10.4 6.51 11.2 -0.793
## 4 4 7.11 4.71 7.68 -0.565
## 5 5 4.76 3.27 4.86 -0.101
## 6 6 7.36 4.75 7.75 -0.393
## 7 7 11.4 6.02 10.2 1.16
## 8 8 15.4 8.81 15.7 -0.287
## 9 9 15.9 8.69 15.5 0.447
## 10 10 14.2 8.06 14.2 -0.022
## # ... with 793 more rows
#Caso 9: SPS30 VS Oficial
df %>% select(SPS30, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
11.28 |
7.11 |
1.35 |
6.25 |
9.89 |
14.65 |
62.7 |
▇▃▁▁▁ |
| Oficial |
0 |
1 |
25.18 |
14.37 |
0.00 |
16.00 |
22.00 |
32.50 |
124.0 |
▇▅▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.766
ggplot(df, aes(x = SPS30, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 Oficial",
title = "Relationship between SPS30 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 7.71 0.612 12.6 0 6.51 8.91
## 2 SPS30 1.55 0.046 33.8 0 1.46 1.64
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 803 x 5
## ID Oficial SPS30 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 19 4.51 14.7 4.30
## 2 2 17 5.69 16.5 0.475
## 3 3 11 6.51 17.8 -6.80
## 4 4 8 4.71 15.0 -7.01
## 5 5 7 3.27 12.8 -5.78
## 6 6 9 4.75 15.1 -6.07
## 7 7 20 6.02 17.0 2.96
## 8 8 41 8.81 21.4 19.6
## 9 9 24 8.69 21.2 2.83
## 10 10 23 8.06 20.2 2.80
## # ... with 793 more rows
#Caso 10: HPMA115S0 VS PMSA003
df %>% select(HPMA115S0, PMSA003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
12.43 |
7.54 |
2.25 |
7.26 |
10.7 |
15.4 |
64.6 |
▇▂▁▁▁ |
| PMSA003 |
0 |
1 |
24.25 |
17.10 |
0.84 |
11.70 |
20.9 |
32.9 |
138.0 |
▇▃▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = HPMA115S0 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.957
ggplot(df, aes(x = HPMA115S0, y = PMSA003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 HPMA115S0", y = "PM25 PMSA003",
title = "Relationship between HPMA115S0 and PMSA003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -2.76 0.337 -8.2 0 -3.43 -2.10
## 2 HPMA115S0 2.17 0.023 93.7 0 2.13 2.22
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 803 x 5
## ID PMSA003 HPMA115S0 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 6.91 4.75 7.56 -0.645
## 2 2 9.69 6 10.3 -0.580
## 3 3 11.9 6.64 11.7 0.239
## 4 4 7.8 4.91 7.90 -0.102
## 5 5 4.55 3.13 4.04 0.514
## 6 6 7.64 5.24 8.62 -0.979
## 7 7 11.9 5.74 9.70 2.19
## 8 8 18.1 8.41 15.5 2.59
## 9 9 18.5 7.45 13.4 5.08
## 10 10 16.8 6.96 12.4 4.44
## # ... with 793 more rows
#Caso 11: HPMA115S0 VS PMS7003
df %>% select(HPMA115S0, PMS7003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
12.43 |
7.54 |
2.25 |
7.26 |
10.7 |
15.4 |
64.6 |
▇▂▁▁▁ |
| PMS7003 |
0 |
1 |
20.51 |
14.00 |
1.00 |
10.50 |
17.7 |
27.2 |
116.0 |
▇▃▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = HPMA115S0 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.963
ggplot(df, aes(x = HPMA115S0, y = PMS7003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 HPMA115S0", y = "PM25 PMS7003",
title = "Relationship between HPMA115S0 and PMS7003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -1.72 0.258 -6.68 0 -2.23 -1.22
## 2 HPMA115S0 1.79 0.018 101. 0 1.75 1.82
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 803 x 5
## ID PMS7003 HPMA115S0 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 6.51 4.75 6.77 -0.261
## 2 2 8.59 6 9.01 -0.416
## 3 3 10.4 6.64 10.2 0.249
## 4 4 7.11 4.91 7.06 0.053
## 5 5 4.76 3.13 3.87 0.886
## 6 6 7.36 5.24 7.65 -0.287
## 7 7 11.4 5.74 8.54 2.86
## 8 8 15.4 8.41 13.3 2.08
## 9 9 15.9 7.45 11.6 4.30
## 10 10 14.2 6.96 10.7 3.48
## # ... with 793 more rows
#Caso 12: HPMA115S0 VS Oficial
df %>% select(HPMA115S0, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
12.43 |
7.54 |
2.25 |
7.26 |
10.7 |
15.4 |
64.6 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
25.18 |
14.37 |
0.00 |
16.00 |
22.0 |
32.5 |
124.0 |
▇▅▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = HPMA115S0 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.813
ggplot(df, aes(x = HPMA115S0, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 HPMA115S0", y = "PM25 Oficial",
title = "Relationship between HPMA115S0 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 5.91 0.571 10.4 0 4.79 7.03
## 2 HPMA115S0 1.55 0.039 39.5 0 1.47 1.63
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 803 x 5
## ID Oficial HPMA115S0 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 19 4.75 13.3 5.72
## 2 2 17 6 15.2 1.79
## 3 3 11 6.64 16.2 -5.20
## 4 4 8 4.91 13.5 -5.52
## 5 5 7 3.13 10.8 -3.76
## 6 6 9 5.24 14.0 -5.04
## 7 7 20 5.74 14.8 5.19
## 8 8 41 8.41 18.9 22.1
## 9 9 24 7.45 17.5 6.54
## 10 10 23 6.96 16.7 6.30
## # ... with 793 more rows
# Case 13: PMSA003 vs PMS7003
# NOTE(review): this section used to run the PMSA003-vs-Oficial analysis (the
# same code as the following case) under a PMSA003-vs-PMS7003 heading, so this
# sensor pair was never actually analysed. The code now matches the heading.
# The previously pasted output belonged to the PMSA003-vs-Oficial analysis and
# has been removed; re-run the script to regenerate the results for this pair.

# Summary statistics (via skimr) for the two sensors compared in this case.
df %>% select(PMSA003, PMS7003) %>% skim()

# Pearson correlation coefficient between PMSA003 and PMS7003.
df %>%
  get_correlation(formula = PMSA003 ~ PMS7003)

# Scatter plot of PMS7003 against PMSA003 with a fitted least-squares line
# (confidence band turned off).
ggplot(df, aes(x = PMSA003, y = PMS7003)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 PMSA003", y = "PM25 PMS7003",
       title = "Relationship between PMSA003 and PMS7003") +
  geom_smooth(method = "lm", se = FALSE)

# Fit a simple linear regression of PMS7003 (response) on PMSA003 (predictor),
# following the file's convention that the first-named series is the predictor.
score_model <- lm(PMS7003 ~ PMSA003, data = df)
# Print the coefficient table for the fitted model.
get_regression_table(score_model)
# Observed values, fitted values and residuals for every row of the model data.
regression_points <- get_regression_points(score_model)
regression_points
#Caso 14: PMSA003 VS Oficial
df %>% select(PMSA003, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMSA003 |
0 |
1 |
24.25 |
17.10 |
0.84 |
11.7 |
20.9 |
32.9 |
138 |
▇▃▁▁▁ |
| Oficial |
0 |
1 |
25.18 |
14.37 |
0.00 |
16.0 |
22.0 |
32.5 |
124 |
▇▅▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = PMSA003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.785
ggplot(df, aes(x = PMSA003, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 PMSA003", y = "PM25 Oficial",
title = "Relationship between PMSA003 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMSA003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 9.19 0.546 16.8 0 8.12 10.3
## 2 PMSA003 0.66 0.018 35.9 0 0.624 0.696
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 803 x 5
## ID Oficial PMSA003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 19 6.91 13.8 5.25
## 2 2 17 9.69 15.6 1.42
## 3 3 11 11.9 17.0 -6.04
## 4 4 8 7.8 14.3 -6.34
## 5 5 7 4.55 12.2 -5.19
## 6 6 9 7.64 14.2 -5.23
## 7 7 20 11.9 17.0 2.96
## 8 8 41 18.1 21.1 19.9
## 9 9 24 18.5 21.4 2.60
## 10 10 23 16.8 20.3 2.73
## # ... with 793 more rows
#Caso 15: PMS7003 VS Oficial
df %>% select(PMS7003, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
803 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMS7003 |
0 |
1 |
20.51 |
14.00 |
1 |
10.5 |
17.7 |
27.2 |
116 |
▇▃▁▁▁ |
| Oficial |
0 |
1 |
25.18 |
14.37 |
0 |
16.0 |
22.0 |
32.5 |
124 |
▇▅▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = PMS7003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.800
# Scatter plot of the official reading against PMS7003 with a fitted
# least-squares line (confidence band turned off). The title names PMS7003,
# the sensor actually plotted on the x axis (it previously said PMSA003).
ggplot(df, aes(x = PMS7003, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 PMS7003", y = "PM25 Oficial",
       title = "Relationship between PMS7003 and Oficial") +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMS7003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 8.34 0.54 15.4 0 7.27 9.40
## 2 PMS7003 0.822 0.022 37.8 0 0.779 0.864
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 803 x 5
## ID Oficial PMS7003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 19 6.51 13.7 5.32
## 2 2 17 8.59 15.4 1.61
## 3 3 11 10.4 16.9 -5.88
## 4 4 8 7.11 14.2 -6.18
## 5 5 7 4.76 12.2 -5.24
## 6 6 9 7.36 14.4 -5.38
## 7 7 20 11.4 17.7 2.3
## 8 8 41 15.4 21.0 20.0
## 9 9 24 15.9 21.4 2.60
## 10 10 23 14.2 20.0 3.00
## # ... with 793 more rows