library(readxl)
library(tidyverse)
## -- Attaching packages --------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v dplyr 1.0.2
## v tidyr 1.1.1 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(moderndive)
library(skimr)
# **December ESTACION PAIBA VS CANAIRIOS**
# **5 different sensors: PMS7003 & PMSA003 & HPMA115S0 & SPS30 & SNGCJA5**
# Load the December PAIBA vs CANAIRIOS measurement workbook into `df`.
# NOTE(review): this is an absolute, machine-specific path; the script only
# runs where this exact file exists.
df <- read_excel("C:/Mediciones/PAIBA_CANAIRIOS_DIC.xlsx")
# View() opens a GUI data viewer, so only call it in an interactive session;
# non-interactive runs (Rscript, knitting) then proceed without it.
if (interactive()) {
  View(df)
}
# Compact overview of column names, types and leading values.
glimpse(df)
## Rows: 697
## Columns: 8
## $ Num <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17...
## $ Fecha <dttm> 2020-12-01 00:00:00, 2020-12-01 01:00:00, 2020-12-01 02:...
## $ Oficial <dbl> 8.12, 7.27, 10.32, 9.91, 12.52, 13.31, 14.70, 21.35, 23.8...
## $ PMS7003 <dbl> 7.218182, 8.224138, 11.259259, 14.290909, 15.464286, 13.2...
## $ PMSA003 <dbl> 5.363636, 6.724138, 10.407407, 14.545455, 15.285714, 12.4...
## $ HPMA115S0 <dbl> 5.345455, 5.413793, 7.425926, 9.490909, 9.982143, 8.19642...
## $ SPS30 <dbl> 4.363636, 4.862069, 6.740741, 8.654545, 9.178571, 7.28571...
## $ SNGCJA5 <dbl> 2.600000, 3.068966, 4.666667, 6.236364, 6.642857, 5.30357...
# Peek at 10 randomly chosen rows of the data.
# slice_sample() is the current dplyr verb; sample_n() is superseded.
df %>%
  slice_sample(n = 10)
## # A tibble: 10 x 8
## Num Fecha Oficial PMS7003 PMSA003 HPMA115S0 SPS30 SNGCJA5
## <dbl> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 436 2020-12-19 03:00:00 11.5 8.74 7.40 5.43 5.34 3.36
## 2 428 2020-12-18 19:00:00 29.1 34.5 35.6 18.9 18.4 13.3
## 3 245 2020-12-11 04:00:00 24.4 38.7 43.3 23.8 23.4 17.3
## 4 129 2020-12-06 08:00:00 8.07 3.71 2.8 2.67 2.56 1.18
## 5 78 2020-12-04 05:00:00 21.7 21.5 22.9 10.9 10.9 8.80
## 6 621 2020-12-26 20:00:00 12.3 12.6 11.9 8.29 8.25 5.14
## 7 71 2020-12-03 22:00:00 22.7 27.9 29.9 16.5 15.9 11.5
## 8 95 2020-12-04 22:00:00 9.73 4.11 3.85 3.36 2.87 1.55
## 9 148 2020-12-07 03:00:00 11.2 8.67 7.76 5.44 5.28 3.57
## 10 622 2020-12-26 21:00:00 12.0 5.74 3.65 4.09 3.93 2.04
# Interactive overlay of the PM2.5 columns of the five sensors plus the
# `Oficial` column, all drawn against the row counter `Num`.
# The first trace is created by plot_ly(); each add_trace() appends one more
# series that inherits the x mapping and the scatter trace type.
fig <- plot_ly(
  df,
  x = ~Num,
  y = ~PMS7003,
  name = 'PM2.5 PMS7003',
  type = 'scatter',
  mode = 'lines+markers'
) %>%
  add_trace(y = ~PMSA003, name = 'PM2.5 PMSA003', mode = 'lines+markers') %>%
  add_trace(y = ~HPMA115S0, name = 'PM2.5 HPMA115S0', mode = 'lines+markers') %>%
  add_trace(y = ~SPS30, name = 'PM2.5 SPS30', mode = 'lines+markers') %>%
  add_trace(y = ~SNGCJA5, name = 'PM2.5 SNGCJA5', mode = 'lines+markers') %>%
  add_trace(y = ~Oficial, name = 'PM2.5 Oficial', mode = 'lines+markers')
# Print the finished widget.
fig
#Caso 1: SNGCJA5 VS SPS30
df %>% select(SNGCJA5, SPS30) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
697 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
7.94 |
6.43 |
0.10 |
2.96 |
6.56 |
10.75 |
40.80 |
▇▃▁▁▁ |
| SPS30 |
0 |
1 |
11.29 |
8.46 |
1.17 |
4.85 |
9.30 |
15.11 |
51.67 |
▇▃▁▁▁ |
#**Pearson correlation coefficient original**
# Correlation between the SNGCJA5 and SPS30 columns of `df`, returned as a
# one-row tibble with a single `cor` column. The formula here is written as
# SNGCJA5 ~ SPS30 while the regression below uses SPS30 ~ SNGCJA5; a Pearson
# correlation is symmetric, so the order does not change the value.
df %>%
get_correlation(formula = SNGCJA5 ~ SPS30)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.995
# Scatter plot of SPS30 (y) against SNGCJA5 (x) with semi-transparent points
# and a straight least-squares line drawn without its confidence ribbon.
df %>%
  ggplot(aes(x = SNGCJA5, y = SPS30)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SNGCJA5 and SPS30",
    x = "PM25 SNGCJA5",
    y = "PM25 SPS30"
  )
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
# Simple linear regression with SPS30 as the response and SNGCJA5 as the only
# predictor. `score_model` is reassigned by every later case, so it always
# holds the most recently fitted pair.
score_model <- lm(SPS30 ~ SNGCJA5, data = df)
#**Get regression table original:**
# Coefficient table: estimate, standard error, test statistic, p-value and
# confidence-interval bounds for the intercept and the slope.
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 0.907 0.051 17.9 0 0.807 1.01
## 2 SNGCJA5 1.31 0.005 263. 0 1.30 1.32
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 697 x 5
## ID SPS30 SNGCJA5 SPS30_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 4.36 2.6 4.31 0.056
## 2 2 4.86 3.07 4.92 -0.06
## 3 3 6.74 4.67 7.01 -0.271
## 4 4 8.65 6.24 9.06 -0.411
## 5 5 9.18 6.64 9.60 -0.418
## 6 6 7.29 5.30 7.84 -0.559
## 7 7 6.33 4.72 7.09 -0.755
## 8 8 10.8 8.52 12.1 -1.23
## 9 9 16.7 12.5 17.3 -0.598
## 10 10 16.9 12.4 17.1 -0.225
## # ... with 687 more rows
#Caso 2: SNGCJA5 VS HPMA115S0
df %>% select(SNGCJA5, HPMA115S0) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
697 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
7.94 |
6.43 |
0.10 |
2.96 |
6.56 |
10.75 |
40.80 |
▇▃▁▁▁ |
| HPMA115S0 |
0 |
1 |
11.76 |
8.49 |
1.35 |
5.31 |
9.60 |
15.41 |
51.89 |
▇▃▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ HPMA115S0)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.991
ggplot(df, aes(x = SNGCJA5, y = HPMA115S0)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 HPMA115S0",
title = "Relationship between SNGCJA5 and HPMA115S0") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(HPMA115S0 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 1.38 0.07 19.9 0 1.25 1.52
## 2 SNGCJA5 1.31 0.007 192. 0 1.29 1.32
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 697 x 5
## ID HPMA115S0 SNGCJA5 HPMA115S0_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 5.34 2.6 4.78 0.563
## 2 2 5.41 3.07 5.40 0.018
## 3 3 7.43 4.67 7.48 -0.059
## 4 4 9.49 6.24 9.54 -0.047
## 5 5 9.98 6.64 10.1 -0.087
## 6 6 8.20 5.30 8.32 -0.121
## 7 7 7.31 4.72 7.56 -0.248
## 8 8 11.7 8.52 12.5 -0.784
## 9 9 17 12.5 17.8 -0.752
## 10 10 17.8 12.4 17.6 0.214
## # ... with 687 more rows
#Caso 3: SNGCJA5 VS PMSA003
df %>% select(SNGCJA5, PMSA003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
697 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
7.94 |
6.43 |
0.10 |
2.96 |
6.56 |
10.75 |
40.80 |
▇▃▁▁▁ |
| PMSA003 |
0 |
1 |
20.29 |
17.42 |
0.18 |
6.54 |
16.23 |
28.48 |
108.45 |
▇▃▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.998
ggplot(df, aes(x = SNGCJA5, y = PMSA003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 PMSA003",
title = "Relationship between SNGCJA5 and PMSA003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -1.14 0.069 -16.5 0 -1.28 -1.01
## 2 SNGCJA5 2.70 0.007 398. 0 2.69 2.71
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 697 x 5
## ID PMSA003 SNGCJA5 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 5.36 2.6 5.88 -0.515
## 2 2 6.72 3.07 7.15 -0.422
## 3 3 10.4 4.67 11.5 -1.05
## 4 4 14.5 6.24 15.7 -1.16
## 5 5 15.3 6.64 16.8 -1.51
## 6 6 12.5 5.30 13.2 -0.7
## 7 7 11.5 4.72 11.6 -0.15
## 8 8 22.8 8.52 21.9 0.89
## 9 9 34.1 12.5 32.7 1.46
## 10 10 32.8 12.4 32.3 0.439
## # ... with 687 more rows
#Caso 4: SNGCJA5 VS PMS7003
df %>% select(SNGCJA5, PMS7003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
697 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
7.94 |
6.43 |
0.10 |
2.96 |
6.56 |
10.75 |
40.80 |
▇▃▁▁▁ |
| PMS7003 |
0 |
1 |
20.12 |
15.64 |
0.96 |
7.84 |
16.61 |
27.35 |
93.25 |
▇▃▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.988
ggplot(df, aes(x = SNGCJA5, y = PMS7003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 PMS7003",
title = "Relationship between SNGCJA5 and PMS7003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 1.07 0.147 7.28 0 0.78 1.36
## 2 SNGCJA5 2.40 0.014 167. 0 2.37 2.43
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 697 x 5
## ID PMS7003 SNGCJA5 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 7.22 2.6 7.31 -0.092
## 2 2 8.22 3.07 8.44 -0.212
## 3 3 11.3 4.67 12.3 -1.01
## 4 4 14.3 6.24 16.0 -1.75
## 5 5 15.5 6.64 17.0 -1.55
## 6 6 13.3 5.30 13.8 -0.515
## 7 7 11.9 4.72 12.4 -0.471
## 8 8 22.1 8.52 21.5 0.578
## 9 9 32.0 12.5 31.1 0.897
## 10 10 30.2 12.4 30.8 -0.606
## # ... with 687 more rows
#Caso 5: SNGCJA5 VS Oficial
df %>% select(SNGCJA5, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
697 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
7.94 |
6.43 |
0.10 |
2.96 |
6.56 |
10.75 |
40.80 |
▇▃▁▁▁ |
| Oficial |
0 |
1 |
18.30 |
9.22 |
3.47 |
11.12 |
16.86 |
23.87 |
62.22 |
▇▇▃▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.606
ggplot(df, aes(x = SNGCJA5, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 Oficial",
title = "Relationship between SNGCJA5 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 11.4 0.442 25.8 0 10.5 12.3
## 2 SNGCJA5 0.869 0.043 20.1 0 0.784 0.954
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 697 x 5
## ID Oficial SNGCJA5 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 8.12 2.6 13.7 -5.54
## 2 2 7.27 3.07 14.1 -6.80
## 3 3 10.3 4.67 15.5 -5.13
## 4 4 9.91 6.24 16.8 -6.91
## 5 5 12.5 6.64 17.2 -4.65
## 6 6 13.3 5.30 16.0 -2.70
## 7 7 14.7 4.72 15.5 -0.805
## 8 8 21.4 8.52 18.8 2.55
## 9 9 23.9 12.5 22.3 1.59
## 10 10 22.9 12.4 22.2 0.745
## # ... with 687 more rows
#Caso 6: SPS30 VS HPMA115S0
df %>% select(SPS30, HPMA115S0) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
697 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
11.29 |
8.46 |
1.17 |
4.85 |
9.3 |
15.11 |
51.67 |
▇▃▁▁▁ |
| HPMA115S0 |
0 |
1 |
11.76 |
8.49 |
1.35 |
5.31 |
9.6 |
15.41 |
51.89 |
▇▃▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ HPMA115S0)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.995
ggplot(df, aes(x = SPS30, y = HPMA115S0)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 HPMA115S0",
title = "Relationship between SPS30 and HPMA115S0") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(HPMA115S0 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 0.483 0.053 9.05 0 0.378 0.588
## 2 SPS30 0.999 0.004 264. 0 0.992 1.01
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 697 x 5
## ID HPMA115S0 SPS30 HPMA115S0_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 5.34 4.36 4.84 0.503
## 2 2 5.41 4.86 5.34 0.074
## 3 3 7.43 6.74 7.22 0.209
## 4 4 9.49 8.65 9.13 0.362
## 5 5 9.98 9.18 9.65 0.33
## 6 6 8.20 7.29 7.76 0.435
## 7 7 7.31 6.33 6.81 0.504
## 8 8 11.7 10.8 11.3 0.448
## 9 9 17 16.7 17.2 -0.151
## 10 10 17.8 16.9 17.4 0.442
## # ... with 687 more rows
#Caso 7: SPS30 VS PMSA003
df %>% select(SPS30, PMSA003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
697 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
11.29 |
8.46 |
1.17 |
4.85 |
9.30 |
15.11 |
51.67 |
▇▃▁▁▁ |
| PMSA003 |
0 |
1 |
20.29 |
17.42 |
0.18 |
6.54 |
16.23 |
28.48 |
108.45 |
▇▃▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.995
ggplot(df, aes(x = SPS30, y = PMSA003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 PMSA003",
title = "Relationship between SPS30 and PMSA003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -2.83 0.114 -24.7 0 -3.05 -2.60
## 2 SPS30 2.05 0.008 253. 0 2.03 2.06
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 697 x 5
## ID PMSA003 SPS30 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 5.36 4.36 6.11 -0.747
## 2 2 6.72 4.86 7.13 -0.407
## 3 3 10.4 6.74 11.0 -0.571
## 4 4 14.5 8.65 14.9 -0.353
## 5 5 15.3 9.18 16.0 -0.686
## 6 6 12.5 7.29 12.1 0.387
## 7 7 11.5 6.33 10.1 1.33
## 8 8 22.8 10.8 19.3 3.43
## 9 9 34.1 16.7 31.3 2.78
## 10 10 32.8 16.9 31.8 0.998
## # ... with 687 more rows
#Caso 8: SPS30 VS PMS7003
df %>% select(SPS30, PMS7003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
697 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
11.29 |
8.46 |
1.17 |
4.85 |
9.30 |
15.11 |
51.67 |
▇▃▁▁▁ |
| PMS7003 |
0 |
1 |
20.12 |
15.64 |
0.96 |
7.84 |
16.61 |
27.35 |
93.25 |
▇▃▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.993
ggplot(df, aes(x = SPS30, y = PMS7003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 PMS7003",
title = "Relationship between SPS30 and PMS7003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -0.602 0.117 -5.17 0 -0.831 -0.374
## 2 SPS30 1.84 0.008 222. 0 1.82 1.85
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 697 x 5
## ID PMS7003 SPS30 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 7.22 4.36 7.41 -0.19
## 2 2 8.22 4.86 8.32 -0.099
## 3 3 11.3 6.74 11.8 -0.512
## 4 4 14.3 8.65 15.3 -0.994
## 5 5 15.5 9.18 16.2 -0.782
## 6 6 13.3 7.29 12.8 0.514
## 7 7 11.9 6.33 11.0 0.917
## 8 8 22.1 10.8 19.3 2.84
## 9 9 32.0 16.7 30.0 1.99
## 10 10 30.2 16.9 30.4 -0.197
## # ... with 687 more rows
#Caso 9: SPS30 VS Oficial
df %>% select(SPS30, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
697 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
11.29 |
8.46 |
1.17 |
4.85 |
9.30 |
15.11 |
51.67 |
▇▃▁▁▁ |
| Oficial |
0 |
1 |
18.30 |
9.22 |
3.47 |
11.12 |
16.86 |
23.87 |
62.22 |
▇▇▃▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.573
ggplot(df, aes(x = SPS30, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 Oficial",
title = "Relationship between SPS30 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 11.2 0.478 23.5 0 10.3 12.2
## 2 SPS30 0.625 0.034 18.4 0 0.559 0.692
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 697 x 5
## ID Oficial SPS30 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 8.12 4.36 14.0 -5.85
## 2 2 7.27 4.86 14.3 -7.01
## 3 3 10.3 6.74 15.5 -5.13
## 4 4 9.91 8.65 16.6 -6.74
## 5 5 12.5 9.18 17.0 -4.46
## 6 6 13.3 7.29 15.8 -2.48
## 7 7 14.7 6.33 15.2 -0.498
## 8 8 21.4 10.8 18.0 3.35
## 9 9 23.9 16.7 21.7 2.20
## 10 10 22.9 16.9 21.8 1.11
## # ... with 687 more rows
#Caso 10: HPMA115S0 VS PMSA003
df %>% select(HPMA115S0, PMSA003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
697 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
11.76 |
8.49 |
1.35 |
5.31 |
9.60 |
15.41 |
51.89 |
▇▃▁▁▁ |
| PMSA003 |
0 |
1 |
20.29 |
17.42 |
0.18 |
6.54 |
16.23 |
28.48 |
108.45 |
▇▃▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = HPMA115S0 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.988
ggplot(df, aes(x = HPMA115S0, y = PMSA003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 HPMA115S0", y = "PM25 PMSA003",
title = "Relationship between HPMA115S0 and PMSA003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -3.55 0.171 -20.7 0 -3.88 -3.21
## 2 HPMA115S0 2.03 0.012 172. 0 2.00 2.05
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 697 x 5
## ID PMSA003 HPMA115S0 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 5.36 5.34 7.29 -1.92
## 2 2 6.72 5.41 7.43 -0.703
## 3 3 10.4 7.43 11.5 -1.10
## 4 4 14.5 9.49 15.7 -1.15
## 5 5 15.3 9.98 16.7 -1.40
## 6 6 12.5 8.20 13.1 -0.586
## 7 7 11.5 7.31 11.3 0.192
## 8 8 22.8 11.7 20.3 2.51
## 9 9 34.1 17 30.9 3.21
## 10 10 32.8 17.8 32.5 0.23
## # ... with 687 more rows
#Caso 11: HPMA115S0 VS PMS7003
df %>% select(HPMA115S0, PMS7003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
697 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
11.76 |
8.49 |
1.35 |
5.31 |
9.60 |
15.41 |
51.89 |
▇▃▁▁▁ |
| PMS7003 |
0 |
1 |
20.12 |
15.64 |
0.96 |
7.84 |
16.61 |
27.35 |
93.25 |
▇▃▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = HPMA115S0 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.993
ggplot(df, aes(x = HPMA115S0, y = PMS7003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 HPMA115S0", y = "PM25 PMS7003",
title = "Relationship between HPMA115S0 and PMS7003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -1.39 0.119 -11.7 0 -1.62 -1.15
## 2 HPMA115S0 1.83 0.008 224. 0 1.81 1.84
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 697 x 5
## ID PMS7003 HPMA115S0 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 7.22 5.34 8.39 -1.17
## 2 2 8.22 5.41 8.52 -0.291
## 3 3 11.3 7.43 12.2 -0.935
## 4 4 14.3 9.49 16.0 -1.68
## 5 5 15.5 9.98 16.9 -1.40
## 6 6 13.3 8.20 13.6 -0.318
## 7 7 11.9 7.31 12.0 -0.048
## 8 8 22.1 11.7 20.1 2.02
## 9 9 32.0 17 29.7 2.32
## 10 10 30.2 17.8 31.2 -0.955
## # ... with 687 more rows
#Caso 12: HPMA115S0 VS Oficial
df %>% select(HPMA115S0, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
697 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
11.76 |
8.49 |
1.35 |
5.31 |
9.60 |
15.41 |
51.89 |
▇▃▁▁▁ |
| Oficial |
0 |
1 |
18.30 |
9.22 |
3.47 |
11.12 |
16.86 |
23.87 |
62.22 |
▇▇▃▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = HPMA115S0 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.616
ggplot(df, aes(x = HPMA115S0, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 HPMA115S0", y = "PM25 Oficial",
title = "Relationship between HPMA115S0 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 10.4 0.471 22.2 0 9.50 11.4
## 2 HPMA115S0 0.669 0.032 20.6 0 0.605 0.733
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 697 x 5
## ID Oficial HPMA115S0 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 8.12 5.34 14.0 -5.88
## 2 2 7.27 5.41 14.0 -6.78
## 3 3 10.3 7.43 15.4 -5.08
## 4 4 9.91 9.49 16.8 -6.87
## 5 5 12.5 9.98 17.1 -4.59
## 6 6 13.3 8.20 15.9 -2.60
## 7 7 14.7 7.31 15.3 -0.621
## 8 8 21.4 11.7 18.3 3.07
## 9 9 23.9 17 21.8 2.07
## 10 10 22.9 17.8 22.3 0.576
## # ... with 687 more rows
#Caso 13: PMSA003 VS PMS7003
# PMSA003 vs PMS7003 comparison.
# NOTE: every step of this section used `Oficial` instead of `PMS7003`, which
# made it an exact duplicate of the PMSA003 vs Oficial case that follows and
# left this sensor pair unanalysed. The output recorded here belonged to that
# duplicate and has been dropped; re-run the section to regenerate it.

# Summary statistics for the two columns being compared.
df %>% select(PMSA003, PMS7003) %>% skim()
#**Pearson correlation coefficient original**
df %>%
  get_correlation(formula = PMSA003 ~ PMS7003)
# Scatter plot of PMS7003 (y) against PMSA003 (x) with a least-squares line
# and no confidence ribbon.
ggplot(df, aes(x = PMSA003, y = PMS7003)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 PMSA003", y = "PM25 PMS7003",
       title = "Relationship between PMSA003 and PMS7003") +
  geom_smooth(method = "lm", se = FALSE)

#**Fit regression model original:**
# Same orientation as the other cases: second sensor regressed on the first.
score_model <- lm(PMS7003 ~ PMSA003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
# Per-row observed value, fitted value and residual.
regression_points <- get_regression_points(score_model)
regression_points
#Caso 14: PMSA003 VS Oficial
df %>% select(PMSA003, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
697 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMSA003 |
0 |
1 |
20.29 |
17.42 |
0.18 |
6.54 |
16.23 |
28.48 |
108.45 |
▇▃▁▁▁ |
| Oficial |
0 |
1 |
18.30 |
9.22 |
3.47 |
11.12 |
16.86 |
23.87 |
62.22 |
▇▇▃▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = PMSA003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.607
ggplot(df, aes(x = PMSA003, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 PMSA003", y = "PM25 Oficial",
title = "Relationship between PMSA003 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMSA003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 11.8 0.427 27.6 0 10.9 12.6
## 2 PMSA003 0.322 0.016 20.2 0 0.290 0.353
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 697 x 5
## ID Oficial PMSA003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 8.12 5.36 13.5 -5.37
## 2 2 7.27 6.72 13.9 -6.66
## 3 3 10.3 10.4 15.1 -4.80
## 4 4 9.91 14.5 16.4 -6.54
## 5 5 12.5 15.3 16.7 -4.16
## 6 6 13.3 12.5 15.8 -2.47
## 7 7 14.7 11.5 15.5 -0.758
## 8 8 21.4 22.8 19.1 2.26
## 9 9 23.9 34.1 22.7 1.12
## 10 10 22.9 32.8 22.3 0.605
## # ... with 687 more rows
#Caso 15: PMS7003 VS Oficial
df %>% select(PMS7003, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
697 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMS7003 |
0 |
1 |
20.12 |
15.64 |
0.96 |
7.84 |
16.61 |
27.35 |
93.25 |
▇▃▂▁▁ |
| Oficial |
0 |
1 |
18.30 |
9.22 |
3.47 |
11.12 |
16.86 |
23.87 |
62.22 |
▇▇▃▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = PMS7003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.614
# Scatter plot of Oficial (y) against PMS7003 (x) with a least-squares line
# and no confidence ribbon. The title previously named PMSA003 (copied from
# the preceding case) although the plotted sensor is PMS7003.
ggplot(df, aes(x = PMS7003, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 PMS7003", y = "PM25 Oficial",
       title = "Relationship between PMS7003 and Oficial") +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMS7003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 11.0 0.45 24.5 0 10.1 11.9
## 2 PMS7003 0.362 0.018 20.5 0 0.327 0.397
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 697 x 5
## ID Oficial PMS7003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 8.12 7.22 13.6 -5.50
## 2 2 7.27 8.22 14.0 -6.72
## 3 3 10.3 11.3 15.1 -4.77
## 4 4 9.91 14.3 16.2 -6.28
## 5 5 12.5 15.5 16.6 -4.09
## 6 6 13.3 13.3 15.8 -2.51
## 7 7 14.7 11.9 15.3 -0.634
## 8 8 21.4 22.1 19.0 2.34
## 9 9 23.9 32.0 22.6 1.27
## 10 10 22.9 30.2 21.9 0.964
## # ... with 687 more rows