library(readxl)
library(tidyverse)
## -- Attaching packages --------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v dplyr 1.0.2
## v tidyr 1.1.1 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(moderndive)
library(skimr)
# **ESTACION KENNEDY VS CANAIRIOS**
# **5 different sensors: PMS7003 & PMSA003 & HPMA115S0 & SPS30 & SNGCJA5**
# Load the measurements workbook into `df` and print its column overview.
df <- read_excel(path = "C:/Mediciones/FERIAS_CANAIRIOS.xlsx")
# View(df)
glimpse(df)
## Rows: 1,115
## Columns: 8
## $ Num <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17...
## $ Fecha <chr> "12-11-2020 24:00", "13-11-2020 01:00", "13-11-2020 02:00...
## $ Oficial <dbl> 3.7, 0.3, 5.9, 8.1, 3.7, 4.0, 4.8, 6.0, 8.3, 9.8, 16.5, 6...
## $ PMS7003 <dbl> 0.00, 0.29, 0.71, 1.40, 3.12, 4.16, 6.13, 7.61, 8.40, 20....
## $ PMSA003 <dbl> 0.0169, 0.0000, 1.6100, 0.4100, 1.7200, 2.6500, 4.0400, 5...
## $ HPMA115S0 <dbl> 18.6, 19.1, 19.4, 19.7, 20.4, 20.6, 22.0, 22.8, 23.7, 28....
## $ SPS30 <dbl> 0.90, 1.02, 1.25, 1.78, 2.60, 3.00, 4.11, 4.94, 5.48, 11....
## $ SNGCJA5 <dbl> 0.00, 0.18, 0.45, 0.90, 1.51, 1.81, 2.86, 3.39, 3.48, 8.2...
# Show 10 randomly chosen rows as a quick look at the data.
sample_n(df, size = 10)
## # A tibble: 10 x 8
## Num Fecha Oficial PMS7003 PMSA003 HPMA115S0 SPS30 SNGCJA5
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1208 12-01-2021 10:00 10 0.0364 0.0182 23.2 1.05 12.1
## 2 98 17-11-2020 02:00 7.2 7.21 5.31 25.4 5.28 3.97
## 3 909 23-12-2020 03:00 21 6.57 4.43 25.3 4.29 3.25
## 4 1203 12-01-2021 05:00 7 0.491 0.151 22.2 1.15 15.7
## 5 1115 08-01-2021 16:00 23 14.4 13.3 31.6 9.62 6.96
## 6 538 04-12-2020 19:00 10.3 12.4 11.4 28.1 7.25 6.5
## 7 780 17-12-2020 05:00 10 4.09 3.12 23.9 2.96 2.02
## 8 1021 28-12-2020 03:00 14 7.76 6.21 27.5 5.9 4.31
## 9 400 29-11-2020 16:00 3.6 1.05 0.0545 21.4 1.13 1.27
## 10 1040 05-01-2021 15:00 20 15 14.8 30.5 9.23 7.77
# Interactive overlay of all six PM2.5 series (five sensors plus the official
# reading) plotted against the row index Num.
fig <- plot_ly(
  df,
  x = ~Num, y = ~PMS7003, name = "PM2.5 PMS7003",
  type = "scatter", mode = "lines+markers"
) %>%
  add_trace(y = ~PMSA003, name = "PM2.5 PMSA003", mode = "lines+markers") %>%
  add_trace(y = ~HPMA115S0, name = "PM2.5 HPMA115S0", mode = "lines+markers") %>%
  add_trace(y = ~SPS30, name = "PM2.5 SPS30", mode = "lines+markers") %>%
  add_trace(y = ~SNGCJA5, name = "PM2.5 SNGCJA5", mode = "lines+markers") %>%
  add_trace(y = ~Oficial, name = "PM2.5 Oficial", mode = "lines+markers")
fig
#Caso 1: SNGCJA5 VS SPS30
# Summary statistics for the two series compared in this case.
df %>%
  select(SNGCJA5, SPS30) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
1115 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.69 |
9.75 |
0.00 |
1.53 |
4.81 |
9.76 |
236.87 |
▇▁▁▁▁ |
| SPS30 |
0 |
1 |
7.32 |
6.66 |
0.08 |
1.97 |
5.52 |
10.65 |
51.50 |
▇▂▁▁▁ |
# Pearson correlation coefficient (original): SNGCJA5 vs SPS30
get_correlation(df, formula = SNGCJA5 ~ SPS30)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.496
# Scatter plot of SPS30 against SNGCJA5 with a fitted linear trend line.
ggplot(data = df, mapping = aes(x = SNGCJA5, y = SPS30)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SNGCJA5 and SPS30",
    x = "PM25 SNGCJA5",
    y = "PM25 SPS30"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression of SPS30 on SNGCJA5 (original).
score_model <- lm(formula = SPS30 ~ SNGCJA5, data = df)
# Regression table (original) for the fitted model.
get_regression_table(model = score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 5.05 0.21 24.0 0 4.64 5.46
## 2 SNGCJA5 0.339 0.018 19.1 0 0.304 0.374
# Observed values, fitted values and residuals for each row used by the model.
regression_points <- get_regression_points(model = score_model)
regression_points
## # A tibble: 1,115 x 5
## ID SPS30 SNGCJA5 SPS30_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.9 0 5.05 -4.15
## 2 2 1.02 0.18 5.11 -4.09
## 3 3 1.25 0.45 5.20 -3.96
## 4 4 1.78 0.9 5.36 -3.58
## 5 5 2.6 1.51 5.56 -2.96
## 6 6 3 1.81 5.66 -2.66
## 7 7 4.11 2.86 6.02 -1.91
## 8 8 4.94 3.39 6.20 -1.26
## 9 9 5.48 3.48 6.23 -0.751
## 10 10 11.7 8.23 7.84 3.86
## # ... with 1,105 more rows
#Caso 2: SNGCJA5 VS HPMA115S0
# Summary statistics for the two series compared in this case.
df %>%
  select(SNGCJA5, HPMA115S0) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
1115 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.69 |
9.75 |
0.0 |
1.53 |
4.81 |
9.76 |
236.87 |
▇▁▁▁▁ |
| HPMA115S0 |
0 |
1 |
28.48 |
7.27 |
18.6 |
23.10 |
26.40 |
31.05 |
77.70 |
▇▂▁▁▁ |
# Pearson correlation coefficient (original): SNGCJA5 vs HPMA115S0
get_correlation(df, formula = SNGCJA5 ~ HPMA115S0)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.500
# Scatter plot of HPMA115S0 against SNGCJA5 with a fitted linear trend line.
ggplot(data = df, mapping = aes(x = SNGCJA5, y = HPMA115S0)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SNGCJA5 and HPMA115S0",
    x = "PM25 SNGCJA5",
    y = "PM25 HPMA115S0"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression of HPMA115S0 on SNGCJA5 (original).
score_model <- lm(formula = HPMA115S0 ~ SNGCJA5, data = df)
# Regression table (original) for the fitted model.
get_regression_table(model = score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 26.0 0.229 114. 0 25.5 26.4
## 2 SNGCJA5 0.373 0.019 19.2 0 0.335 0.411
# Observed values, fitted values and residuals for each row used by the model.
regression_points <- get_regression_points(model = score_model)
regression_points
## # A tibble: 1,115 x 5
## ID HPMA115S0 SNGCJA5 HPMA115S0_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 18.6 0 26.0 -7.39
## 2 2 19.1 0.18 26.1 -6.96
## 3 3 19.4 0.45 26.2 -6.76
## 4 4 19.7 0.9 26.3 -6.63
## 5 5 20.4 1.51 26.6 -6.16
## 6 6 20.6 1.81 26.7 -6.07
## 7 7 22 2.86 27.1 -5.06
## 8 8 22.8 3.39 27.3 -4.46
## 9 9 23.7 3.48 27.3 -3.59
## 10 10 28.6 8.23 29.1 -0.459
## # ... with 1,105 more rows
#Caso 3: SNGCJA5 VS PMSA003
# Summary statistics for the two series compared in this case.
df %>%
  select(SNGCJA5, PMSA003) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
1115 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.69 |
9.75 |
0 |
1.53 |
4.81 |
9.76 |
236.87 |
▇▁▁▁▁ |
| PMSA003 |
0 |
1 |
10.46 |
11.99 |
0 |
0.85 |
6.48 |
16.20 |
89.00 |
▇▂▁▁▁ |
# Pearson correlation coefficient (original): SNGCJA5 vs PMSA003
get_correlation(df, formula = SNGCJA5 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.503
# Scatter plot of PMSA003 against SNGCJA5 with a fitted linear trend line.
ggplot(data = df, mapping = aes(x = SNGCJA5, y = PMSA003)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SNGCJA5 and PMSA003",
    x = "PM25 SNGCJA5",
    y = "PM25 PMSA003"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression of PMSA003 on SNGCJA5 (original).
score_model <- lm(formula = PMSA003 ~ SNGCJA5, data = df)
# Regression table (original) for the fitted model.
get_regression_table(model = score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 6.33 0.376 16.8 0 5.59 7.07
## 2 SNGCJA5 0.618 0.032 19.4 0 0.556 0.681
# Observed values, fitted values and residuals for each row used by the model.
regression_points <- get_regression_points(model = score_model)
regression_points
## # A tibble: 1,115 x 5
## ID PMSA003 SNGCJA5 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.017 0 6.33 -6.31
## 2 2 0 0.18 6.44 -6.44
## 3 3 1.61 0.45 6.61 -5.00
## 4 4 0.41 0.9 6.88 -6.47
## 5 5 1.72 1.51 7.26 -5.54
## 6 6 2.65 1.81 7.45 -4.80
## 7 7 4.04 2.86 8.10 -4.06
## 8 8 5.35 3.39 8.42 -3.07
## 9 9 6.25 3.48 8.48 -2.23
## 10 10 18.5 8.23 11.4 7.08
## # ... with 1,105 more rows
#Caso 4: SNGCJA5 VS PMS7003
# Summary statistics for the two series compared in this case.
df %>%
  select(SNGCJA5, PMS7003) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
1115 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.69 |
9.75 |
0 |
1.53 |
4.81 |
9.76 |
236.87 |
▇▁▁▁▁ |
| PMS7003 |
0 |
1 |
11.60 |
11.87 |
0 |
1.94 |
8.34 |
17.90 |
91.80 |
▇▂▁▁▁ |
# Pearson correlation coefficient (original): SNGCJA5 vs PMS7003
get_correlation(df, formula = SNGCJA5 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.496
# Scatter plot of PMS7003 against SNGCJA5 with a fitted linear trend line.
ggplot(data = df, mapping = aes(x = SNGCJA5, y = PMS7003)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SNGCJA5 and PMS7003",
    x = "PM25 SNGCJA5",
    y = "PM25 PMS7003"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression of PMS7003 on SNGCJA5 (original).
score_model <- lm(formula = PMS7003 ~ SNGCJA5, data = df)
# Regression table (original) for the fitted model.
get_regression_table(model = score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 7.56 0.374 20.2 0 6.83 8.30
## 2 SNGCJA5 0.604 0.032 19.1 0 0.542 0.666
# Observed values, fitted values and residuals for each row used by the model.
regression_points <- get_regression_points(model = score_model)
regression_points
## # A tibble: 1,115 x 5
## ID PMS7003 SNGCJA5 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0 0 7.56 -7.56
## 2 2 0.290 0.18 7.67 -7.38
## 3 3 0.71 0.45 7.84 -7.13
## 4 4 1.4 0.9 8.11 -6.71
## 5 5 3.12 1.51 8.48 -5.36
## 6 6 4.16 1.81 8.66 -4.50
## 7 7 6.13 2.86 9.29 -3.16
## 8 8 7.61 3.39 9.61 -2.00
## 9 9 8.4 3.48 9.67 -1.27
## 10 10 20.2 8.23 12.5 7.66
## # ... with 1,105 more rows
#Caso 5: SNGCJA5 VS Oficial
# Summary statistics for the two series compared in this case.
df %>%
  select(SNGCJA5, Oficial) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
1115 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.69 |
9.75 |
0 |
1.53 |
4.81 |
9.76 |
236.87 |
▇▁▁▁▁ |
| Oficial |
0 |
1 |
12.80 |
8.32 |
0 |
6.50 |
11.20 |
17.70 |
55.40 |
▇▆▂▁▁ |
# Pearson correlation coefficient (original): SNGCJA5 vs Oficial
get_correlation(df, formula = SNGCJA5 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.275
# Scatter plot of Oficial against SNGCJA5 with a fitted linear trend line.
ggplot(data = df, mapping = aes(x = SNGCJA5, y = Oficial)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SNGCJA5 and Oficial",
    x = "PM25 SNGCJA5",
    y = "PM25 Oficial"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression of Oficial on SNGCJA5 (original).
score_model <- lm(formula = Oficial ~ SNGCJA5, data = df)
# Regression table (original) for the fitted model.
get_regression_table(model = score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 11.2 0.291 38.7 0 10.7 11.8
## 2 SNGCJA5 0.234 0.025 9.53 0 0.186 0.283
# Observed values, fitted values and residuals for each row used by the model.
regression_points <- get_regression_points(model = score_model)
regression_points
## # A tibble: 1,115 x 5
## ID Oficial SNGCJA5 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 3.7 0 11.2 -7.53
## 2 2 0.3 0.18 11.3 -11.0
## 3 3 5.9 0.45 11.3 -5.44
## 4 4 8.1 0.9 11.4 -3.34
## 5 5 3.7 1.51 11.6 -7.89
## 6 6 4 1.81 11.7 -7.66
## 7 7 4.8 2.86 11.9 -7.10
## 8 8 6 3.39 12.0 -6.03
## 9 9 8.3 3.48 12.0 -3.75
## 10 10 9.8 8.23 13.2 -3.36
## # ... with 1,105 more rows
#Caso 6: SPS30 VS HPMA115S0
# Summary statistics for the two series compared in this case.
df %>%
  select(SPS30, HPMA115S0) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
1115 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
7.32 |
6.66 |
0.08 |
1.97 |
5.52 |
10.65 |
51.5 |
▇▂▁▁▁ |
| HPMA115S0 |
0 |
1 |
28.48 |
7.27 |
18.60 |
23.10 |
26.40 |
31.05 |
77.7 |
▇▂▁▁▁ |
# Pearson correlation coefficient (original): SPS30 vs HPMA115S0
get_correlation(df, formula = SPS30 ~ HPMA115S0)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.983
# Scatter plot of HPMA115S0 against SPS30 with a fitted linear trend line.
ggplot(data = df, mapping = aes(x = SPS30, y = HPMA115S0)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SPS30 and HPMA115S0",
    x = "PM25 SPS30",
    y = "PM25 HPMA115S0"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression of HPMA115S0 on SPS30 (original).
score_model <- lm(formula = HPMA115S0 ~ SPS30, data = df)
# Regression table (original) for the fitted model.
get_regression_table(model = score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 20.6 0.059 348. 0 20.5 20.7
## 2 SPS30 1.07 0.006 179. 0 1.06 1.08
# Observed values, fitted values and residuals for each row used by the model.
regression_points <- get_regression_points(model = score_model)
regression_points
## # A tibble: 1,115 x 5
## ID HPMA115S0 SPS30 HPMA115S0_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 18.6 0.9 21.6 -3.00
## 2 2 19.1 1.02 21.7 -2.62
## 3 3 19.4 1.25 22.0 -2.57
## 4 4 19.7 1.78 22.5 -2.84
## 5 5 20.4 2.6 23.4 -3.02
## 6 6 20.6 3 23.8 -3.25
## 7 7 22 4.11 25.0 -3.04
## 8 8 22.8 4.94 25.9 -3.13
## 9 9 23.7 5.48 26.5 -2.81
## 10 10 28.6 11.7 33.2 -4.59
## # ... with 1,105 more rows
#Caso 7: SPS30 VS PMSA003
# Summary statistics for the two series compared in this case.
df %>%
  select(SPS30, PMSA003) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
1115 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
7.32 |
6.66 |
0.08 |
1.97 |
5.52 |
10.65 |
51.5 |
▇▂▁▁▁ |
| PMSA003 |
0 |
1 |
10.46 |
11.99 |
0.00 |
0.85 |
6.48 |
16.20 |
89.0 |
▇▂▁▁▁ |
# Pearson correlation coefficient (original): SPS30 vs PMSA003
get_correlation(df, formula = SPS30 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.996
# Scatter plot of PMSA003 against SPS30 with a fitted linear trend line.
ggplot(data = df, mapping = aes(x = SPS30, y = PMSA003)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SPS30 and PMSA003",
    x = "PM25 SPS30",
    y = "PM25 PMSA003"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression of PMSA003 on SPS30 (original).
score_model <- lm(formula = PMSA003 ~ SPS30, data = df)
# Regression table (original) for the fitted model.
get_regression_table(model = score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -2.65 0.048 -55.2 0 -2.75 -2.56
## 2 SPS30 1.79 0.005 369. 0 1.78 1.80
# Observed values, fitted values and residuals for each row used by the model.
regression_points <- get_regression_points(model = score_model)
regression_points
## # A tibble: 1,115 x 5
## ID PMSA003 SPS30 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.017 0.9 -1.04 1.06
## 2 2 0 1.02 -0.826 0.826
## 3 3 1.61 1.25 -0.414 2.02
## 4 4 0.41 1.78 0.536 -0.126
## 5 5 1.72 2.6 2.01 -0.286
## 6 6 2.65 3 2.72 -0.073
## 7 7 4.04 4.11 4.71 -0.672
## 8 8 5.35 4.94 6.2 -0.85
## 9 9 6.25 5.48 7.17 -0.918
## 10 10 18.5 11.7 18.3 0.183
## # ... with 1,105 more rows
#Caso 8: SPS30 VS PMS7003
# Summary statistics for the two series compared in this case.
df %>%
  select(SPS30, PMS7003) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
1115 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
7.32 |
6.66 |
0.08 |
1.97 |
5.52 |
10.65 |
51.5 |
▇▂▁▁▁ |
| PMS7003 |
0 |
1 |
11.60 |
11.87 |
0.00 |
1.94 |
8.34 |
17.90 |
91.8 |
▇▂▁▁▁ |
# Pearson correlation coefficient (original): SPS30 vs PMS7003
get_correlation(df, formula = SPS30 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.996
# Scatter plot of PMS7003 against SPS30 with a fitted linear trend line.
ggplot(data = df, mapping = aes(x = SPS30, y = PMS7003)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SPS30 and PMS7003",
    x = "PM25 SPS30",
    y = "PM25 PMS7003"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression of PMS7003 on SPS30 (original).
score_model <- lm(formula = PMS7003 ~ SPS30, data = df)
# Regression table (original) for the fitted model.
get_regression_table(model = score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -1.38 0.049 -28.1 0 -1.48 -1.29
## 2 SPS30 1.78 0.005 357. 0 1.76 1.78
# Observed values, fitted values and residuals for each row used by the model.
regression_points <- get_regression_points(model = score_model)
regression_points
## # A tibble: 1,115 x 5
## ID PMS7003 SPS30 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0 0.9 0.213 -0.213
## 2 2 0.290 1.02 0.426 -0.136
## 3 3 0.71 1.25 0.834 -0.124
## 4 4 1.4 1.78 1.78 -0.375
## 5 5 3.12 2.6 3.23 -0.11
## 6 6 4.16 3 3.94 0.219
## 7 7 6.13 4.11 5.91 0.219
## 8 8 7.61 4.94 7.38 0.226
## 9 9 8.4 5.48 8.34 0.057
## 10 10 20.2 11.7 19.4 0.817
## # ... with 1,105 more rows
#Caso 9: SPS30 VS Oficial
# Summary statistics for the two series compared in this case.
df %>%
  select(SPS30, Oficial) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
1115 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
7.32 |
6.66 |
0.08 |
1.97 |
5.52 |
10.65 |
51.5 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
12.80 |
8.32 |
0.00 |
6.50 |
11.20 |
17.70 |
55.4 |
▇▆▂▁▁ |
# Pearson correlation coefficient (original): SPS30 vs Oficial
get_correlation(df, formula = SPS30 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.565
# Scatter plot of Oficial against SPS30 with a fitted linear trend line.
ggplot(data = df, mapping = aes(x = SPS30, y = Oficial)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SPS30 and Oficial",
    x = "PM25 SPS30",
    y = "PM25 Oficial"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression of Oficial on SPS30 (original).
score_model <- lm(formula = Oficial ~ SPS30, data = df)
# Regression table (original) for the fitted model.
get_regression_table(model = score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 7.64 0.306 25.0 0 7.04 8.24
## 2 SPS30 0.705 0.031 22.8 0 0.645 0.766
# Observed values, fitted values and residuals for each row used by the model.
regression_points <- get_regression_points(model = score_model)
regression_points
## # A tibble: 1,115 x 5
## ID Oficial SPS30 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 3.7 0.9 8.27 -4.57
## 2 2 0.3 1.02 8.36 -8.06
## 3 3 5.9 1.25 8.52 -2.62
## 4 4 8.1 1.78 8.89 -0.793
## 5 5 3.7 2.6 9.47 -5.77
## 6 6 4 3 9.75 -5.75
## 7 7 4.8 4.11 10.5 -5.74
## 8 8 6 4.94 11.1 -5.12
## 9 9 8.3 5.48 11.5 -3.20
## 10 10 9.8 11.7 15.9 -6.09
## # ... with 1,105 more rows
#Caso 10: HPMA115S0 VS PMSA003
# Summary statistics for the two series compared in this case.
df %>%
  select(HPMA115S0, PMSA003) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
1115 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
28.48 |
7.27 |
18.6 |
23.10 |
26.40 |
31.05 |
77.7 |
▇▂▁▁▁ |
| PMSA003 |
0 |
1 |
10.46 |
11.99 |
0.0 |
0.85 |
6.48 |
16.20 |
89.0 |
▇▂▁▁▁ |
# Pearson correlation coefficient (original): HPMA115S0 vs PMSA003
get_correlation(df, formula = HPMA115S0 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.984
# Scatter plot of PMSA003 against HPMA115S0 with a fitted linear trend line.
ggplot(data = df, mapping = aes(x = HPMA115S0, y = PMSA003)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between HPMA115S0 and PMSA003",
    x = "PM25 HPMA115S0",
    y = "PM25 PMSA003"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression of PMSA003 on HPMA115S0 (original).
score_model <- lm(formula = PMSA003 ~ HPMA115S0, data = df)
# Regression table (original) for the fitted model.
get_regression_table(model = score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -35.8 0.256 -140. 0 -36.3 -35.2
## 2 HPMA115S0 1.62 0.009 186. 0 1.60 1.64
# Observed values, fitted values and residuals for each row used by the model.
regression_points <- get_regression_points(model = score_model)
regression_points
## # A tibble: 1,115 x 5
## ID PMSA003 HPMA115S0 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.017 18.6 -5.58 5.59
## 2 2 0 19.1 -4.76 4.76
## 3 3 1.61 19.4 -4.28 5.89
## 4 4 0.41 19.7 -3.79 4.2
## 5 5 1.72 20.4 -2.65 4.37
## 6 6 2.65 20.6 -2.33 4.98
## 7 7 4.04 22 -0.058 4.10
## 8 8 5.35 22.8 1.24 4.11
## 9 9 6.25 23.7 2.7 3.55
## 10 10 18.5 28.6 10.6 7.85
## # ... with 1,105 more rows
#Caso 11: HPMA115S0 VS PMS7003
# Summary statistics for the two series compared in this case.
df %>%
  select(HPMA115S0, PMS7003) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
1115 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
28.48 |
7.27 |
18.6 |
23.10 |
26.40 |
31.05 |
77.7 |
▇▂▁▁▁ |
| PMS7003 |
0 |
1 |
11.60 |
11.87 |
0.0 |
1.94 |
8.34 |
17.90 |
91.8 |
▇▂▁▁▁ |
# Pearson correlation coefficient (original): HPMA115S0 vs PMS7003
get_correlation(df, formula = HPMA115S0 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.978
# Scatter plot of PMS7003 against HPMA115S0 with a fitted linear trend line.
ggplot(data = df, mapping = aes(x = HPMA115S0, y = PMS7003)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between HPMA115S0 and PMS7003",
    x = "PM25 HPMA115S0",
    y = "PM25 PMS7003"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression of PMS7003 on HPMA115S0 (original).
score_model <- lm(formula = PMS7003 ~ HPMA115S0, data = df)
# Regression table (original) for the fitted model.
get_regression_table(model = score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -33.9 0.298 -114. 0 -34.5 -33.3
## 2 HPMA115S0 1.60 0.01 158. 0 1.58 1.62
# Observed values, fitted values and residuals for each row used by the model.
regression_points <- get_regression_points(model = score_model)
regression_points
## # A tibble: 1,115 x 5
## ID PMS7003 HPMA115S0 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0 18.6 -4.18 4.18
## 2 2 0.290 19.1 -3.39 3.68
## 3 3 0.71 19.4 -2.91 3.62
## 4 4 1.4 19.7 -2.43 3.83
## 5 5 3.12 20.4 -1.31 4.43
## 6 6 4.16 20.6 -0.99 5.15
## 7 7 6.13 22 1.25 4.88
## 8 8 7.61 22.8 2.52 5.08
## 9 9 8.4 23.7 3.96 4.44
## 10 10 20.2 28.6 11.8 8.41
## # ... with 1,105 more rows
#Caso 12: HPMA115S0 VS Oficial
# Summary statistics for the two series compared in this case.
df %>%
  select(HPMA115S0, Oficial) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
1115 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
28.48 |
7.27 |
18.6 |
23.1 |
26.4 |
31.05 |
77.7 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
12.80 |
8.32 |
0.0 |
6.5 |
11.2 |
17.70 |
55.4 |
▇▆▂▁▁ |
# Pearson correlation coefficient (original): HPMA115S0 vs Oficial
get_correlation(df, formula = HPMA115S0 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.557
# Scatter plot of Oficial against HPMA115S0 with a fitted linear trend line.
ggplot(data = df, mapping = aes(x = HPMA115S0, y = Oficial)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between HPMA115S0 and Oficial",
    x = "PM25 HPMA115S0",
    y = "PM25 Oficial"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression of Oficial on HPMA115S0 (original).
score_model <- lm(formula = Oficial ~ HPMA115S0, data = df)
# Regression table (original) for the fitted model.
get_regression_table(model = score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -5.35 0.837 -6.40 0 -7.00 -3.71
## 2 HPMA115S0 0.637 0.028 22.4 0 0.581 0.693
# Observed values, fitted values and residuals for each row used by the model.
regression_points <- get_regression_points(model = score_model)
regression_points
## # A tibble: 1,115 x 5
## ID Oficial HPMA115S0 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 3.7 18.6 6.5 -2.8
## 2 2 0.3 19.1 6.82 -6.52
## 3 3 5.9 19.4 7.01 -1.11
## 4 4 8.1 19.7 7.20 0.899
## 5 5 3.7 20.4 7.65 -3.95
## 6 6 4 20.6 7.78 -3.78
## 7 7 4.8 22 8.67 -3.87
## 8 8 6 22.8 9.18 -3.18
## 9 9 8.3 23.7 9.75 -1.45
## 10 10 9.8 28.6 12.9 -3.07
## # ... with 1,105 more rows
#Caso 13: PMSA003 VS PMS7003
# Compares the PMSA003 and PMS7003 sensors: summary statistics, Pearson
# correlation, scatter plot with linear trend, and a simple linear regression
# of PMS7003 on PMSA003.
# NOTE: the printed output for this case is not included here; re-knit the
# document to regenerate it.
df %>% select(PMSA003, PMS7003) %>% skim()
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = PMSA003 ~ PMS7003)
ggplot(df, aes(x = PMSA003, y = PMS7003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 PMSA003", y = "PM25 PMS7003",
title = "Relationship between PMSA003 and PMS7003") +
geom_smooth(method = "lm", se = FALSE)
#**Fit regression model original:**
score_model <- lm(PMS7003 ~ PMSA003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
# Observed values, fitted values and residuals for each row used by the model.
regression_points <- get_regression_points(score_model)
regression_points
#Caso 14: PMSA003 VS Oficial
# Summary statistics for the two series compared in this case.
df %>%
  select(PMSA003, Oficial) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
1115 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMSA003 |
0 |
1 |
10.46 |
11.99 |
0 |
0.85 |
6.48 |
16.2 |
89.0 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
12.80 |
8.32 |
0 |
6.50 |
11.20 |
17.7 |
55.4 |
▇▆▂▁▁ |
# Pearson correlation coefficient (original): PMSA003 vs Oficial
get_correlation(df, formula = PMSA003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.560
# Scatter plot of Oficial against PMSA003 with a fitted linear trend line.
ggplot(data = df, mapping = aes(x = PMSA003, y = Oficial)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between PMSA003 and Oficial",
    x = "PM25 PMSA003",
    y = "PM25 Oficial"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression of Oficial on PMSA003 (original).
score_model <- lm(formula = Oficial ~ PMSA003, data = df)
# Regression table (original) for the fitted model.
get_regression_table(model = score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 8.73 0.274 31.9 0 8.20 9.27
## 2 PMSA003 0.389 0.017 22.5 0 0.355 0.422
# Observed values, fitted values and residuals for each row used by the model.
regression_points <- get_regression_points(model = score_model)
regression_points
## # A tibble: 1,115 x 5
## ID Oficial PMSA003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 3.7 0.017 8.74 -5.04
## 2 2 0.3 0 8.73 -8.43
## 3 3 5.9 1.61 9.36 -3.46
## 4 4 8.1 0.41 8.89 -0.793
## 5 5 3.7 1.72 9.40 -5.70
## 6 6 4 2.65 9.76 -5.76
## 7 7 4.8 4.04 10.3 -5.50
## 8 8 6 5.35 10.8 -4.81
## 9 9 8.3 6.25 11.2 -2.86
## 10 10 9.8 18.5 15.9 -6.12
## # ... with 1,105 more rows
#Caso 15: PMS7003 VS Oficial
# Summary statistics for the two series compared in this case.
df %>%
  select(PMS7003, Oficial) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
1115 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMS7003 |
0 |
1 |
11.6 |
11.87 |
0 |
1.94 |
8.34 |
17.9 |
91.8 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
12.8 |
8.32 |
0 |
6.50 |
11.20 |
17.7 |
55.4 |
▇▆▂▁▁ |
# Pearson correlation coefficient (original): PMS7003 vs Oficial
get_correlation(df, formula = PMS7003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.563
# Scatter plot of Oficial against PMS7003 with a fitted linear trend line.
# The title names PMS7003, the sensor actually plotted on the x axis.
ggplot(df, aes(x = PMS7003, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 PMS7003", y = "PM25 Oficial",
       title = "Relationship between PMS7003 and Oficial") +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression of Oficial on PMS7003 (original).
score_model <- lm(formula = Oficial ~ PMS7003, data = df)
# Regression table (original) for the fitted model.
get_regression_table(model = score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 8.22 0.288 28.5 0 7.66 8.79
## 2 PMS7003 0.394 0.017 22.7 0 0.36 0.428
# Observed values, fitted values and residuals for each row used by the model.
regression_points <- get_regression_points(model = score_model)
regression_points
## # A tibble: 1,115 x 5
## ID Oficial PMS7003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 3.7 0 8.22 -4.52
## 2 2 0.3 0.290 8.34 -8.04
## 3 3 5.9 0.71 8.50 -2.60
## 4 4 8.1 1.4 8.78 -0.675
## 5 5 3.7 3.12 9.45 -5.75
## 6 6 4 4.16 9.86 -5.86
## 7 7 4.8 6.13 10.6 -5.84
## 8 8 6 7.61 11.2 -5.22
## 9 9 8.3 8.4 11.5 -3.24
## 10 10 9.8 20.2 16.2 -6.39
## # ... with 1,105 more rows