library(readxl)
library(tidyverse)
## -- Attaching packages --------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v dplyr 1.0.2
## v tidyr 1.1.1 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(moderndive)
library(skimr)
# **1 hour late ESTACION KENNEDY VS CANAIRIOS**
# **5 different sensors: PMS7003 & PMSA003 & HPMA115S0 & SPS30 & SNGCJA5**
# Load the measurement workbook into `df` and print a compact overview of its
# columns (name, type, first values).
df <- "C:/Mediciones/FERIAS_CANAIRIOS_1h.xlsx" %>%
  read_excel()
# View(df)
df %>%
  glimpse()
## Rows: 1,114
## Columns: 8
## $ Num <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17...
## $ Fecha <chr> "12-11-2020 24:00", "13-11-2020 01:00", "13-11-2020 02:00...
## $ Oficial <dbl> 0.3, 5.9, 8.1, 3.7, 4.0, 4.8, 6.0, 8.3, 9.8, 16.5, 6.2, 1...
## $ PMS7003 <dbl> 0.00, 0.29, 0.71, 1.40, 3.12, 4.16, 6.13, 7.61, 8.40, 20....
## $ PMSA003 <dbl> 0.0169, 0.0000, 1.6100, 0.4100, 1.7200, 2.6500, 4.0400, 5...
## $ HPMA115S0 <dbl> 18.6, 19.1, 19.4, 19.7, 20.4, 20.6, 22.0, 22.8, 23.7, 28....
## $ SPS30 <dbl> 0.90, 1.02, 1.25, 1.78, 2.60, 3.00, 4.11, 4.94, 5.48, 11....
## $ SNGCJA5 <dbl> 0.00, 0.18, 0.45, 0.90, 1.51, 1.81, 2.86, 3.39, 3.48, 8.2...
# Print 10 randomly chosen rows as a quick sanity check of the data.
# slice_sample() is the current dplyr verb; sample_n() is superseded.
# No seed is set, so a different set of rows is shown on every run.
df %>%
  slice_sample(n = 10)
## # A tibble: 10 x 8
## Num Fecha Oficial PMS7003 PMSA003 HPMA115S0 SPS30 SNGCJA5
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 167 19-11-2020 24:00 12.8 29.9 26.9 38 17.2 13.9
## 2 1099 08-01-2021 02:00 15 7.89 6.28 26.9 5.30 3.35
## 3 1210 12-01-2021 12:00 8 0.0175 0 23.3 1.02 12.5
## 4 622 10-12-2020 16:00 35.4 53.3 54.3 57.9 31.8 27.1
## 5 192 21-11-2020 01:00 2.9 1.37 0.44 22 1.86 1.09
## 6 478 02-12-2020 22:00 6 0.0345 0 21.3 0.97 0.0517
## 7 7 13-11-2020 06:00 6 6.13 4.04 22 4.11 2.86
## 8 153 19-11-2020 10:00 4.8 4.91 3.33 23.8 3.82 2.04
## 9 23 13-11-2020 22:00 22.3 18.5 15.6 29.4 10 8.21
## 10 204 21-11-2020 13:00 6.9 0.0351 0.0175 22.1 0.94 0
# Interactive overview: one lines+markers trace per PM2.5 series, all drawn
# against the `Num` column. The first series creates the figure; the remaining
# five are layered on top in the same order as before.
fig <- df %>%
  plot_ly(
    x = ~Num,
    y = ~PMS7003,
    name = "PM2.5 PMS7003",
    type = "scatter",
    mode = "lines+markers"
  ) %>%
  add_trace(y = ~PMSA003, name = "PM2.5 PMSA003", mode = "lines+markers") %>%
  add_trace(
    y = ~HPMA115S0, name = "PM2.5 HPMA115S0", mode = "lines+markers"
  ) %>%
  add_trace(y = ~SPS30, name = "PM2.5 SPS30", mode = "lines+markers") %>%
  add_trace(y = ~SNGCJA5, name = "PM2.5 SNGCJA5", mode = "lines+markers") %>%
  add_trace(y = ~Oficial, name = "PM2.5 Oficial", mode = "lines+markers")
# Printing the object renders the widget.
fig
#Caso 1: SNGCJA5 VS SPS30
df %>% select(SNGCJA5, SPS30) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1114 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.68 |
9.75 |
0.00 |
1.53 |
4.80 |
9.75 |
236.87 |
▇▁▁▁▁ |
| SPS30 |
0 |
1 |
7.32 |
6.66 |
0.08 |
1.97 |
5.53 |
10.67 |
51.50 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ SPS30)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.497
ggplot(df, aes(x = SNGCJA5, y = SPS30)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 SPS30",
title = "Relationship between SNGCJA5 and SPS30") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(SPS30 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 5.06 0.21 24.1 0 4.64 5.47
## 2 SNGCJA5 0.339 0.018 19.1 0 0.305 0.374
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,114 x 5
## ID SPS30 SNGCJA5 SPS30_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.9 0 5.06 -4.16
## 2 2 1.02 0.18 5.12 -4.10
## 3 3 1.25 0.45 5.21 -3.96
## 4 4 1.78 0.9 5.36 -3.58
## 5 5 2.6 1.51 5.57 -2.97
## 6 6 3 1.81 5.67 -2.67
## 7 7 4.11 2.86 6.03 -1.92
## 8 8 4.94 3.39 6.21 -1.27
## 9 9 5.48 3.48 6.24 -0.757
## 10 10 11.7 8.23 7.85 3.85
## # ... with 1,104 more rows
#Caso 2: SNGCJA5 VS HPMA115S0
df %>% select(SNGCJA5, HPMA115S0) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1114 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.68 |
9.75 |
0.0 |
1.53 |
4.8 |
9.75 |
236.87 |
▇▁▁▁▁ |
| HPMA115S0 |
0 |
1 |
28.49 |
7.27 |
18.6 |
23.10 |
26.4 |
31.08 |
77.70 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ HPMA115S0)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.501
ggplot(df, aes(x = SNGCJA5, y = HPMA115S0)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 HPMA115S0",
title = "Relationship between SNGCJA5 and HPMA115S0") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(HPMA115S0 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 26.0 0.229 114. 0 25.5 26.4
## 2 SNGCJA5 0.373 0.019 19.3 0 0.335 0.411
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,114 x 5
## ID HPMA115S0 SNGCJA5 HPMA115S0_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 18.6 0 26.0 -7.40
## 2 2 19.1 0.18 26.1 -6.96
## 3 3 19.4 0.45 26.2 -6.76
## 4 4 19.7 0.9 26.3 -6.63
## 5 5 20.4 1.51 26.6 -6.16
## 6 6 20.6 1.81 26.7 -6.07
## 7 7 22 2.86 27.1 -5.06
## 8 8 22.8 3.39 27.3 -4.46
## 9 9 23.7 3.48 27.3 -3.60
## 10 10 28.6 8.23 29.1 -0.468
## # ... with 1,104 more rows
#Caso 3: SNGCJA5 VS PMSA003
df %>% select(SNGCJA5, PMSA003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1114 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.68 |
9.75 |
0 |
1.53 |
4.80 |
9.75 |
236.87 |
▇▁▁▁▁ |
| PMSA003 |
0 |
1 |
10.47 |
11.99 |
0 |
0.85 |
6.49 |
16.20 |
89.00 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.504
ggplot(df, aes(x = SNGCJA5, y = PMSA003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 PMSA003",
title = "Relationship between SNGCJA5 and PMSA003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 6.33 0.376 16.8 0 5.60 7.07
## 2 SNGCJA5 0.62 0.032 19.5 0 0.557 0.682
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,114 x 5
## ID PMSA003 SNGCJA5 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.017 0 6.33 -6.32
## 2 2 0 0.18 6.44 -6.44
## 3 3 1.61 0.45 6.61 -5.00
## 4 4 0.41 0.9 6.89 -6.48
## 5 5 1.72 1.51 7.27 -5.55
## 6 6 2.65 1.81 7.45 -4.80
## 7 7 4.04 2.86 8.10 -4.07
## 8 8 5.35 3.39 8.43 -3.08
## 9 9 6.25 3.48 8.49 -2.24
## 10 10 18.5 8.23 11.4 7.07
## # ... with 1,104 more rows
#Caso 4: SNGCJA5 VS PMS7003
df %>% select(SNGCJA5, PMS7003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1114 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.68 |
9.75 |
0 |
1.53 |
4.80 |
9.75 |
236.87 |
▇▁▁▁▁ |
| PMS7003 |
0 |
1 |
11.61 |
11.87 |
0 |
1.95 |
8.37 |
17.90 |
91.80 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.497
ggplot(df, aes(x = SNGCJA5, y = PMS7003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 PMS7003",
title = "Relationship between SNGCJA5 and PMS7003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 7.57 0.374 20.2 0 6.84 8.30
## 2 SNGCJA5 0.606 0.032 19.1 0 0.543 0.668
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,114 x 5
## ID PMS7003 SNGCJA5 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0 0 7.57 -7.57
## 2 2 0.290 0.18 7.68 -7.39
## 3 3 0.71 0.45 7.84 -7.13
## 4 4 1.4 0.9 8.11 -6.71
## 5 5 3.12 1.51 8.48 -5.36
## 6 6 4.16 1.81 8.67 -4.51
## 7 7 6.13 2.86 9.30 -3.17
## 8 8 7.61 3.39 9.62 -2.01
## 9 9 8.4 3.48 9.68 -1.28
## 10 10 20.2 8.23 12.6 7.65
## # ... with 1,104 more rows
#Caso 5: SNGCJA5 VS Oficial
df %>% select(SNGCJA5, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1114 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.68 |
9.75 |
0 |
1.53 |
4.8 |
9.75 |
236.87 |
▇▁▁▁▁ |
| Oficial |
0 |
1 |
12.81 |
8.32 |
0 |
6.50 |
11.2 |
17.70 |
55.40 |
▇▆▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.347
ggplot(df, aes(x = SNGCJA5, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 Oficial",
title = "Relationship between SNGCJA5 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 10.8 0.283 38.2 0 10.3 11.4
## 2 SNGCJA5 0.296 0.024 12.4 0 0.249 0.343
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,114 x 5
## ID Oficial SNGCJA5 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.3 0 10.8 -10.5
## 2 2 5.9 0.18 10.9 -4.98
## 3 3 8.1 0.45 11.0 -2.86
## 4 4 3.7 0.9 11.1 -7.39
## 5 5 4 1.51 11.3 -7.28
## 6 6 4.8 1.81 11.4 -6.56
## 7 7 6 2.86 11.7 -5.68
## 8 8 8.3 3.39 11.8 -3.53
## 9 9 9.8 3.48 11.9 -2.06
## 10 10 16.5 8.23 13.3 3.23
## # ... with 1,104 more rows
#Caso 6: SPS30 VS HPMA115S0
df %>% select(SPS30, HPMA115S0) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1114 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
7.32 |
6.66 |
0.08 |
1.97 |
5.53 |
10.67 |
51.5 |
▇▂▁▁▁ |
| HPMA115S0 |
0 |
1 |
28.49 |
7.27 |
18.60 |
23.10 |
26.40 |
31.08 |
77.7 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ HPMA115S0)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.983
ggplot(df, aes(x = SPS30, y = HPMA115S0)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 HPMA115S0",
title = "Relationship between SPS30 and HPMA115S0") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(HPMA115S0 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 20.6 0.059 347. 0 20.5 20.7
## 2 SPS30 1.07 0.006 179. 0 1.06 1.08
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,114 x 5
## ID HPMA115S0 SPS30 HPMA115S0_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 18.6 0.9 21.6 -2.99
## 2 2 19.1 1.02 21.7 -2.62
## 3 3 19.4 1.25 22.0 -2.57
## 4 4 19.7 1.78 22.5 -2.84
## 5 5 20.4 2.6 23.4 -3.02
## 6 6 20.6 3 23.8 -3.25
## 7 7 22 4.11 25.0 -3.04
## 8 8 22.8 4.94 25.9 -3.13
## 9 9 23.7 5.48 26.5 -2.81
## 10 10 28.6 11.7 33.2 -4.59
## # ... with 1,104 more rows
#Caso 7: SPS30 VS PMSA003
df %>% select(SPS30, PMSA003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1114 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
7.32 |
6.66 |
0.08 |
1.97 |
5.53 |
10.67 |
51.5 |
▇▂▁▁▁ |
| PMSA003 |
0 |
1 |
10.47 |
11.99 |
0.00 |
0.85 |
6.49 |
16.20 |
89.0 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.996
ggplot(df, aes(x = SPS30, y = PMSA003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 PMSA003",
title = "Relationship between SPS30 and PMSA003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -2.66 0.048 -55.2 0 -2.75 -2.56
## 2 SPS30 1.79 0.005 369. 0 1.78 1.80
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,114 x 5
## ID PMSA003 SPS30 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.017 0.9 -1.04 1.06
## 2 2 0 1.02 -0.827 0.827
## 3 3 1.61 1.25 -0.415 2.02
## 4 4 0.41 1.78 0.535 -0.125
## 5 5 1.72 2.6 2.00 -0.285
## 6 6 2.65 3 2.72 -0.072
## 7 7 4.04 4.11 4.71 -0.672
## 8 8 5.35 4.94 6.20 -0.849
## 9 9 6.25 5.48 7.17 -0.917
## 10 10 18.5 11.7 18.3 0.184
## # ... with 1,104 more rows
#Caso 8: SPS30 VS PMS7003
df %>% select(SPS30, PMS7003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1114 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
7.32 |
6.66 |
0.08 |
1.97 |
5.53 |
10.67 |
51.5 |
▇▂▁▁▁ |
| PMS7003 |
0 |
1 |
11.61 |
11.87 |
0.00 |
1.95 |
8.37 |
17.90 |
91.8 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.996
ggplot(df, aes(x = SPS30, y = PMS7003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 PMS7003",
title = "Relationship between SPS30 and PMS7003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -1.38 0.049 -28.1 0 -1.48 -1.29
## 2 SPS30 1.78 0.005 356. 0 1.76 1.78
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,114 x 5
## ID PMS7003 SPS30 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0 0.9 0.213 -0.213
## 2 2 0.290 1.02 0.426 -0.136
## 3 3 0.71 1.25 0.834 -0.124
## 4 4 1.4 1.78 1.78 -0.375
## 5 5 3.12 2.6 3.23 -0.111
## 6 6 4.16 3 3.94 0.219
## 7 7 6.13 4.11 5.91 0.219
## 8 8 7.61 4.94 7.38 0.226
## 9 9 8.4 5.48 8.34 0.057
## 10 10 20.2 11.7 19.4 0.817
## # ... with 1,104 more rows
#Caso 9: SPS30 VS Oficial
df %>% select(SPS30, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1114 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
7.32 |
6.66 |
0.08 |
1.97 |
5.53 |
10.67 |
51.5 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
12.81 |
8.32 |
0.00 |
6.50 |
11.20 |
17.70 |
55.4 |
▇▆▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.712
ggplot(df, aes(x = SPS30, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 Oficial",
title = "Relationship between SPS30 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 6.3 0.26 24.2 0 5.79 6.81
## 2 SPS30 0.889 0.026 33.8 0 0.837 0.94
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,114 x 5
## ID Oficial SPS30 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.3 0.9 7.1 -6.8
## 2 2 5.9 1.02 7.21 -1.31
## 3 3 8.1 1.25 7.41 0.689
## 4 4 3.7 1.78 7.88 -4.18
## 5 5 4 2.6 8.61 -4.61
## 6 6 4.8 3 8.97 -4.17
## 7 7 6 4.11 9.95 -3.95
## 8 8 8.3 4.94 10.7 -2.39
## 9 9 9.8 5.48 11.2 -1.37
## 10 10 16.5 11.7 16.7 -0.196
## # ... with 1,104 more rows
#Caso 10: HPMA115S0 VS PMSA003
df %>% select(HPMA115S0, PMSA003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1114 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
28.49 |
7.27 |
18.6 |
23.10 |
26.40 |
31.08 |
77.7 |
▇▂▁▁▁ |
| PMSA003 |
0 |
1 |
10.47 |
11.99 |
0.0 |
0.85 |
6.49 |
16.20 |
89.0 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = HPMA115S0 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.984
ggplot(df, aes(x = HPMA115S0, y = PMSA003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 HPMA115S0", y = "PM25 PMSA003",
title = "Relationship between HPMA115S0 and PMSA003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -35.7 0.256 -140. 0 -36.2 -35.2
## 2 HPMA115S0 1.62 0.009 186. 0 1.60 1.64
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,114 x 5
## ID PMSA003 HPMA115S0 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.017 18.6 -5.57 5.59
## 2 2 0 19.1 -4.76 4.76
## 3 3 1.61 19.4 -4.27 5.88
## 4 4 0.41 19.7 -3.79 4.20
## 5 5 1.72 20.4 -2.65 4.37
## 6 6 2.65 20.6 -2.33 4.98
## 7 7 4.04 22 -0.056 4.10
## 8 8 5.35 22.8 1.24 4.11
## 9 9 6.25 23.7 2.70 3.55
## 10 10 18.5 28.6 10.7 7.85
## # ... with 1,104 more rows
#Caso 11: HPMA115S0 VS PMS7003
df %>% select(HPMA115S0, PMS7003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1114 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
28.49 |
7.27 |
18.6 |
23.10 |
26.40 |
31.08 |
77.7 |
▇▂▁▁▁ |
| PMS7003 |
0 |
1 |
11.61 |
11.87 |
0.0 |
1.95 |
8.37 |
17.90 |
91.8 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = HPMA115S0 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.978
ggplot(df, aes(x = HPMA115S0, y = PMS7003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 HPMA115S0", y = "PM25 PMS7003",
title = "Relationship between HPMA115S0 and PMS7003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -33.9 0.298 -114. 0 -34.5 -33.3
## 2 HPMA115S0 1.60 0.01 158. 0 1.58 1.62
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,114 x 5
## ID PMS7003 HPMA115S0 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0 18.6 -4.18 4.18
## 2 2 0.290 19.1 -3.38 3.67
## 3 3 0.71 19.4 -2.90 3.61
## 4 4 1.4 19.7 -2.42 3.82
## 5 5 3.12 20.4 -1.30 4.42
## 6 6 4.16 20.6 -0.986 5.15
## 7 7 6.13 22 1.25 4.88
## 8 8 7.61 22.8 2.53 5.08
## 9 9 8.4 23.7 3.97 4.43
## 10 10 20.2 28.6 11.8 8.41
## # ... with 1,104 more rows
#Caso 12: HPMA115S0 VS Oficial
df %>% select(HPMA115S0, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1114 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
28.49 |
7.27 |
18.6 |
23.1 |
26.4 |
31.08 |
77.7 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
12.81 |
8.32 |
0.0 |
6.5 |
11.2 |
17.70 |
55.4 |
▇▆▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = HPMA115S0 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.711
ggplot(df, aes(x = HPMA115S0, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 HPMA115S0", y = "PM25 Oficial",
title = "Relationship between HPMA115S0 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -10.4 0.709 -14.6 0 -11.8 -8.97
## 2 HPMA115S0 0.813 0.024 33.7 0 0.766 0.861
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,114 x 5
## ID Oficial HPMA115S0 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.3 18.6 4.76 -4.46
## 2 2 5.9 19.1 5.17 0.729
## 3 3 8.1 19.4 5.42 2.68
## 4 4 3.7 19.7 5.66 -1.96
## 5 5 4 20.4 6.23 -2.23
## 6 6 4.8 20.6 6.39 -1.59
## 7 7 6 22 7.53 -1.53
## 8 8 8.3 22.8 8.18 0.12
## 9 9 9.8 23.7 8.91 0.888
## 10 10 16.5 28.6 12.9 3.60
## # ... with 1,104 more rows
#Caso 13: PMSA003 VS PMS7003
# Summary statistics for the two series named in this case's heading
# (PMSA003 and PMS7003). The statement previously selected `Oficial`, which
# duplicated the PMSA003-vs-Oficial case; the rendered summary that follows
# was produced by that older code and must be regenerated.
df %>%
  select(PMSA003, PMS7003) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
1114 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMSA003 |
0 |
1 |
10.47 |
11.99 |
0 |
0.85 |
6.49 |
16.2 |
89.0 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
12.81 |
8.32 |
0 |
6.50 |
11.20 |
17.7 |
55.4 |
▇▆▂▁▁ |
#**Pearson correlation coefficient original**
# Pearson correlation between PMSA003 and PMS7003, the pair named in this
# case's heading. The formula previously used `Oficial` (copy-paste from the
# PMSA003-vs-Oficial case); the printed value that follows comes from that
# older code and must be regenerated.
df %>%
  get_correlation(formula = PMSA003 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.711
# Scatter plot of PMS7003 against PMSA003 with a least-squares line, matching
# the pair named in this case's heading. The y aesthetic, y label and title
# previously referred to `Oficial` (copy-paste from the PMSA003-vs-Oficial
# case).
ggplot(df, aes(x = PMSA003, y = PMS7003)) +
  geom_point(alpha = 0.2) +
  labs(
    x = "PM25 PMSA003", y = "PM25 PMS7003",
    title = "Relationship between PMSA003 and PMS7003"
  ) +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
# Simple linear regression of PMS7003 on PMSA003, the pair named in this
# case's heading (response = second series, predictor = first, as in the other
# cases). The response was previously `Oficial`, duplicating the
# PMSA003-vs-Oficial case; the regression table and fitted points printed
# below come from that older model and must be regenerated.
score_model <- lm(PMS7003 ~ PMSA003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 7.64 0.233 32.8 0 7.19 8.1
## 2 PMSA003 0.493 0.015 33.7 0 0.464 0.522
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,114 x 5
## ID Oficial PMSA003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.3 0.017 7.65 -7.35
## 2 2 5.9 0 7.64 -1.74
## 3 3 8.1 1.61 8.44 -0.337
## 4 4 3.7 0.41 7.85 -4.15
## 5 5 4 1.72 8.49 -4.49
## 6 6 4.8 2.65 8.95 -4.15
## 7 7 6 4.04 9.64 -3.64
## 8 8 8.3 5.35 10.3 -1.98
## 9 9 9.8 6.25 10.7 -0.925
## 10 10 16.5 18.5 16.8 -0.266
## # ... with 1,104 more rows
#Caso 14: PMSA003 VS Oficial
df %>% select(PMSA003, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1114 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMSA003 |
0 |
1 |
10.47 |
11.99 |
0 |
0.85 |
6.49 |
16.2 |
89.0 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
12.81 |
8.32 |
0 |
6.50 |
11.20 |
17.7 |
55.4 |
▇▆▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = PMSA003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.711
ggplot(df, aes(x = PMSA003, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 PMSA003", y = "PM25 Oficial",
title = "Relationship between PMSA003 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMSA003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 7.64 0.233 32.8 0 7.19 8.1
## 2 PMSA003 0.493 0.015 33.7 0 0.464 0.522
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,114 x 5
## ID Oficial PMSA003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.3 0.017 7.65 -7.35
## 2 2 5.9 0 7.64 -1.74
## 3 3 8.1 1.61 8.44 -0.337
## 4 4 3.7 0.41 7.85 -4.15
## 5 5 4 1.72 8.49 -4.49
## 6 6 4.8 2.65 8.95 -4.15
## 7 7 6 4.04 9.64 -3.64
## 8 8 8.3 5.35 10.3 -1.98
## 9 9 9.8 6.25 10.7 -0.925
## 10 10 16.5 18.5 16.8 -0.266
## # ... with 1,104 more rows
#Caso 15: PMS7003 VS Oficial
df %>% select(PMS7003, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1114 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMS7003 |
0 |
1 |
11.61 |
11.87 |
0 |
1.95 |
8.37 |
17.9 |
91.8 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
12.81 |
8.32 |
0 |
6.50 |
11.20 |
17.7 |
55.4 |
▇▆▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = PMS7003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.713
# Scatter plot of Oficial against PMS7003 with a least-squares line.
# The title previously named PMSA003 although the plotted x series (and the
# x-axis label) is PMS7003.
ggplot(df, aes(x = PMS7003, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(
    x = "PM25 PMS7003", y = "PM25 Oficial",
    title = "Relationship between PMS7003 and Oficial"
  ) +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMS7003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 7.00 0.244 28.6 0 6.52 7.48
## 2 PMS7003 0.5 0.015 34.0 0 0.471 0.529
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,114 x 5
## ID Oficial PMS7003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.3 0 7.00 -6.70
## 2 2 5.9 0.290 7.15 -1.25
## 3 3 8.1 0.71 7.36 0.742
## 4 4 3.7 1.4 7.70 -4.00
## 5 5 4 3.12 8.56 -4.56
## 6 6 4.8 4.16 9.08 -4.28
## 7 7 6 6.13 10.1 -4.07
## 8 8 8.3 7.61 10.8 -2.51
## 9 9 9.8 8.4 11.2 -1.40
## 10 10 16.5 20.2 17.1 -0.598
## # ... with 1,104 more rows