library(readxl)
library(tidyverse)
## -- Attaching packages --------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v dplyr 1.0.2
## v tidyr 1.1.1 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(moderndive)
library(skimr)
# **2 hours late ESTACION KENNEDY VS CANAIRIOS**
# **5 different sensors: PMS7003 & PMSA003 & HPMA115S0 & SPS30 & SNGCJA5**
# Load the PM2.5 comparison workbook into `df` (read_excel() reads the first
# sheet unless told otherwise).
# NOTE(review): the path is absolute and Windows-specific, so this only runs on
# a machine that has the workbook at exactly this location.
df <- read_excel("C:/Mediciones/FERIAS_CANAIRIOS_2h.xlsx")
# Uncomment to browse the data interactively in the spreadsheet-style viewer.
#View(df)
# Print every column with its type and first values as a quick sanity check.
glimpse(df)
## Rows: 1,113
## Columns: 8
## $ Num <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17...
## $ Fecha <chr> "12-11-2020 24:00", "13-11-2020 01:00", "13-11-2020 02:00...
## $ Oficial <dbl> 5.9, 8.1, 3.7, 4.0, 4.8, 6.0, 8.3, 9.8, 16.5, 6.2, 10.2, ...
## $ PMS7003 <dbl> 0.00, 0.29, 0.71, 1.40, 3.12, 4.16, 6.13, 7.61, 8.40, 20....
## $ PMSA003 <dbl> 0.0169, 0.0000, 1.6100, 0.4100, 1.7200, 2.6500, 4.0400, 5...
## $ HPMA115S0 <dbl> 18.6, 19.1, 19.4, 19.7, 20.4, 20.6, 22.0, 22.8, 23.7, 28....
## $ SPS30 <dbl> 0.90, 1.02, 1.25, 1.78, 2.60, 3.00, 4.11, 4.94, 5.48, 11....
## $ SNGCJA5 <dbl> 0.00, 0.18, 0.45, 0.90, 1.51, 1.81, 2.86, 3.39, 3.48, 8.2...
# Spot-check the raw values by printing 10 randomly chosen rows.
# slice_sample() is the current replacement for the superseded sample_n()
# (available since dplyr 1.0.0). No seed is set, so the rows differ per run.
df %>%
  slice_sample(n = 10)
## # A tibble: 10 x 8
## Num Fecha Oficial PMS7003 PMSA003 HPMA115S0 SPS30 SNGCJA5
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 623 10-12-2020 17:00 31.7 46 45.3 51 26.1 23.5
## 2 849 20-12-2020 12:00 6 0.641 0.0469 23.6 1.17 0.219
## 3 683 12-12-2020 22:00 9.7 14.3 12.8 30.6 8.8 7.27
## 4 75 16-11-2020 02:00 14.6 19.6 18.8 31.9 12.6 10.2
## 5 4 13-11-2020 03:00 4 1.4 0.41 19.7 1.78 0.9
## 6 476 02-12-2020 20:00 16.1 15.7 14 31.6 9.59 8.21
## 7 775 16-12-2020 24:00 6 1.21 0.293 22.4 1.60 0.724
## 8 139 18-11-2020 20:00 15.4 13.3 10.4 27.5 7.76 6.47
## 9 12 13-11-2020 11:00 16.5 10.8 8.72 25 6.58 4.64
## 10 370 28-11-2020 10:00 6 2.4 1.1 23.9 2.62 1.5
# Interactive overlay of all six PM2.5 series against the row index (Num):
# the five low-cost sensors plus the official reference column. The first
# series is drawn by plot_ly(); each add_trace() adds one more series on the
# same axes, all drawn as lines with markers.
fig <- df %>%
  plot_ly(
    x = ~Num,
    y = ~PMS7003,
    name = 'PM2.5 PMS7003',
    type = 'scatter',
    mode = 'lines+markers'
  ) %>%
  add_trace(y = ~PMSA003, name = 'PM2.5 PMSA003', mode = 'lines+markers') %>%
  add_trace(y = ~HPMA115S0, name = 'PM2.5 HPMA115S0', mode = 'lines+markers') %>%
  add_trace(y = ~SPS30, name = 'PM2.5 SPS30', mode = 'lines+markers') %>%
  add_trace(y = ~SNGCJA5, name = 'PM2.5 SNGCJA5', mode = 'lines+markers') %>%
  add_trace(y = ~Oficial, name = 'PM2.5 Oficial', mode = 'lines+markers')
# Printing the object renders the widget.
fig
#Caso 1: SNGCJA5 VS SPS30
df %>% select(SNGCJA5, SPS30) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1113 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.67 |
9.75 |
0.00 |
1.53 |
4.80 |
9.75 |
236.87 |
▇▁▁▁▁ |
| SPS30 |
0 |
1 |
7.33 |
6.66 |
0.08 |
1.97 |
5.53 |
10.70 |
51.50 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ SPS30)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.499
ggplot(df, aes(x = SNGCJA5, y = SPS30)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 SPS30",
title = "Relationship between SNGCJA5 and SPS30") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(SPS30 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 5.06 0.21 24.1 0 4.65 5.47
## 2 SNGCJA5 0.34 0.018 19.2 0 0.306 0.375
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,113 x 5
## ID SPS30 SNGCJA5 SPS30_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.9 0 5.06 -4.16
## 2 2 1.02 0.18 5.12 -4.10
## 3 3 1.25 0.45 5.21 -3.96
## 4 4 1.78 0.9 5.36 -3.58
## 5 5 2.6 1.51 5.57 -2.97
## 6 6 3 1.81 5.67 -2.67
## 7 7 4.11 2.86 6.03 -1.92
## 8 8 4.94 3.39 6.21 -1.27
## 9 9 5.48 3.48 6.24 -0.763
## 10 10 11.7 8.23 7.86 3.84
## # ... with 1,103 more rows
#Caso 2: SNGCJA5 VS HPMA115S0
df %>% select(SNGCJA5, HPMA115S0) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1113 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.67 |
9.75 |
0.0 |
1.53 |
4.8 |
9.75 |
236.87 |
▇▁▁▁▁ |
| HPMA115S0 |
0 |
1 |
28.49 |
7.27 |
18.6 |
23.10 |
26.4 |
31.10 |
77.70 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ HPMA115S0)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.502
ggplot(df, aes(x = SNGCJA5, y = HPMA115S0)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 HPMA115S0",
title = "Relationship between SNGCJA5 and HPMA115S0") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(HPMA115S0 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 26.0 0.229 114. 0 25.5 26.4
## 2 SNGCJA5 0.374 0.019 19.3 0 0.336 0.412
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,113 x 5
## ID HPMA115S0 SNGCJA5 HPMA115S0_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 18.6 0 26.0 -7.40
## 2 2 19.1 0.18 26.1 -6.96
## 3 3 19.4 0.45 26.2 -6.77
## 4 4 19.7 0.9 26.3 -6.64
## 5 5 20.4 1.51 26.6 -6.16
## 6 6 20.6 1.81 26.7 -6.08
## 7 7 22 2.86 27.1 -5.07
## 8 8 22.8 3.39 27.3 -4.47
## 9 9 23.7 3.48 27.3 -3.6
## 10 10 28.6 8.23 29.1 -0.478
## # ... with 1,103 more rows
#Caso 3: SNGCJA5 VS PMSA003
df %>% select(SNGCJA5, PMSA003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1113 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.67 |
9.75 |
0 |
1.53 |
4.8 |
9.75 |
236.87 |
▇▁▁▁▁ |
| PMSA003 |
0 |
1 |
10.48 |
11.99 |
0 |
0.85 |
6.5 |
16.20 |
89.00 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.505
ggplot(df, aes(x = SNGCJA5, y = PMSA003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 PMSA003",
title = "Relationship between SNGCJA5 and PMSA003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 6.34 0.376 16.9 0 5.60 7.07
## 2 SNGCJA5 0.621 0.032 19.5 0 0.559 0.684
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,113 x 5
## ID PMSA003 SNGCJA5 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.017 0 6.34 -6.32
## 2 2 0 0.18 6.45 -6.45
## 3 3 1.61 0.45 6.62 -5.01
## 4 4 0.41 0.9 6.90 -6.49
## 5 5 1.72 1.51 7.28 -5.56
## 6 6 2.65 1.81 7.46 -4.81
## 7 7 4.04 2.86 8.11 -4.07
## 8 8 5.35 3.39 8.44 -3.09
## 9 9 6.25 3.48 8.50 -2.25
## 10 10 18.5 8.23 11.4 7.05
## # ... with 1,103 more rows
#Caso 4: SNGCJA5 VS PMS7003
df %>% select(SNGCJA5, PMS7003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1113 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.67 |
9.75 |
0 |
1.53 |
4.8 |
9.75 |
236.87 |
▇▁▁▁▁ |
| PMS7003 |
0 |
1 |
11.62 |
11.87 |
0 |
1.97 |
8.4 |
17.90 |
91.80 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.499
ggplot(df, aes(x = SNGCJA5, y = PMS7003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 PMS7003",
title = "Relationship between SNGCJA5 and PMS7003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 7.57 0.374 20.3 0 6.84 8.31
## 2 SNGCJA5 0.607 0.032 19.2 0 0.545 0.669
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,113 x 5
## ID PMS7003 SNGCJA5 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0 0 7.57 -7.57
## 2 2 0.290 0.18 7.68 -7.39
## 3 3 0.71 0.45 7.85 -7.14
## 4 4 1.4 0.9 8.12 -6.72
## 5 5 3.12 1.51 8.49 -5.37
## 6 6 4.16 1.81 8.67 -4.51
## 7 7 6.13 2.86 9.31 -3.18
## 8 8 7.61 3.39 9.63 -2.02
## 9 9 8.4 3.48 9.69 -1.29
## 10 10 20.2 8.23 12.6 7.63
## # ... with 1,103 more rows
#Caso 5: SNGCJA5 VS Oficial
df %>% select(SNGCJA5, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1113 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.67 |
9.75 |
0 |
1.53 |
4.8 |
9.75 |
236.87 |
▇▁▁▁▁ |
| Oficial |
0 |
1 |
12.82 |
8.31 |
0 |
6.50 |
11.2 |
17.70 |
55.40 |
▇▆▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.405
ggplot(df, aes(x = SNGCJA5, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 Oficial",
title = "Relationship between SNGCJA5 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 10.5 0.276 38.1 0 9.97 11.1
## 2 SNGCJA5 0.345 0.023 14.8 0 0.299 0.391
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,113 x 5
## ID Oficial SNGCJA5 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 5.9 0 10.5 -4.62
## 2 2 8.1 0.18 10.6 -2.48
## 3 3 3.7 0.45 10.7 -6.97
## 4 4 4 0.9 10.8 -6.83
## 5 5 4.8 1.51 11.0 -6.24
## 6 6 6 1.81 11.1 -5.14
## 7 7 8.3 2.86 11.5 -3.20
## 8 8 9.8 3.39 11.7 -1.89
## 9 9 16.5 3.48 11.7 4.78
## 10 10 6.2 8.23 13.4 -7.16
## # ... with 1,103 more rows
#Caso 6: SPS30 VS HPMA115S0
df %>% select(SPS30, HPMA115S0) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1113 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
7.33 |
6.66 |
0.08 |
1.97 |
5.53 |
10.7 |
51.5 |
▇▂▁▁▁ |
| HPMA115S0 |
0 |
1 |
28.49 |
7.27 |
18.60 |
23.10 |
26.40 |
31.1 |
77.7 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ HPMA115S0)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.983
ggplot(df, aes(x = SPS30, y = HPMA115S0)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 HPMA115S0",
title = "Relationship between SPS30 and HPMA115S0") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(HPMA115S0 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 20.6 0.059 347. 0 20.5 20.7
## 2 SPS30 1.07 0.006 179. 0 1.06 1.08
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,113 x 5
## ID HPMA115S0 SPS30 HPMA115S0_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 18.6 0.9 21.6 -2.99
## 2 2 19.1 1.02 21.7 -2.62
## 3 3 19.4 1.25 22.0 -2.57
## 4 4 19.7 1.78 22.5 -2.84
## 5 5 20.4 2.6 23.4 -3.02
## 6 6 20.6 3 23.8 -3.25
## 7 7 22 4.11 25.0 -3.04
## 8 8 22.8 4.94 25.9 -3.13
## 9 9 23.7 5.48 26.5 -2.81
## 10 10 28.6 11.7 33.2 -4.59
## # ... with 1,103 more rows
#Caso 7: SPS30 VS PMSA003
df %>% select(SPS30, PMSA003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1113 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
7.33 |
6.66 |
0.08 |
1.97 |
5.53 |
10.7 |
51.5 |
▇▂▁▁▁ |
| PMSA003 |
0 |
1 |
10.48 |
11.99 |
0.00 |
0.85 |
6.50 |
16.2 |
89.0 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.996
ggplot(df, aes(x = SPS30, y = PMSA003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 PMSA003",
title = "Relationship between SPS30 and PMSA003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -2.66 0.048 -55.1 0 -2.75 -2.56
## 2 SPS30 1.79 0.005 368. 0 1.78 1.80
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,113 x 5
## ID PMSA003 SPS30 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.017 0.9 -1.04 1.06
## 2 2 0 1.02 -0.828 0.828
## 3 3 1.61 1.25 -0.416 2.03
## 4 4 0.41 1.78 0.534 -0.124
## 5 5 1.72 2.6 2.00 -0.284
## 6 6 2.65 3 2.72 -0.071
## 7 7 4.04 4.11 4.71 -0.671
## 8 8 5.35 4.94 6.20 -0.849
## 9 9 6.25 5.48 7.17 -0.917
## 10 10 18.5 11.7 18.3 0.184
## # ... with 1,103 more rows
#Caso 8: SPS30 VS PMS7003
df %>% select(SPS30, PMS7003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1113 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
7.33 |
6.66 |
0.08 |
1.97 |
5.53 |
10.7 |
51.5 |
▇▂▁▁▁ |
| PMS7003 |
0 |
1 |
11.62 |
11.87 |
0.00 |
1.97 |
8.40 |
17.9 |
91.8 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.996
ggplot(df, aes(x = SPS30, y = PMS7003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 PMS7003",
title = "Relationship between SPS30 and PMS7003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -1.38 0.049 -28.0 0 -1.48 -1.29
## 2 SPS30 1.78 0.005 356. 0 1.76 1.78
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,113 x 5
## ID PMS7003 SPS30 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0 0.9 0.214 -0.214
## 2 2 0.290 1.02 0.427 -0.137
## 3 3 0.71 1.25 0.835 -0.125
## 4 4 1.4 1.78 1.78 -0.376
## 5 5 3.12 2.6 3.23 -0.111
## 6 6 4.16 3 3.94 0.219
## 7 7 6.13 4.11 5.91 0.219
## 8 8 7.61 4.94 7.38 0.225
## 9 9 8.4 5.48 8.34 0.057
## 10 10 20.2 11.7 19.4 0.817
## # ... with 1,103 more rows
#Caso 9: SPS30 VS Oficial
df %>% select(SPS30, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1113 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
7.33 |
6.66 |
0.08 |
1.97 |
5.53 |
10.7 |
51.5 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
12.82 |
8.31 |
0.00 |
6.50 |
11.20 |
17.7 |
55.4 |
▇▆▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.807
ggplot(df, aes(x = SPS30, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 Oficial",
title = "Relationship between SPS30 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 5.44 0.219 24.8 0 5.01 5.87
## 2 SPS30 1.01 0.022 45.5 0 0.964 1.05
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,113 x 5
## ID Oficial SPS30 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 5.9 0.9 6.34 -0.445
## 2 2 8.1 1.02 6.47 1.63
## 3 3 3.7 1.25 6.70 -3.00
## 4 4 4 1.78 7.23 -3.23
## 5 5 4.8 2.6 8.06 -3.26
## 6 6 6 3 8.46 -2.46
## 7 7 8.3 4.11 9.58 -1.28
## 8 8 9.8 4.94 10.4 -0.613
## 9 9 16.5 5.48 11.0 5.54
## 10 10 6.2 11.7 17.2 -11.0
## # ... with 1,103 more rows
#Caso 10: HPMA115S0 VS PMSA003
df %>% select(HPMA115S0, PMSA003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1113 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
28.49 |
7.27 |
18.6 |
23.10 |
26.4 |
31.1 |
77.7 |
▇▂▁▁▁ |
| PMSA003 |
0 |
1 |
10.48 |
11.99 |
0.0 |
0.85 |
6.5 |
16.2 |
89.0 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = HPMA115S0 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.984
ggplot(df, aes(x = HPMA115S0, y = PMSA003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 HPMA115S0", y = "PM25 PMSA003",
title = "Relationship between HPMA115S0 and PMSA003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -35.7 0.256 -139. 0 -36.2 -35.2
## 2 HPMA115S0 1.62 0.009 186. 0 1.60 1.64
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,113 x 5
## ID PMSA003 HPMA115S0 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.017 18.6 -5.57 5.59
## 2 2 0 19.1 -4.76 4.76
## 3 3 1.61 19.4 -4.27 5.88
## 4 4 0.41 19.7 -3.78 4.19
## 5 5 1.72 20.4 -2.65 4.37
## 6 6 2.65 20.6 -2.32 4.97
## 7 7 4.04 22 -0.053 4.09
## 8 8 5.35 22.8 1.24 4.11
## 9 9 6.25 23.7 2.70 3.55
## 10 10 18.5 28.6 10.7 7.85
## # ... with 1,103 more rows
#Caso 11: HPMA115S0 VS PMS7003
df %>% select(HPMA115S0, PMS7003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1113 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
28.49 |
7.27 |
18.6 |
23.10 |
26.4 |
31.1 |
77.7 |
▇▂▁▁▁ |
| PMS7003 |
0 |
1 |
11.62 |
11.87 |
0.0 |
1.97 |
8.4 |
17.9 |
91.8 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = HPMA115S0 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.978
ggplot(df, aes(x = HPMA115S0, y = PMS7003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 HPMA115S0", y = "PM25 PMS7003",
title = "Relationship between HPMA115S0 and PMS7003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -33.9 0.298 -114. 0 -34.5 -33.3
## 2 HPMA115S0 1.60 0.01 158. 0 1.58 1.62
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,113 x 5
## ID PMS7003 HPMA115S0 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0 18.6 -4.18 4.18
## 2 2 0.290 19.1 -3.38 3.67
## 3 3 0.71 19.4 -2.90 3.61
## 4 4 1.4 19.7 -2.42 3.82
## 5 5 3.12 20.4 -1.30 4.42
## 6 6 4.16 20.6 -0.981 5.14
## 7 7 6.13 22 1.25 4.88
## 8 8 7.61 22.8 2.53 5.08
## 9 9 8.4 23.7 3.97 4.43
## 10 10 20.2 28.6 11.8 8.41
## # ... with 1,103 more rows
#Caso 12: HPMA115S0 VS Oficial
df %>% select(HPMA115S0, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1113 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
28.49 |
7.27 |
18.6 |
23.1 |
26.4 |
31.1 |
77.7 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
12.82 |
8.31 |
0.0 |
6.5 |
11.2 |
17.7 |
55.4 |
▇▆▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = HPMA115S0 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.809
ggplot(df, aes(x = HPMA115S0, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 HPMA115S0", y = "PM25 Oficial",
title = "Relationship between HPMA115S0 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -13.5 0.593 -22.8 0 -14.7 -12.3
## 2 HPMA115S0 0.924 0.02 45.8 0 0.884 0.963
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,113 x 5
## ID Oficial HPMA115S0 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 5.9 18.6 3.68 2.22
## 2 2 8.1 19.1 4.14 3.96
## 3 3 3.7 19.4 4.42 -0.716
## 4 4 4 19.7 4.69 -0.693
## 5 5 4.8 20.4 5.34 -0.54
## 6 6 6 20.6 5.52 0.475
## 7 7 8.3 22 6.82 1.48
## 8 8 9.8 22.8 7.56 2.24
## 9 9 16.5 23.7 8.39 8.11
## 10 10 6.2 28.6 12.9 -6.72
## # ... with 1,103 more rows
#Caso 13: PMSA003 VS PMS7003
# PMSA003 vs PMS7003: summary statistics, Pearson correlation, scatter plot
# and simple linear regression, following the same layout as the other cases
# (first sensor on the x axis / as predictor, second sensor as response).
# NOTE(review): this section used to analyse PMSA003 vs Oficial, duplicating
# the case that follows it, so the pair named in the heading was never
# analysed. The printed output is omitted here on purpose: re-knit the report
# to regenerate it for the corrected pair.
df %>% select(PMSA003, PMS7003) %>% skim()
#**Pearson correlation coefficient original**
df %>%
  get_correlation(formula = PMSA003 ~ PMS7003)
ggplot(df, aes(x = PMSA003, y = PMS7003)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 PMSA003", y = "PM25 PMS7003",
       title = "Relationship between PMSA003 and PMS7003") +
  geom_smooth(method = "lm", se = FALSE)

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ PMSA003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
# Per-row observed value, fitted value and residual for the model above.
regression_points <- get_regression_points(score_model)
regression_points
#Caso 14: PMSA003 VS Oficial
df %>% select(PMSA003, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1113 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMSA003 |
0 |
1 |
10.48 |
11.99 |
0 |
0.85 |
6.5 |
16.2 |
89.0 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
12.82 |
8.31 |
0 |
6.50 |
11.2 |
17.7 |
55.4 |
▇▆▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = PMSA003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.806
ggplot(df, aes(x = PMSA003, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 PMSA003", y = "PM25 Oficial",
title = "Relationship between PMSA003 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMSA003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 6.96 0.196 35.5 0 6.58 7.35
## 2 PMSA003 0.559 0.012 45.3 0 0.534 0.583
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,113 x 5
## ID Oficial PMSA003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 5.9 0.017 6.97 -1.07
## 2 2 8.1 0 6.96 1.14
## 3 3 3.7 1.61 7.86 -4.16
## 4 4 4 0.41 7.19 -3.19
## 5 5 4.8 1.72 7.92 -3.12
## 6 6 6 2.65 8.44 -2.44
## 7 7 8.3 4.04 9.22 -0.921
## 8 8 9.8 5.35 9.95 -0.152
## 9 9 16.5 6.25 10.5 6.04
## 10 10 6.2 18.5 17.3 -11.1
## # ... with 1,103 more rows
#Caso 15: PMS7003 VS Oficial
df %>% select(PMS7003, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1113 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMS7003 |
0 |
1 |
11.62 |
11.87 |
0 |
1.97 |
8.4 |
17.9 |
91.8 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
12.82 |
8.31 |
0 |
6.50 |
11.2 |
17.7 |
55.4 |
▇▆▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = PMS7003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.811
# Scatter plot of the official PM2.5 column against PMS7003, with a
# least-squares line and no confidence band (se = FALSE).
# The title now names PMS7003, matching the x aesthetic and the x-axis label;
# it previously said PMSA003, carried over from the preceding case.
ggplot(df, aes(x = PMS7003, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 PMS7003", y = "PM25 Oficial",
       title = "Relationship between PMS7003 and Oficial") +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMS7003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 6.22 0.204 30.5 0 5.82 6.62
## 2 PMS7003 0.567 0.012 46.1 0 0.543 0.592
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,113 x 5
## ID Oficial PMS7003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 5.9 0 6.22 -0.321
## 2 2 8.1 0.290 6.39 1.71
## 3 3 3.7 0.71 6.62 -2.92
## 4 4 4 1.4 7.02 -3.02
## 5 5 4.8 3.12 7.99 -3.19
## 6 6 6 4.16 8.58 -2.58
## 7 7 8.3 6.13 9.7 -1.4
## 8 8 9.8 7.61 10.5 -0.74
## 9 9 16.5 8.4 11.0 5.51
## 10 10 6.2 20.2 17.7 -11.5
## # ... with 1,103 more rows