library(readxl)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.6 v purrr 0.3.4
## v tibble 3.1.3 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(moderndive)
library(skimr)
# **ESTACION FERIAS VS SENSORES BAJO COSTO**
# **5 sensores diferentes: PMS7003 & PMSA003 & HPMA115S0 & SPS30 & SNGCJA5**
# **Comparaciones entre sensores de bajo costo y con la estación oficial**
df <- read_excel("C:/Mediciones/FERIAS.xlsx")
View(df)
glimpse(df)
## Rows: 1,115
## Columns: 8
## $ Num <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 1~
## $ Fecha <chr> "12-11-2020 24:00", "13-11-2020 01:00", "13-11-2020 02:00", ~
## $ Oficial <dbl> 3.7, 0.3, 5.9, 8.1, 3.7, 4.0, 4.8, 6.0, 8.3, 9.8, 16.5, 6.2,~
## $ PMS7003 <dbl> 0.00, 0.29, 0.71, 1.40, 3.12, 4.16, 6.13, 7.61, 8.40, 20.20,~
## $ PMSA003 <dbl> 0.0169, 0.0000, 1.6100, 0.4100, 1.7200, 2.6500, 4.0400, 5.35~
## $ HPMA115S0 <dbl> 18.6, 19.1, 19.4, 19.7, 20.4, 20.6, 22.0, 22.8, 23.7, 28.6, ~
## $ SPS30 <dbl> 0.90, 1.02, 1.25, 1.78, 2.60, 3.00, 4.11, 4.94, 5.48, 11.70,~
## $ SNGCJA5 <dbl> 0.00, 0.18, 0.45, 0.90, 1.51, 1.81, 2.86, 3.39, 3.48, 8.23, ~
df %>%
sample_n(size = 10)
## # A tibble: 10 x 8
## Num Fecha Oficial PMS7003 PMSA003 HPMA115S0 SPS30 SNGCJA5
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 277 24-11-2020 14:00 18.9 31.3 30.3 39 17.8 14.5
## 2 81 16-11-2020 09:00 6.9 2.15 0.94 21.7 2.24 1.06
## 3 517 03-12-2020 22:00 18 18.7 16.6 31.5 11.1 9.16
## 4 1118 08-01-2021 19:00 7 0.932 0.508 23 1.51 0.458
## 5 32 14-11-2020 07:00 26.3 31.6 29.9 43 19.5 16.4
## 6 904 22-12-2020 22:00 26 8.98 7.08 27.5 5.37 4.33
## 7 859 20-12-2020 22:00 8 3.65 1.96 23.9 2.88 1.92
## 8 329 26-11-2020 18:00 12 11.7 9.36 27.8 6.96 5.87
## 9 97 17-11-2020 01:00 10.5 7.92 6.23 25.1 5.67 4.38
## 10 961 25-12-2020 15:00 17 6.67 5.06 27 5.06 3.43
fig <- plot_ly(df, x = ~Num, y = ~PMS7003, name = 'PM2.5 PMS7003', type = 'scatter', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~PMSA003, name = 'PM2.5 PMSA003', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~HPMA115S0, name = 'PM2.5 HPMA115S0', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~SPS30, name = 'PM2.5 SPS30', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~SNGCJA5, name = 'PM2.5 SNGCJA5', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~Oficial, name = 'PM2.5 Oficial', mode = 'lines+markers')
fig
#Caso 1: SNGCJA5 VS SPS30
df %>% select(SNGCJA5, SPS30) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1115 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.69 |
9.75 |
0.00 |
1.53 |
4.81 |
9.76 |
236.87 |
▇▁▁▁▁ |
| SPS30 |
0 |
1 |
7.32 |
6.66 |
0.08 |
1.97 |
5.52 |
10.65 |
51.50 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ SPS30)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.496
ggplot(df, aes(x = SNGCJA5, y = SPS30)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 SPS30",
title = "Relationship between SNGCJA5 and SPS30") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(SPS30 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 5.05 0.21 24.0 0 4.64 5.46
## 2 SNGCJA5 0.339 0.018 19.1 0 0.304 0.374
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
## ID SPS30 SNGCJA5 SPS30_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.9 0 5.05 -4.15
## 2 2 1.02 0.18 5.11 -4.09
## 3 3 1.25 0.45 5.20 -3.96
## 4 4 1.78 0.9 5.36 -3.58
## 5 5 2.6 1.51 5.56 -2.96
## 6 6 3 1.81 5.66 -2.66
## 7 7 4.11 2.86 6.02 -1.91
## 8 8 4.94 3.39 6.20 -1.26
## 9 9 5.48 3.48 6.23 -0.751
## 10 10 11.7 8.23 7.84 3.86
## # ... with 1,105 more rows
#Caso 2: SNGCJA5 VS HPMA115S0
df %>% select(SNGCJA5, HPMA115S0) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1115 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.69 |
9.75 |
0.0 |
1.53 |
4.81 |
9.76 |
236.87 |
▇▁▁▁▁ |
| HPMA115S0 |
0 |
1 |
28.48 |
7.27 |
18.6 |
23.10 |
26.40 |
31.05 |
77.70 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ HPMA115S0)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.500
ggplot(df, aes(x = SNGCJA5, y = HPMA115S0)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 HPMA115S0",
title = "Relationship between SNGCJA5 and HPMA115S0") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(HPMA115S0 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 26.0 0.229 114. 0 25.5 26.4
## 2 SNGCJA5 0.373 0.019 19.2 0 0.335 0.411
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
## ID HPMA115S0 SNGCJA5 HPMA115S0_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 18.6 0 26.0 -7.39
## 2 2 19.1 0.18 26.1 -6.96
## 3 3 19.4 0.45 26.2 -6.76
## 4 4 19.7 0.9 26.3 -6.63
## 5 5 20.4 1.51 26.6 -6.16
## 6 6 20.6 1.81 26.7 -6.07
## 7 7 22 2.86 27.1 -5.06
## 8 8 22.8 3.39 27.3 -4.46
## 9 9 23.7 3.48 27.3 -3.59
## 10 10 28.6 8.23 29.1 -0.459
## # ... with 1,105 more rows
#Caso 3: SNGCJA5 VS PMSA003
df %>% select(SNGCJA5, PMSA003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1115 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.69 |
9.75 |
0 |
1.53 |
4.81 |
9.76 |
236.87 |
▇▁▁▁▁ |
| PMSA003 |
0 |
1 |
10.46 |
11.99 |
0 |
0.85 |
6.48 |
16.20 |
89.00 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.503
ggplot(df, aes(x = SNGCJA5, y = PMSA003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 PMSA003",
title = "Relationship between SNGCJA5 and PMSA003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 6.33 0.376 16.8 0 5.59 7.07
## 2 SNGCJA5 0.618 0.032 19.4 0 0.556 0.681
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
## ID PMSA003 SNGCJA5 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.017 0 6.33 -6.31
## 2 2 0 0.18 6.44 -6.44
## 3 3 1.61 0.45 6.61 -5.00
## 4 4 0.41 0.9 6.88 -6.47
## 5 5 1.72 1.51 7.26 -5.54
## 6 6 2.65 1.81 7.45 -4.80
## 7 7 4.04 2.86 8.10 -4.06
## 8 8 5.35 3.39 8.42 -3.07
## 9 9 6.25 3.48 8.48 -2.23
## 10 10 18.5 8.23 11.4 7.08
## # ... with 1,105 more rows
#Caso 4: SNGCJA5 VS PMS7003
df %>% select(SNGCJA5, PMS7003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1115 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.69 |
9.75 |
0 |
1.53 |
4.81 |
9.76 |
236.87 |
▇▁▁▁▁ |
| PMS7003 |
0 |
1 |
11.60 |
11.87 |
0 |
1.94 |
8.34 |
17.90 |
91.80 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.496
ggplot(df, aes(x = SNGCJA5, y = PMS7003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 PMS7003",
title = "Relationship between SNGCJA5 and PMS7003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 7.56 0.374 20.2 0 6.83 8.30
## 2 SNGCJA5 0.604 0.032 19.1 0 0.542 0.666
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
## ID PMS7003 SNGCJA5 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0 0 7.56 -7.56
## 2 2 0.29 0.18 7.67 -7.38
## 3 3 0.71 0.45 7.84 -7.13
## 4 4 1.4 0.9 8.11 -6.71
## 5 5 3.12 1.51 8.48 -5.36
## 6 6 4.16 1.81 8.66 -4.50
## 7 7 6.13 2.86 9.29 -3.16
## 8 8 7.61 3.39 9.61 -2.00
## 9 9 8.4 3.48 9.67 -1.27
## 10 10 20.2 8.23 12.5 7.66
## # ... with 1,105 more rows
#Caso 5: SNGCJA5 VS Oficial
df %>% select(SNGCJA5, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1115 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.69 |
9.75 |
0 |
1.53 |
4.81 |
9.76 |
236.87 |
▇▁▁▁▁ |
| Oficial |
0 |
1 |
12.80 |
8.32 |
0 |
6.50 |
11.20 |
17.70 |
55.40 |
▇▆▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.275
ggplot(df, aes(x = SNGCJA5, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 Oficial",
title = "Relationship between SNGCJA5 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 11.2 0.291 38.7 0 10.7 11.8
## 2 SNGCJA5 0.234 0.025 9.53 0 0.186 0.283
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
## ID Oficial SNGCJA5 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 3.7 0 11.2 -7.53
## 2 2 0.3 0.18 11.3 -11.0
## 3 3 5.9 0.45 11.3 -5.44
## 4 4 8.1 0.9 11.4 -3.34
## 5 5 3.7 1.51 11.6 -7.89
## 6 6 4 1.81 11.7 -7.66
## 7 7 4.8 2.86 11.9 -7.10
## 8 8 6 3.39 12.0 -6.03
## 9 9 8.3 3.48 12.0 -3.75
## 10 10 9.8 8.23 13.2 -3.36
## # ... with 1,105 more rows
#Caso 6: SPS30 VS HPMA115S0
df %>% select(SPS30, HPMA115S0) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1115 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
7.32 |
6.66 |
0.08 |
1.97 |
5.52 |
10.65 |
51.5 |
▇▂▁▁▁ |
| HPMA115S0 |
0 |
1 |
28.48 |
7.27 |
18.60 |
23.10 |
26.40 |
31.05 |
77.7 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ HPMA115S0)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.983
ggplot(df, aes(x = SPS30, y = HPMA115S0)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 HPMA115S0",
title = "Relationship between SPS30 and HPMA115S0") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(HPMA115S0 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 20.6 0.059 348. 0 20.5 20.7
## 2 SPS30 1.07 0.006 179. 0 1.06 1.08
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
## ID HPMA115S0 SPS30 HPMA115S0_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 18.6 0.9 21.6 -3.00
## 2 2 19.1 1.02 21.7 -2.62
## 3 3 19.4 1.25 22.0 -2.57
## 4 4 19.7 1.78 22.5 -2.84
## 5 5 20.4 2.6 23.4 -3.02
## 6 6 20.6 3 23.8 -3.25
## 7 7 22 4.11 25.0 -3.04
## 8 8 22.8 4.94 25.9 -3.13
## 9 9 23.7 5.48 26.5 -2.81
## 10 10 28.6 11.7 33.2 -4.59
## # ... with 1,105 more rows
#Caso 7: SPS30 VS PMSA003
df %>% select(SPS30, PMSA003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1115 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
7.32 |
6.66 |
0.08 |
1.97 |
5.52 |
10.65 |
51.5 |
▇▂▁▁▁ |
| PMSA003 |
0 |
1 |
10.46 |
11.99 |
0.00 |
0.85 |
6.48 |
16.20 |
89.0 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.996
ggplot(df, aes(x = SPS30, y = PMSA003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 PMSA003",
title = "Relationship between SPS30 and PMSA003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -2.65 0.048 -55.2 0 -2.75 -2.56
## 2 SPS30 1.79 0.005 369. 0 1.78 1.80
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
## ID PMSA003 SPS30 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.017 0.9 -1.04 1.06
## 2 2 0 1.02 -0.826 0.826
## 3 3 1.61 1.25 -0.414 2.02
## 4 4 0.41 1.78 0.536 -0.126
## 5 5 1.72 2.6 2.01 -0.286
## 6 6 2.65 3 2.72 -0.073
## 7 7 4.04 4.11 4.71 -0.672
## 8 8 5.35 4.94 6.2 -0.85
## 9 9 6.25 5.48 7.17 -0.918
## 10 10 18.5 11.7 18.3 0.183
## # ... with 1,105 more rows
#Caso 8: SPS30 VS PMS7003
df %>% select(SPS30, PMS7003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1115 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
7.32 |
6.66 |
0.08 |
1.97 |
5.52 |
10.65 |
51.5 |
▇▂▁▁▁ |
| PMS7003 |
0 |
1 |
11.60 |
11.87 |
0.00 |
1.94 |
8.34 |
17.90 |
91.8 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.996
ggplot(df, aes(x = SPS30, y = PMS7003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 PMS7003",
title = "Relationship between SPS30 and PMS7003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -1.38 0.049 -28.1 0 -1.48 -1.29
## 2 SPS30 1.78 0.005 357. 0 1.76 1.78
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
## ID PMS7003 SPS30 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0 0.9 0.213 -0.213
## 2 2 0.29 1.02 0.426 -0.136
## 3 3 0.71 1.25 0.834 -0.124
## 4 4 1.4 1.78 1.78 -0.375
## 5 5 3.12 2.6 3.23 -0.11
## 6 6 4.16 3 3.94 0.219
## 7 7 6.13 4.11 5.91 0.219
## 8 8 7.61 4.94 7.38 0.226
## 9 9 8.4 5.48 8.34 0.057
## 10 10 20.2 11.7 19.4 0.817
## # ... with 1,105 more rows
#Caso 9: SPS30 VS Oficial
df %>% select(SPS30, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1115 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
7.32 |
6.66 |
0.08 |
1.97 |
5.52 |
10.65 |
51.5 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
12.80 |
8.32 |
0.00 |
6.50 |
11.20 |
17.70 |
55.4 |
▇▆▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.565
ggplot(df, aes(x = SPS30, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 Oficial",
title = "Relationship between SPS30 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 7.64 0.306 25.0 0 7.04 8.24
## 2 SPS30 0.705 0.031 22.8 0 0.645 0.766
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
## ID Oficial SPS30 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 3.7 0.9 8.27 -4.57
## 2 2 0.3 1.02 8.36 -8.06
## 3 3 5.9 1.25 8.52 -2.62
## 4 4 8.1 1.78 8.89 -0.793
## 5 5 3.7 2.6 9.47 -5.77
## 6 6 4 3 9.75 -5.75
## 7 7 4.8 4.11 10.5 -5.74
## 8 8 6 4.94 11.1 -5.12
## 9 9 8.3 5.48 11.5 -3.20
## 10 10 9.8 11.7 15.9 -6.09
## # ... with 1,105 more rows
#Caso 10: HPMA115S0 VS PMSA003
df %>% select(HPMA115S0, PMSA003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1115 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
28.48 |
7.27 |
18.6 |
23.10 |
26.40 |
31.05 |
77.7 |
▇▂▁▁▁ |
| PMSA003 |
0 |
1 |
10.46 |
11.99 |
0.0 |
0.85 |
6.48 |
16.20 |
89.0 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = HPMA115S0 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.984
ggplot(df, aes(x = HPMA115S0, y = PMSA003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 HPMA115S0", y = "PM25 PMSA003",
title = "Relationship between HPMA115S0 and PMSA003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -35.8 0.256 -140. 0 -36.3 -35.2
## 2 HPMA115S0 1.62 0.009 186. 0 1.60 1.64
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
## ID PMSA003 HPMA115S0 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.017 18.6 -5.58 5.59
## 2 2 0 19.1 -4.76 4.76
## 3 3 1.61 19.4 -4.28 5.89
## 4 4 0.41 19.7 -3.79 4.2
## 5 5 1.72 20.4 -2.65 4.37
## 6 6 2.65 20.6 -2.33 4.98
## 7 7 4.04 22 -0.058 4.10
## 8 8 5.35 22.8 1.24 4.11
## 9 9 6.25 23.7 2.7 3.55
## 10 10 18.5 28.6 10.6 7.85
## # ... with 1,105 more rows
#Caso 11: HPMA115S0 VS PMS7003
df %>% select(HPMA115S0, PMS7003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1115 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
28.48 |
7.27 |
18.6 |
23.10 |
26.40 |
31.05 |
77.7 |
▇▂▁▁▁ |
| PMS7003 |
0 |
1 |
11.60 |
11.87 |
0.0 |
1.94 |
8.34 |
17.90 |
91.8 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = HPMA115S0 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.978
ggplot(df, aes(x = HPMA115S0, y = PMS7003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 HPMA115S0", y = "PM25 PMS7003",
title = "Relationship between HPMA115S0 and PMS7003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -33.9 0.298 -114. 0 -34.5 -33.3
## 2 HPMA115S0 1.60 0.01 158. 0 1.58 1.62
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
## ID PMS7003 HPMA115S0 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0 18.6 -4.18 4.18
## 2 2 0.29 19.1 -3.39 3.68
## 3 3 0.71 19.4 -2.91 3.62
## 4 4 1.4 19.7 -2.43 3.83
## 5 5 3.12 20.4 -1.31 4.43
## 6 6 4.16 20.6 -0.99 5.15
## 7 7 6.13 22 1.25 4.88
## 8 8 7.61 22.8 2.52 5.08
## 9 9 8.4 23.7 3.96 4.44
## 10 10 20.2 28.6 11.8 8.41
## # ... with 1,105 more rows
#Caso 10: HPMA115S0 VS Oficial
df %>% select(HPMA115S0, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1115 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
28.48 |
7.27 |
18.6 |
23.1 |
26.4 |
31.05 |
77.7 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
12.80 |
8.32 |
0.0 |
6.5 |
11.2 |
17.70 |
55.4 |
▇▆▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = HPMA115S0 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.557
ggplot(df, aes(x = HPMA115S0, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 HPMA115S0", y = "PM25 Oficial",
title = "Relationship between HPMA115S0 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -5.35 0.837 -6.40 0 -7.00 -3.71
## 2 HPMA115S0 0.637 0.028 22.4 0 0.581 0.693
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
## ID Oficial HPMA115S0 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 3.7 18.6 6.5 -2.8
## 2 2 0.3 19.1 6.82 -6.52
## 3 3 5.9 19.4 7.01 -1.11
## 4 4 8.1 19.7 7.20 0.899
## 5 5 3.7 20.4 7.65 -3.95
## 6 6 4 20.6 7.78 -3.78
## 7 7 4.8 22 8.67 -3.87
## 8 8 6 22.8 9.18 -3.18
## 9 9 8.3 23.7 9.75 -1.45
## 10 10 9.8 28.6 12.9 -3.07
## # ... with 1,105 more rows
#Caso 11: PMSA003 VS PMS7003
df %>% select(PMSA003, PMS7003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1115 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMSA003 |
0 |
1 |
10.46 |
11.99 |
0 |
0.85 |
6.48 |
16.2 |
89.0 |
▇▂▁▁▁ |
| PMS7003 |
0 |
1 |
11.60 |
11.87 |
0 |
1.94 |
8.34 |
17.9 |
91.8 |
▇▂▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = PMSA003 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.996
ggplot(df, aes(x = PMSA003, y = PMS7003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 PMSA003", y = "PM25 PMS7003",
title = "Relationship between PMSA003 and PMS7003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ PMSA003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 1.28 0.042 30.5 0 1.2 1.36
## 2 PMSA003 0.987 0.003 374. 0 0.982 0.992
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
## ID PMS7003 PMSA003 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0 0.017 1.30 -1.30
## 2 2 0.29 0 1.28 -0.992
## 3 3 0.71 1.61 2.87 -2.16
## 4 4 1.4 0.41 1.69 -0.287
## 5 5 3.12 1.72 2.98 0.141
## 6 6 4.16 2.65 3.90 0.263
## 7 7 6.13 4.04 5.27 0.862
## 8 8 7.61 5.35 6.56 1.05
## 9 9 8.4 6.25 7.45 0.951
## 10 10 20.2 18.5 19.5 0.664
## # ... with 1,105 more rows
#Caso 12: PMSA003 VS Oficial
df %>% select(PMSA003, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1115 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMSA003 |
0 |
1 |
10.46 |
11.99 |
0 |
0.85 |
6.48 |
16.2 |
89.0 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
12.80 |
8.32 |
0 |
6.50 |
11.20 |
17.7 |
55.4 |
▇▆▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = PMSA003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.560
ggplot(df, aes(x = PMSA003, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 PMSA003", y = "PM25 Oficial",
title = "Relationship between PMSA003 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMSA003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 8.73 0.274 31.9 0 8.20 9.27
## 2 PMSA003 0.389 0.017 22.5 0 0.355 0.422
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
## ID Oficial PMSA003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 3.7 0.017 8.74 -5.04
## 2 2 0.3 0 8.73 -8.43
## 3 3 5.9 1.61 9.36 -3.46
## 4 4 8.1 0.41 8.89 -0.793
## 5 5 3.7 1.72 9.40 -5.70
## 6 6 4 2.65 9.76 -5.76
## 7 7 4.8 4.04 10.3 -5.50
## 8 8 6 5.35 10.8 -4.81
## 9 9 8.3 6.25 11.2 -2.86
## 10 10 9.8 18.5 15.9 -6.12
## # ... with 1,105 more rows
#Caso 13: PMS7003 VS Oficial
df %>% select(PMS7003, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1115 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMS7003 |
0 |
1 |
11.6 |
11.87 |
0 |
1.94 |
8.34 |
17.9 |
91.8 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
12.8 |
8.32 |
0 |
6.50 |
11.20 |
17.7 |
55.4 |
▇▆▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = PMS7003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.563
ggplot(df, aes(x = PMS7003, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 PMS7003", y = "PM25 Oficial",
title = "Relationship between PMS7003 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMS7003, data = df)
#**Get regression table original:**
get_regression_table(score_model, digits = 11)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 8.22 0.288 28.5 0 7.66 8.79
## 2 PMS7003 0.394 0.0174 22.7 0 0.360 0.428
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,115 x 5
## ID Oficial PMS7003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 3.7 0 8.22 -4.52
## 2 2 0.3 0.29 8.34 -8.04
## 3 3 5.9 0.71 8.50 -2.60
## 4 4 8.1 1.4 8.78 -0.675
## 5 5 3.7 3.12 9.45 -5.75
## 6 6 4 4.16 9.86 -5.86
## 7 7 4.8 6.13 10.6 -5.84
## 8 8 6 7.61 11.2 -5.22
## 9 9 8.3 8.4 11.5 -3.24
## 10 10 9.8 20.2 16.2 -6.39
## # ... with 1,105 more rows
# **ESTACION FERIAS -1hora VS SENSORES BAJO COSTO**
# **Prueba con los valores de la estacion Ferias retrasada 1 hora
# **5 sensores diferentes: PMS7003 & PMSA003 & HPMA115S0 & SPS30 & SNGCJA5**
# **Comparaciones con la estación oficial*
df <- read_excel("C:/Mediciones/FERIAS_1h.xlsx")
View(df)
glimpse(df)
## Rows: 1,114
## Columns: 8
## $ Num <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 1~
## $ Fecha <chr> "12-11-2020 24:00", "13-11-2020 01:00", "13-11-2020 02:00", ~
## $ Oficial <dbl> 0.3, 5.9, 8.1, 3.7, 4.0, 4.8, 6.0, 8.3, 9.8, 16.5, 6.2, 10.2~
## $ PMS7003 <dbl> 0.00, 0.29, 0.71, 1.40, 3.12, 4.16, 6.13, 7.61, 8.40, 20.20,~
## $ PMSA003 <dbl> 0.0169, 0.0000, 1.6100, 0.4100, 1.7200, 2.6500, 4.0400, 5.35~
## $ HPMA115S0 <dbl> 18.6, 19.1, 19.4, 19.7, 20.4, 20.6, 22.0, 22.8, 23.7, 28.6, ~
## $ SPS30 <dbl> 0.90, 1.02, 1.25, 1.78, 2.60, 3.00, 4.11, 4.94, 5.48, 11.70,~
## $ SNGCJA5 <dbl> 0.00, 0.18, 0.45, 0.90, 1.51, 1.81, 2.86, 3.39, 3.48, 8.23, ~
df %>%
sample_n(size = 10)
## # A tibble: 10 x 8
## Num Fecha Oficial PMS7003 PMSA003 HPMA115S0 SPS30 SNGCJA5
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 122 18-11-2020 03:00 8.6 7.83 6.21 24.8 6.09 4.07
## 2 383 28-11-2020 23:00 13.2 8.57 6.41 25.2 5.66 4.09
## 3 999 27-12-2020 05:00 12 6.56 5.63 26.5 5.14 3.42
## 4 88 16-11-2020 16:00 19.7 27.9 28 37.9 17.7 14.1
## 5 293 25-11-2020 06:00 8.4 5.21 2.91 22.8 3.27 2.23
## 6 1104 08-01-2021 05:00 15 11.8 10.8 28.5 7.5 5
## 7 662 12-12-2020 04:00 7.7 9.91 7.91 27.4 6.27 4.98
## 8 140 18-11-2020 21:00 15.4 17.2 14.4 29.6 9.61 8.23
## 9 1152 10-01-2021 02:00 7 2.98 1.73 23.5 2.77 237.
## 10 225 22-11-2020 10:00 19.5 14.5 13.5 30.4 10.2 6.77
fig <- plot_ly(df, x = ~Num, y = ~PMS7003, name = 'PM2.5 PMS7003', type = 'scatter', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~PMSA003, name = 'PM2.5 PMSA003', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~HPMA115S0, name = 'PM2.5 HPMA115S0', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~SPS30, name = 'PM2.5 SPS30', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~SNGCJA5, name = 'PM2.5 SNGCJA5', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~Oficial, name = 'PM2.5 Oficial', mode = 'lines+markers')
fig
#Caso 14: SNGCJA5 VS Oficial-1h
df %>% select(SNGCJA5, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1114 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.68 |
9.75 |
0 |
1.53 |
4.8 |
9.75 |
236.87 |
▇▁▁▁▁ |
| Oficial |
0 |
1 |
12.81 |
8.32 |
0 |
6.50 |
11.2 |
17.70 |
55.40 |
▇▆▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.347
ggplot(df, aes(x = SNGCJA5, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 Oficial",
title = "Relationship between SNGCJA5 and Oficial-1hour") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 10.8 0.283 38.2 0 10.3 11.4
## 2 SNGCJA5 0.296 0.024 12.4 0 0.249 0.343
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,114 x 5
## ID Oficial SNGCJA5 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.3 0 10.8 -10.5
## 2 2 5.9 0.18 10.9 -4.98
## 3 3 8.1 0.45 11.0 -2.86
## 4 4 3.7 0.9 11.1 -7.39
## 5 5 4 1.51 11.3 -7.28
## 6 6 4.8 1.81 11.4 -6.56
## 7 7 6 2.86 11.7 -5.68
## 8 8 8.3 3.39 11.8 -3.53
## 9 9 9.8 3.48 11.9 -2.06
## 10 10 16.5 8.23 13.3 3.23
## # ... with 1,104 more rows
#Caso 15: SPS30 VS Oficial-1
df %>% select(SPS30, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1114 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
7.32 |
6.66 |
0.08 |
1.97 |
5.53 |
10.67 |
51.5 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
12.81 |
8.32 |
0.00 |
6.50 |
11.20 |
17.70 |
55.4 |
▇▆▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.712
ggplot(df, aes(x = SPS30, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 Oficial",
title = "Relationship between SPS30 and Oficial-1hour") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 6.3 0.26 24.2 0 5.79 6.81
## 2 SPS30 0.889 0.026 33.8 0 0.837 0.94
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,114 x 5
## ID Oficial SPS30 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.3 0.9 7.1 -6.8
## 2 2 5.9 1.02 7.21 -1.31
## 3 3 8.1 1.25 7.41 0.689
## 4 4 3.7 1.78 7.88 -4.18
## 5 5 4 2.6 8.61 -4.61
## 6 6 4.8 3 8.97 -4.17
## 7 7 6 4.11 9.95 -3.95
## 8 8 8.3 4.94 10.7 -2.39
## 9 9 9.8 5.48 11.2 -1.37
## 10 10 16.5 11.7 16.7 -0.196
## # ... with 1,104 more rows
#Caso 16: HPMA115S0 VS Oficial-1h
df %>% select(HPMA115S0, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1114 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
28.49 |
7.27 |
18.6 |
23.1 |
26.4 |
31.08 |
77.7 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
12.81 |
8.32 |
0.0 |
6.5 |
11.2 |
17.70 |
55.4 |
▇▆▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = HPMA115S0 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.711
ggplot(df, aes(x = HPMA115S0, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 HPMA115S0", y = "PM25 Oficial",
title = "Relationship between HPMA115S0 and Oficial-1hour") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -10.4 0.709 -14.6 0 -11.8 -8.97
## 2 HPMA115S0 0.813 0.024 33.7 0 0.766 0.861
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,114 x 5
## ID Oficial HPMA115S0 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.3 18.6 4.76 -4.46
## 2 2 5.9 19.1 5.17 0.729
## 3 3 8.1 19.4 5.42 2.68
## 4 4 3.7 19.7 5.66 -1.96
## 5 5 4 20.4 6.23 -2.23
## 6 6 4.8 20.6 6.39 -1.59
## 7 7 6 22 7.53 -1.53
## 8 8 8.3 22.8 8.18 0.12
## 9 9 9.8 23.7 8.91 0.888
## 10 10 16.5 28.6 12.9 3.60
## # ... with 1,104 more rows
#Caso 17: PMSA003 VS Oficial-1h
df %>% select(PMSA003, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1114 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMSA003 |
0 |
1 |
10.47 |
11.99 |
0 |
0.85 |
6.49 |
16.2 |
89.0 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
12.81 |
8.32 |
0 |
6.50 |
11.20 |
17.7 |
55.4 |
▇▆▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = PMSA003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.711
ggplot(df, aes(x = PMSA003, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 PMSA003", y = "PM25 Oficial",
title = "Relationship between PMSA003 and Oficial-1hour") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMSA003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 7.64 0.233 32.8 0 7.19 8.1
## 2 PMSA003 0.493 0.015 33.7 0 0.464 0.522
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,114 x 5
## ID Oficial PMSA003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.3 0.017 7.65 -7.35
## 2 2 5.9 0 7.64 -1.74
## 3 3 8.1 1.61 8.44 -0.337
## 4 4 3.7 0.41 7.85 -4.15
## 5 5 4 1.72 8.49 -4.49
## 6 6 4.8 2.65 8.95 -4.15
## 7 7 6 4.04 9.64 -3.64
## 8 8 8.3 5.35 10.3 -1.98
## 9 9 9.8 6.25 10.7 -0.925
## 10 10 16.5 18.5 16.8 -0.266
## # ... with 1,104 more rows
#Caso 18: PMS7003 VS Oficial-1h
df %>% select(PMS7003, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1114 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMS7003 |
0 |
1 |
11.61 |
11.87 |
0 |
1.95 |
8.37 |
17.9 |
91.8 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
12.81 |
8.32 |
0 |
6.50 |
11.20 |
17.7 |
55.4 |
▇▆▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = PMS7003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.713
ggplot(df, aes(x = PMS7003, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 PMS7003", y = "PM25 Oficial",
title = "Relationship between PMS7003 and Oficial-1hour") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMS7003, data = df)
#**Get regression table original:**
get_regression_table(score_model, digits = 11)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 7.00 0.244 28.7 0 6.52 7.48
## 2 PMS7003 0.500 0.0147 34.0 0 0.471 0.529
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,114 x 5
## ID Oficial PMS7003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.3 0 7.00 -6.70
## 2 2 5.9 0.29 7.15 -1.25
## 3 3 8.1 0.71 7.36 0.742
## 4 4 3.7 1.4 7.70 -4.00
## 5 5 4 3.12 8.56 -4.56
## 6 6 4.8 4.16 9.08 -4.28
## 7 7 6 6.13 10.1 -4.07
## 8 8 8.3 7.61 10.8 -2.51
## 9 9 9.8 8.4 11.2 -1.40
## 10 10 16.5 20.2 17.1 -0.598
## # ... with 1,104 more rows
# **ESTACION FERIAS -2horas VS SENSORES BAJO COSTO**
# **Prueba con los valores de la estacion Ferias retrasada 2 horas
# **5 sensores diferentes: PMS7003 & PMSA003 & HPMA115S0 & SPS30 & SNGCJA5**
# **Comparaciones con la estación oficial*
df <- read_excel("C:/Mediciones/FERIAS_2h.xlsx")
View(df)
glimpse(df)
## Rows: 1,113
## Columns: 8
## $ Num <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 1~
## $ Fecha <chr> "12-11-2020 24:00", "13-11-2020 01:00", "13-11-2020 02:00", ~
## $ Oficial <dbl> 5.9, 8.1, 3.7, 4.0, 4.8, 6.0, 8.3, 9.8, 16.5, 6.2, 10.2, 16.~
## $ PMS7003 <dbl> 0.00, 0.29, 0.71, 1.40, 3.12, 4.16, 6.13, 7.61, 8.40, 20.20,~
## $ PMSA003 <dbl> 0.0169, 0.0000, 1.6100, 0.4100, 1.7200, 2.6500, 4.0400, 5.35~
## $ HPMA115S0 <dbl> 18.6, 19.1, 19.4, 19.7, 20.4, 20.6, 22.0, 22.8, 23.7, 28.6, ~
## $ SPS30 <dbl> 0.90, 1.02, 1.25, 1.78, 2.60, 3.00, 4.11, 4.94, 5.48, 11.70,~
## $ SNGCJA5 <dbl> 0.00, 0.18, 0.45, 0.90, 1.51, 1.81, 2.86, 3.39, 3.48, 8.23, ~
df %>%
sample_n(size = 10)
## # A tibble: 10 x 8
## Num Fecha Oficial PMS7003 PMSA003 HPMA115S0 SPS30 SNGCJA5
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1031 28-12-2020 18:00 16 13 11 29 8 6
## 2 287 24-11-2020 24:00 9.9 6.69 4.71 23.9 4.38 3.29
## 3 1177 11-01-2021 03:00 8 0 0 21.9 0.407 15.6
## 4 493 03-12-2020 10:00 3.2 0.44 0.0962 22.2 1.13 0.15
## 5 452 01-12-2020 20:00 12.5 5.34 3.23 23.8 3.55 2.5
## 6 885 21-12-2020 24:00 8 3.71 2.55 24.6 3.13 2.16
## 7 911 23-12-2020 05:00 24 11.3 9.18 27.6 6.18 4.84
## 8 192 21-11-2020 01:00 3.6 1.37 0.44 22 1.86 1.09
## 9 983 26-12-2020 13:00 13 2.5 1.5 24.8 2.79 1.57
## 10 976 26-12-2020 06:00 10 10.3 9.92 28.3 7.54 5.54
fig <- plot_ly(df, x = ~Num, y = ~PMS7003, name = 'PM2.5 PMS7003', type = 'scatter', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~PMSA003, name = 'PM2.5 PMSA003', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~HPMA115S0, name = 'PM2.5 HPMA115S0', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~SPS30, name = 'PM2.5 SPS30', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~SNGCJA5, name = 'PM2.5 SNGCJA5', mode = 'lines+markers')
fig <- fig %>% add_trace(y = ~Oficial, name = 'PM2.5 Oficial', mode = 'lines+markers')
fig
#Caso 19: SNGCJA5 VS Oficial-2h
df %>% select(SNGCJA5, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1113 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
6.67 |
9.75 |
0 |
1.53 |
4.8 |
9.75 |
236.87 |
▇▁▁▁▁ |
| Oficial |
0 |
1 |
12.82 |
8.31 |
0 |
6.50 |
11.2 |
17.70 |
55.40 |
▇▆▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.405
ggplot(df, aes(x = SNGCJA5, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 Oficial",
title = "Relationship between SNGCJA5 and Oficial-2hour") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 10.5 0.276 38.1 0 9.97 11.1
## 2 SNGCJA5 0.345 0.023 14.8 0 0.299 0.391
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,113 x 5
## ID Oficial SNGCJA5 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 5.9 0 10.5 -4.62
## 2 2 8.1 0.18 10.6 -2.48
## 3 3 3.7 0.45 10.7 -6.97
## 4 4 4 0.9 10.8 -6.83
## 5 5 4.8 1.51 11.0 -6.24
## 6 6 6 1.81 11.1 -5.14
## 7 7 8.3 2.86 11.5 -3.20
## 8 8 9.8 3.39 11.7 -1.89
## 9 9 16.5 3.48 11.7 4.78
## 10 10 6.2 8.23 13.4 -7.16
## # ... with 1,103 more rows
#Caso 20: SPS30 VS Oficial-2
df %>% select(SPS30, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1113 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
7.33 |
6.66 |
0.08 |
1.97 |
5.53 |
10.7 |
51.5 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
12.82 |
8.31 |
0.00 |
6.50 |
11.20 |
17.7 |
55.4 |
▇▆▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.807
ggplot(df, aes(x = SPS30, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 Oficial",
title = "Relationship between SPS30 and Oficial-2hour") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 5.44 0.219 24.8 0 5.01 5.87
## 2 SPS30 1.01 0.022 45.5 0 0.964 1.05
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,113 x 5
## ID Oficial SPS30 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 5.9 0.9 6.34 -0.445
## 2 2 8.1 1.02 6.47 1.63
## 3 3 3.7 1.25 6.70 -3.00
## 4 4 4 1.78 7.23 -3.23
## 5 5 4.8 2.6 8.06 -3.26
## 6 6 6 3 8.46 -2.46
## 7 7 8.3 4.11 9.58 -1.28
## 8 8 9.8 4.94 10.4 -0.613
## 9 9 16.5 5.48 11.0 5.54
## 10 10 6.2 11.7 17.2 -11.0
## # ... with 1,103 more rows
#Caso 21: HPMA115S0 VS Oficial-2h
df %>% select(HPMA115S0, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1113 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
28.49 |
7.27 |
18.6 |
23.1 |
26.4 |
31.1 |
77.7 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
12.82 |
8.31 |
0.0 |
6.5 |
11.2 |
17.7 |
55.4 |
▇▆▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = HPMA115S0 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.809
ggplot(df, aes(x = HPMA115S0, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 HPMA115S0", y = "PM25 Oficial",
title = "Relationship between HPMA115S0 and Oficial-2hour") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -13.5 0.593 -22.8 0 -14.7 -12.3
## 2 HPMA115S0 0.924 0.02 45.8 0 0.884 0.963
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,113 x 5
## ID Oficial HPMA115S0 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 5.9 18.6 3.68 2.22
## 2 2 8.1 19.1 4.14 3.96
## 3 3 3.7 19.4 4.42 -0.716
## 4 4 4 19.7 4.69 -0.693
## 5 5 4.8 20.4 5.34 -0.54
## 6 6 6 20.6 5.52 0.475
## 7 7 8.3 22 6.82 1.48
## 8 8 9.8 22.8 7.56 2.24
## 9 9 16.5 23.7 8.39 8.11
## 10 10 6.2 28.6 12.9 -6.72
## # ... with 1,103 more rows
#Caso 22: PMSA003 VS Oficial-2h
df %>% select(PMSA003, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1113 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMSA003 |
0 |
1 |
10.48 |
11.99 |
0 |
0.85 |
6.5 |
16.2 |
89.0 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
12.82 |
8.31 |
0 |
6.50 |
11.2 |
17.7 |
55.4 |
▇▆▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = PMSA003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.806
ggplot(df, aes(x = PMSA003, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 PMSA003", y = "PM25 Oficial",
title = "Relationship between PMSA003 and Oficial-2hour") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMSA003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 6.96 0.196 35.5 0 6.58 7.35
## 2 PMSA003 0.559 0.012 45.3 0 0.534 0.583
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,113 x 5
## ID Oficial PMSA003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 5.9 0.017 6.97 -1.07
## 2 2 8.1 0 6.96 1.14
## 3 3 3.7 1.61 7.86 -4.16
## 4 4 4 0.41 7.19 -3.19
## 5 5 4.8 1.72 7.92 -3.12
## 6 6 6 2.65 8.44 -2.44
## 7 7 8.3 4.04 9.22 -0.921
## 8 8 9.8 5.35 9.95 -0.152
## 9 9 16.5 6.25 10.5 6.04
## 10 10 6.2 18.5 17.3 -11.1
## # ... with 1,103 more rows
#Caso 23: PMS7003 VS Oficial-2h
df %>% select(PMS7003, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1113 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMS7003 |
0 |
1 |
11.62 |
11.87 |
0 |
1.97 |
8.4 |
17.9 |
91.8 |
▇▂▁▁▁ |
| Oficial |
0 |
1 |
12.82 |
8.31 |
0 |
6.50 |
11.2 |
17.7 |
55.4 |
▇▆▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = PMS7003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.811
ggplot(df, aes(x = PMS7003, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 PMS7003", y = "PM25 Oficial",
title = "Relationship between PMS7003 and Oficial-2hour") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMS7003, data = df)
#**Get regression table original:**
get_regression_table(score_model, digits = 11)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 6.22 0.204 30.5 0 5.82 6.62
## 2 PMS7003 0.567 0.0123 46.1 0 0.543 0.592
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,113 x 5
## ID Oficial PMS7003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 5.9 0 6.22 -0.321
## 2 2 8.1 0.29 6.39 1.71
## 3 3 3.7 0.71 6.62 -2.92
## 4 4 4 1.4 7.02 -3.02
## 5 5 4.8 3.12 7.99 -3.19
## 6 6 6 4.16 8.58 -2.58
## 7 7 8.3 6.13 9.7 -1.4
## 8 8 9.8 7.61 10.5 -0.74
## 9 9 16.5 8.4 11.0 5.51
## 10 10 6.2 20.2 17.7 -11.5
## # ... with 1,103 more rows