library(readxl)
library(tidyverse)
## -- Attaching packages --------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v dplyr 1.0.2
## v tidyr 1.1.1 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(moderndive)
library(skimr)
# **ESTACION PAIBA VS CANAIRIOS**
# **5 different sensors: PMS7003 & PMSA003 & HPMA115S0 & SPS30 & SNGCJA5**
# Load the PAIBA vs. CanAirIO PM2.5 measurements workbook.
# Fail early with a clear message if the workbook is not where we expect it.
data_path <- "C:/Mediciones/PAIBA_CANAIRIOS.xlsx"
if (!file.exists(data_path)) {
  stop("Input workbook not found: ", data_path, call. = FALSE)
}
df <- read_excel(data_path)
# View() opens a GUI data viewer; restrict it to interactive sessions so the
# script can also be run non-interactively (Rscript, knitr) without a viewer.
if (interactive()) {
  View(df)
}
# Print column names, types and leading values as a quick structural check.
glimpse(df)
## Rows: 1,241
## Columns: 8
## $ Num <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17...
## $ Fecha <dttm> 2020-11-07 00:00:00, 2020-11-07 01:00:00, 2020-11-07 02:...
## $ Oficial <dbl> 15.99, 12.09, 10.31, 13.64, 16.51, 18.62, 18.97, 22.07, 2...
## $ PMS7003 <dbl> 33.574074, 27.057692, 22.096154, 37.192308, 46.180000, 47...
## $ PMSA003 <dbl> 31.666667, 25.134615, 20.115385, 35.923077, 44.300000, 47...
## $ HPMA115S0 <dbl> 19.333333, 14.730769, 12.230769, 19.750000, 24.360000, 25...
## $ SPS30 <dbl> 17.185185, 14.115385, 11.634615, 19.442308, 23.260000, 24...
## $ SNGCJA5 <dbl> 13.259259, 10.634615, 8.673077, 14.846154, 17.900000, 19....
# Inspect 10 randomly chosen rows of the data set.
# slice_sample() supersedes sample_n() as of dplyr 1.0.0.
df %>%
  slice_sample(n = 10)
## # A tibble: 10 x 8
## Num Fecha Oficial PMS7003 PMSA003 HPMA115S0 SPS30 SNGCJA5
## <dbl> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 542 2020-11-30 21:00:00 8.65 18.7 18.3 11.3 10.5 7.45
## 2 1216 2020-12-28 23:00:00 11.1 7.62 6.13 5.25 5.09 2.93
## 3 1095 2020-12-23 22:00:00 12 19.3 20.0 10.6 10.8 7.55
## 4 700 2020-12-07 11:00:00 32.2 30.2 28.5 16.2 14.9 10.7
## 5 808 2020-12-11 23:00:00 18.2 18.0 18.5 11.4 10.3 7.87
## 6 1108 2020-12-24 11:00:00 12.8 5.21 3.86 3.72 3.61 1.68
## 7 408 2020-11-25 07:00:00 9.7 21.9 22.8 11.3 11.1 8.63
## 8 255 2020-11-17 14:00:00 9.67 16.8 12.6 9.81 7.93 5.80
## 9 160 2020-11-13 15:00:00 10.9 17.4 15.5 11.4 8.89 6.82
## 10 724 2020-12-08 11:00:00 12.7 1.94 0.774 2.32 1.72 0.434
# Overlay all six PM2.5 series (five sensors plus the official reading)
# against the row index `Num`, one lines+markers trace per column.
fig <- df %>%
  plot_ly(
    x = ~Num, y = ~PMS7003, name = "PM2.5 PMS7003",
    type = "scatter", mode = "lines+markers"
  ) %>%
  add_trace(y = ~PMSA003, name = "PM2.5 PMSA003", mode = "lines+markers") %>%
  add_trace(y = ~HPMA115S0, name = "PM2.5 HPMA115S0", mode = "lines+markers") %>%
  add_trace(y = ~SPS30, name = "PM2.5 SPS30", mode = "lines+markers") %>%
  add_trace(y = ~SNGCJA5, name = "PM2.5 SNGCJA5", mode = "lines+markers") %>%
  add_trace(y = ~Oficial, name = "PM2.5 Oficial", mode = "lines+markers")
# Print the widget so it renders.
fig
#Caso 1: SNGCJA5 VS SPS30
df %>% select(SNGCJA5, SPS30) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1241 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
8.87 |
6.48 |
0.05 |
3.80 |
7.47 |
12.32 |
40.80 |
▇▅▂▁▁ |
| SPS30 |
0 |
1 |
12.25 |
8.36 |
1.03 |
5.78 |
10.41 |
16.59 |
51.67 |
▇▅▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ SPS30)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.995
ggplot(df, aes(x = SNGCJA5, y = SPS30)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 SPS30",
title = "Relationship between SNGCJA5 and SPS30") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(SPS30 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 0.875 0.042 20.9 0 0.793 0.957
## 2 SNGCJA5 1.28 0.004 336. 0 1.27 1.29
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
## ID SPS30 SNGCJA5 SPS30_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 17.2 13.3 17.9 -0.688
## 2 2 14.1 10.6 14.5 -0.393
## 3 3 11.6 8.67 12.0 -0.359
## 4 4 19.4 14.8 19.9 -0.465
## 5 5 23.3 17.9 23.8 -0.563
## 6 6 25.0 19.3 25.7 -0.679
## 7 7 23.9 18.4 24.5 -0.56
## 8 8 27.5 22.1 29.2 -1.76
## 9 9 28.4 22.9 30.2 -1.75
## 10 10 39.4 30.7 40.3 -0.885
## # ... with 1,231 more rows
#Caso 2: SNGCJA5 VS HPMA115S0
df %>% select(SNGCJA5, HPMA115S0) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1241 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
8.87 |
6.48 |
0.05 |
3.80 |
7.47 |
12.32 |
40.80 |
▇▅▂▁▁ |
| HPMA115S0 |
0 |
1 |
12.88 |
8.60 |
1.35 |
6.15 |
11.05 |
17.25 |
51.89 |
▇▅▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ HPMA115S0)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.990
ggplot(df, aes(x = SNGCJA5, y = HPMA115S0)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 HPMA115S0",
title = "Relationship between SNGCJA5 and HPMA115S0") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(HPMA115S0 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 1.24 0.059 20.9 0 1.12 1.36
## 2 SNGCJA5 1.31 0.005 243. 0 1.30 1.32
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
## ID HPMA115S0 SNGCJA5 HPMA115S0_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 19.3 13.3 18.6 0.688
## 2 2 14.7 10.6 15.2 -0.469
## 3 3 12.2 8.67 12.6 -0.394
## 4 4 19.8 14.8 20.7 -0.978
## 5 5 24.4 17.9 24.7 -0.377
## 6 6 25 19.3 26.6 -1.62
## 7 7 24.5 18.4 25.4 -0.926
## 8 8 29.3 22.1 30.3 -0.987
## 9 9 30.8 22.9 31.3 -0.447
## 10 10 42.3 30.7 41.6 0.698
## # ... with 1,231 more rows
#Caso 3: SNGCJA5 VS PMSA003
df %>% select(SNGCJA5, PMSA003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1241 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
8.87 |
6.48 |
0.05 |
3.80 |
7.47 |
12.32 |
40.80 |
▇▅▂▁▁ |
| PMSA003 |
0 |
1 |
22.22 |
17.04 |
0.04 |
8.44 |
18.74 |
31.61 |
108.45 |
▇▅▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.996
ggplot(df, aes(x = SNGCJA5, y = PMSA003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 PMSA003",
title = "Relationship between SNGCJA5 and PMSA003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -0.991 0.074 -13.3 0 -1.14 -0.845
## 2 SNGCJA5 2.62 0.007 387. 0 2.60 2.63
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
## ID PMSA003 SNGCJA5 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 31.7 13.3 33.7 -2.04
## 2 2 25.1 10.6 26.8 -1.70
## 3 3 20.1 8.67 21.7 -1.59
## 4 4 35.9 14.8 37.9 -1.93
## 5 5 44.3 17.9 45.8 -1.55
## 6 6 47.1 19.3 49.6 -2.49
## 7 7 42.2 18.4 47.2 -5.00
## 8 8 52.0 22.1 56.9 -4.90
## 9 9 55.0 22.9 58.9 -3.87
## 10 10 75.5 30.7 79.4 -3.94
## # ... with 1,231 more rows
#Caso 4: SNGCJA5 VS PMS7003
df %>% select(SNGCJA5, PMS7003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1241 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
8.87 |
6.48 |
0.05 |
3.80 |
7.47 |
12.32 |
40.80 |
▇▅▂▁▁ |
| PMS7003 |
0 |
1 |
22.17 |
15.69 |
0.70 |
9.64 |
19.25 |
30.73 |
93.25 |
▇▆▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.987
ggplot(df, aes(x = SNGCJA5, y = PMS7003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 PMS7003",
title = "Relationship between SNGCJA5 and PMS7003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 0.984 0.122 8.08 0 0.745 1.22
## 2 SNGCJA5 2.39 0.011 215. 0 2.37 2.41
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
## ID PMS7003 SNGCJA5 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 33.6 13.3 32.7 0.92
## 2 2 27.1 10.6 26.4 0.673
## 3 3 22.1 8.67 21.7 0.396
## 4 4 37.2 14.8 36.4 0.748
## 5 5 46.2 17.9 43.7 2.44
## 6 6 47.7 19.3 47.2 0.561
## 7 7 44.4 18.4 45.0 -0.576
## 8 8 52.9 22.1 53.8 -0.889
## 9 9 54.0 22.9 55.6 -1.65
## 10 10 73.7 30.7 74.4 -0.667
## # ... with 1,231 more rows
#Caso 5: SNGCJA5 VS Oficial
df %>% select(SNGCJA5, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1241 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
8.87 |
6.48 |
0.05 |
3.80 |
7.47 |
12.32 |
40.80 |
▇▅▂▁▁ |
| Oficial |
0 |
1 |
15.38 |
8.41 |
2.06 |
9.51 |
13.39 |
19.63 |
62.22 |
▇▅▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.564
ggplot(df, aes(x = SNGCJA5, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 Oficial",
title = "Relationship between SNGCJA5 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 8.88 0.334 26.6 0 8.22 9.54
## 2 SNGCJA5 0.732 0.03 24.1 0 0.673 0.792
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
## ID Oficial SNGCJA5 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 16.0 13.3 18.6 -2.6
## 2 2 12.1 10.6 16.7 -4.58
## 3 3 10.3 8.67 15.2 -4.92
## 4 4 13.6 14.8 19.8 -6.11
## 5 5 16.5 17.9 22.0 -5.48
## 6 6 18.6 19.3 23.0 -4.42
## 7 7 19.0 18.4 22.4 -3.40
## 8 8 22.1 22.1 25.1 -3.00
## 9 9 22.6 22.9 25.6 -3.04
## 10 10 28.7 30.7 31.4 -2.67
## # ... with 1,231 more rows
#Caso 6: SPS30 VS HPMA115S0
df %>% select(SPS30, HPMA115S0) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1241 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
12.25 |
8.36 |
1.03 |
5.78 |
10.41 |
16.59 |
51.67 |
▇▅▂▁▁ |
| HPMA115S0 |
0 |
1 |
12.88 |
8.60 |
1.35 |
6.15 |
11.05 |
17.25 |
51.89 |
▇▅▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ HPMA115S0)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.994
ggplot(df, aes(x = SPS30, y = HPMA115S0)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 HPMA115S0",
title = "Relationship between SPS30 and HPMA115S0") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(HPMA115S0 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 0.357 0.047 7.56 0 0.265 0.45
## 2 SPS30 1.02 0.003 321. 0 1.02 1.03
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
## ID HPMA115S0 SPS30 HPMA115S0_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 19.3 17.2 17.9 1.40
## 2 2 14.7 14.1 14.8 -0.065
## 3 3 12.2 11.6 12.3 -0.027
## 4 4 19.8 19.4 20.2 -0.494
## 5 5 24.4 23.3 24.1 0.211
## 6 6 25 25.0 25.9 -0.91
## 7 7 24.5 23.9 24.8 -0.341
## 8 8 29.3 27.5 28.4 0.83
## 9 9 30.8 28.4 29.5 1.36
## 10 10 42.3 39.4 40.6 1.63
## # ... with 1,231 more rows
#Caso 7: SPS30 VS PMSA003
df %>% select(SPS30, PMSA003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1241 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
12.25 |
8.36 |
1.03 |
5.78 |
10.41 |
16.59 |
51.67 |
▇▅▂▁▁ |
| PMSA003 |
0 |
1 |
22.22 |
17.04 |
0.04 |
8.44 |
18.74 |
31.61 |
108.45 |
▇▅▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.993
ggplot(df, aes(x = SPS30, y = PMSA003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 PMSA003",
title = "Relationship between SPS30 and PMSA003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -2.57 0.1 -25.7 0 -2.77 -2.38
## 2 SPS30 2.02 0.007 300. 0 2.01 2.04
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
## ID PMSA003 SPS30 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 31.7 17.2 32.2 -0.552
## 2 2 25.1 14.1 26.0 -0.869
## 3 3 20.1 11.6 21.0 -0.866
## 4 4 35.9 19.4 36.8 -0.865
## 5 5 44.3 23.3 44.5 -0.217
## 6 6 47.1 25.0 48.0 -0.891
## 7 7 42.2 23.9 45.9 -3.67
## 8 8 52.0 27.5 53.0 -1.06
## 9 9 55.0 28.4 55.0 -0.031
## 10 10 75.5 39.4 77.2 -1.68
## # ... with 1,231 more rows
#Caso 8: SPS30 VS PMS7003
df %>% select(SPS30, PMS7003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1241 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
12.25 |
8.36 |
1.03 |
5.78 |
10.41 |
16.59 |
51.67 |
▇▅▂▁▁ |
| PMS7003 |
0 |
1 |
22.17 |
15.69 |
0.70 |
9.64 |
19.25 |
30.73 |
93.25 |
▇▆▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.991
ggplot(df, aes(x = SPS30, y = PMS7003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 PMS7003",
title = "Relationship between SPS30 and PMS7003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -0.62 0.105 -5.91 0 -0.825 -0.414
## 2 SPS30 1.86 0.007 263. 0 1.85 1.88
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
## ID PMS7003 SPS30 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 33.6 17.2 31.4 2.21
## 2 2 27.1 14.1 25.6 1.41
## 3 3 22.1 11.6 21.0 1.06
## 4 4 37.2 19.4 35.6 1.63
## 5 5 46.2 23.3 42.7 3.51
## 6 6 47.7 25.0 45.9 1.85
## 7 7 44.4 23.9 43.9 0.492
## 8 8 52.9 27.5 50.5 2.42
## 9 9 54.0 28.4 52.3 1.65
## 10 10 73.7 39.4 72.7 1.04
## # ... with 1,231 more rows
#Caso 9: SPS30 VS Oficial
df %>% select(SPS30, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1241 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
12.25 |
8.36 |
1.03 |
5.78 |
10.41 |
16.59 |
51.67 |
▇▅▂▁▁ |
| Oficial |
0 |
1 |
15.38 |
8.41 |
2.06 |
9.51 |
13.39 |
19.63 |
62.22 |
▇▅▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.559
ggplot(df, aes(x = SPS30, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 Oficial",
title = "Relationship between SPS30 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 8.49 0.352 24.1 0 7.80 9.18
## 2 SPS30 0.562 0.024 23.7 0 0.516 0.609
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
## ID Oficial SPS30 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 16.0 17.2 18.2 -2.16
## 2 2 12.1 14.1 16.4 -4.34
## 3 3 10.3 11.6 15.0 -4.72
## 4 4 13.6 19.4 19.4 -5.78
## 5 5 16.5 23.3 21.6 -5.06
## 6 6 18.6 25.0 22.5 -3.92
## 7 7 19.0 23.9 21.9 -2.97
## 8 8 22.1 27.5 23.9 -1.86
## 9 9 22.6 28.4 24.5 -1.89
## 10 10 28.7 39.4 30.6 -1.92
## # ... with 1,231 more rows
#Caso 10: HPMA115S0 VS PMSA003
df %>% select(HPMA115S0, PMSA003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1241 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
12.88 |
8.60 |
1.35 |
6.15 |
11.05 |
17.25 |
51.89 |
▇▅▂▁▁ |
| PMSA003 |
0 |
1 |
22.22 |
17.04 |
0.04 |
8.44 |
18.74 |
31.61 |
108.45 |
▇▅▁▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = HPMA115S0 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.984
ggplot(df, aes(x = HPMA115S0, y = PMSA003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 HPMA115S0", y = "PM25 PMSA003",
title = "Relationship between HPMA115S0 and PMSA003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -2.89 0.156 -18.6 0 -3.20 -2.59
## 2 HPMA115S0 1.95 0.01 194. 0 1.93 1.97
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
## ID PMSA003 HPMA115S0 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 31.7 19.3 34.8 -3.12
## 2 2 25.1 14.7 25.8 -0.685
## 3 3 20.1 12.2 20.9 -0.832
## 4 4 35.9 19.8 35.6 0.32
## 5 5 44.3 24.4 44.6 -0.288
## 6 6 47.1 25 45.8 1.27
## 7 7 42.2 24.5 44.8 -2.64
## 8 8 52.0 29.3 54.2 -2.21
## 9 9 55.0 30.8 57.2 -2.19
## 10 10 75.5 42.3 79.5 -4.03
## # ... with 1,231 more rows
#Caso 11: HPMA115S0 VS PMS7003
df %>% select(HPMA115S0, PMS7003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1241 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
12.88 |
8.60 |
1.35 |
6.15 |
11.05 |
17.25 |
51.89 |
▇▅▂▁▁ |
| PMS7003 |
0 |
1 |
22.17 |
15.69 |
0.70 |
9.64 |
19.25 |
30.73 |
93.25 |
▇▆▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = HPMA115S0 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.990
ggplot(df, aes(x = HPMA115S0, y = PMS7003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 HPMA115S0", y = "PM25 PMS7003",
title = "Relationship between HPMA115S0 and PMS7003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -1.11 0.111 -10.0 0 -1.33 -0.895
## 2 HPMA115S0 1.81 0.007 253. 0 1.79 1.82
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
## ID PMS7003 HPMA115S0 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 33.6 19.3 33.8 -0.252
## 2 2 27.1 14.7 25.5 1.55
## 3 3 22.1 12.2 21.0 1.11
## 4 4 37.2 19.8 34.6 2.61
## 5 5 46.2 24.4 42.9 3.27
## 6 6 47.7 25 44.1 3.66
## 7 7 44.4 24.5 43.1 1.25
## 8 8 52.9 29.3 51.8 1.11
## 9 9 54.0 30.8 54.6 -0.612
## 10 10 73.7 42.3 75.3 -1.57
## # ... with 1,231 more rows
#Caso 12: HPMA115S0 VS Oficial
df %>% select(HPMA115S0, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1241 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
12.88 |
8.60 |
1.35 |
6.15 |
11.05 |
17.25 |
51.89 |
▇▅▂▁▁ |
| Oficial |
0 |
1 |
15.38 |
8.41 |
2.06 |
9.51 |
13.39 |
19.63 |
62.22 |
▇▅▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = HPMA115S0 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.577
ggplot(df, aes(x = HPMA115S0, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 HPMA115S0", y = "PM25 Oficial",
title = "Relationship between HPMA115S0 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 8.11 0.352 23.0 0 7.42 8.80
## 2 HPMA115S0 0.564 0.023 24.8 0 0.52 0.609
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
## ID Oficial HPMA115S0 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 16.0 19.3 19.0 -3.02
## 2 2 12.1 14.7 16.4 -4.33
## 3 3 10.3 12.2 15.0 -4.70
## 4 4 13.6 19.8 19.2 -5.61
## 5 5 16.5 24.4 21.8 -5.34
## 6 6 18.6 25 22.2 -3.59
## 7 7 19.0 24.5 21.9 -2.95
## 8 8 22.1 29.3 24.6 -2.55
## 9 9 22.6 30.8 25.5 -2.9
## 10 10 28.7 42.3 32.0 -3.24
## # ... with 1,231 more rows
#Caso 13: PMSA003 VS PMS7003
# NOTE: this section previously repeated the PMSA003-vs-Oficial analysis
# verbatim (same code and output as the section that follows), so the
# PMSA003-vs-PMS7003 pair named in its heading was never analysed. The code
# below targets the intended pair. The console output that used to be pasted
# here belonged to the wrong pair and was removed; re-run to regenerate it.

# Summary statistics for the two columns being compared.
df %>%
  select(PMSA003, PMS7003) %>%
  skim()

# Pearson correlation coefficient between the two sensors.
df %>%
  get_correlation(formula = PMSA003 ~ PMS7003)

# Scatter plot with a fitted least-squares line (no confidence band).
ggplot(df, aes(x = PMSA003, y = PMS7003)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 PMSA003", y = "PM25 PMS7003",
       title = "Relationship between PMSA003 and PMS7003") +
  geom_smooth(method = "lm", se = FALSE)

# Fit the simple linear regression of PMS7003 on PMSA003, following the
# orientation used by the other sections (x = first sensor, y = second).
score_model <- lm(PMS7003 ~ PMSA003, data = df)

# Regression table: estimates, standard errors and confidence intervals.
get_regression_table(score_model)

# Observed values, fitted values and residuals for every row.
regression_points <- get_regression_points(score_model)
regression_points
#Caso 14: PMSA003 VS Oficial
df %>% select(PMSA003, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1241 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMSA003 |
0 |
1 |
22.22 |
17.04 |
0.04 |
8.44 |
18.74 |
31.61 |
108.45 |
▇▅▁▁▁ |
| Oficial |
0 |
1 |
15.38 |
8.41 |
2.06 |
9.51 |
13.39 |
19.63 |
62.22 |
▇▅▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = PMSA003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.580
ggplot(df, aes(x = PMSA003, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 PMSA003", y = "PM25 Oficial",
title = "Relationship between PMSA003 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMSA003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 9.00 0.32 28.1 0 8.38 9.63
## 2 PMSA003 0.287 0.011 25.1 0 0.264 0.309
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
## ID Oficial PMSA003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 16.0 31.7 18.1 -2.09
## 2 2 12.1 25.1 16.2 -4.12
## 3 3 10.3 20.1 14.8 -4.46
## 4 4 13.6 35.9 19.3 -5.66
## 5 5 16.5 44.3 21.7 -5.20
## 6 6 18.6 47.1 22.5 -3.89
## 7 7 19.0 42.2 21.1 -2.13
## 8 8 22.1 52.0 23.9 -1.83
## 9 9 22.6 55.0 24.8 -2.18
## 10 10 28.7 75.5 30.6 -1.93
## # ... with 1,231 more rows
#Caso 15: PMS7003 VS Oficial
df %>% select(PMS7003, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
1241 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMS7003 |
0 |
1 |
22.17 |
15.69 |
0.70 |
9.64 |
19.25 |
30.73 |
93.25 |
▇▆▂▁▁ |
| Oficial |
0 |
1 |
15.38 |
8.41 |
2.06 |
9.51 |
13.39 |
19.63 |
62.22 |
▇▅▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = PMS7003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.574
# Scatter plot of Oficial against PMS7003 with a fitted least-squares line
# (no confidence band). The title previously named PMSA003, a copy-paste
# slip from the preceding section; it now matches the plotted sensor.
ggplot(df, aes(x = PMS7003, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 PMS7003", y = "PM25 Oficial",
       title = "Relationship between PMS7003 and Oficial") +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMS7003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 8.55 0.339 25.2 0 7.89 9.22
## 2 PMS7003 0.308 0.012 24.7 0 0.283 0.332
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 1,241 x 5
## ID Oficial PMS7003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 16.0 33.6 18.9 -2.89
## 2 2 12.1 27.1 16.9 -4.79
## 3 3 10.3 22.1 15.4 -5.04
## 4 4 13.6 37.2 20.0 -6.36
## 5 5 16.5 46.2 22.8 -6.25
## 6 6 18.6 47.7 23.2 -4.62
## 7 7 19.0 44.4 22.2 -3.24
## 8 8 22.1 52.9 24.8 -2.76
## 9 9 22.6 54.0 25.2 -2.57
## 10 10 28.7 73.7 31.2 -2.52
## # ... with 1,231 more rows