library(readxl)
library(tidyverse)
## -- Attaching packages --------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v dplyr 1.0.2
## v tidyr 1.1.1 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(moderndive)
library(skimr)
# **November ESTACION PAIBA VS CANAIRIOS**
# **5 different sensors: PMS7003 & PMSA003 & HPMA115S0 & SPS30 & SNGCJA5**
# Load the November PAIBA vs CANAIRIOS measurements from the Excel workbook.
df <- read_excel("C:/Mediciones/PAIBA_CANAIRIOS_NOV.xlsx")
# View() opens a spreadsheet-style viewer, which only makes sense in an
# interactive session; skip it when the script is run non-interactively.
if (interactive()) {
  View(df)
}
# Print each column's name, type and first few values.
glimpse(df)
## Rows: 544
## Columns: 8
## $ Num <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17...
## $ Fecha <dttm> 2020-11-07 00:00:00, 2020-11-07 01:00:00, 2020-11-07 02:...
## $ Oficial <dbl> 15.99, 12.09, 10.31, 13.64, 16.51, 18.62, 18.97, 22.07, 2...
## $ PMS7003 <dbl> 33.574074, 27.057692, 22.096154, 37.192308, 46.180000, 47...
## $ PMSA003 <dbl> 31.666667, 25.134615, 20.115385, 35.923077, 44.300000, 47...
## $ HPMA115S0 <dbl> 19.333333, 14.730769, 12.230769, 19.750000, 24.360000, 25...
## $ SPS30 <dbl> 17.185185, 14.115385, 11.634615, 19.442308, 23.260000, 24...
## $ SNGCJA5 <dbl> 13.259259, 10.634615, 8.673077, 14.846154, 17.900000, 19....
# Show 10 randomly chosen rows as a quick sanity check of the loaded data.
# slice_sample() is the current dplyr verb; sample_n() is superseded.
df %>%
  slice_sample(n = 10)
## # A tibble: 10 x 8
## Num Fecha Oficial PMS7003 PMSA003 HPMA115S0 SPS30 SNGCJA5
## <dbl> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 424 2020-11-25 23:00:00 14.7 30.3 34.2 18.5 17.6 14.1
## 2 351 2020-11-22 22:00:00 6.77 8.45 7.38 5.76 5.24 3.35
## 3 42 2020-11-08 17:00:00 12.0 27.5 25.2 14.4 13.9 10.4
## 4 54 2020-11-09 05:00:00 17.2 44.5 40.2 26.1 23.8 17.6
## 5 370 2020-11-23 17:00:00 13.5 37.3 40.3 21.4 20.5 15.4
## 6 21 2020-11-07 20:00:00 7.67 11.4 10.6 7.69 6.89 5.05
## 7 184 2020-11-14 15:00:00 13.2 25.1 23.5 13.2 12.1 9.60
## 8 50 2020-11-09 01:00:00 17.8 44.5 42.9 25.2 23.5 18.1
## 9 137 2020-11-12 16:00:00 14.0 37.5 33.9 21.2 18.3 14.1
## 10 211 2020-11-15 18:00:00 4.16 5.45 3.78 3.25 3.45 2.04
# Overlay every sensor series and the official station series on a single
# interactive chart, using the row index (Num) as the x axis. Each series is
# drawn as a line with markers and gets its own legend entry.
fig <- plot_ly(
  df,
  x = ~Num,
  y = ~PMS7003,
  name = 'PM2.5 PMS7003',
  type = 'scatter',
  mode = 'lines+markers'
) %>%
  add_trace(y = ~PMSA003, name = 'PM2.5 PMSA003', mode = 'lines+markers') %>%
  add_trace(y = ~HPMA115S0, name = 'PM2.5 HPMA115S0', mode = 'lines+markers') %>%
  add_trace(y = ~SPS30, name = 'PM2.5 SPS30', mode = 'lines+markers') %>%
  add_trace(y = ~SNGCJA5, name = 'PM2.5 SNGCJA5', mode = 'lines+markers') %>%
  add_trace(y = ~Oficial, name = 'PM2.5 Oficial', mode = 'lines+markers')
# Render the assembled figure.
fig
#Caso 1: SNGCJA5 VS SPS30
# Summary statistics (missing count, mean, sd, quantiles, mini histogram) for
# the two series compared in this case.
df %>% select(SNGCJA5, SPS30) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
544 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
10.07 |
6.36 |
0.05 |
5.29 |
9.15 |
13.71 |
36.96 |
▇▇▃▁▁ |
| SPS30 |
0 |
1 |
13.47 |
8.07 |
1.03 |
7.38 |
12.33 |
18.29 |
44.85 |
▇▇▃▁▁ |
#**Pearson correlation coefficient original**
# Correlation between the two sensors over all rows of df.
get_correlation(df, formula = SNGCJA5 ~ SPS30)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.995
# Scatter plot of the two sensors with a least-squares line (no confidence
# band) to check visually how linear the relationship is.
ggplot(data = df, mapping = aes(x = SNGCJA5, y = SPS30)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between SNGCJA5 and SPS30",
    x = "PM25 SNGCJA5",
    y = "PM25 SPS30"
  )
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
# Simple linear regression with SPS30 as the response and SNGCJA5 as the
# predictor.
score_model <- lm(formula = SPS30 ~ SNGCJA5, data = df)
#**Get regression table original:**
# Coefficient table: estimate, standard error, test statistic, p-value and
# confidence bounds for the intercept and the slope.
score_model %>%
  get_regression_table()
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 0.75 0.062 12.0 0 0.628 0.873
## 2 SNGCJA5 1.26 0.005 241. 0 1.25 1.27
# Per-row table of the observed response, the predictor, the fitted value and
# the residual for the model fitted above.
regression_points <- score_model %>%
  get_regression_points()
regression_points
## # A tibble: 544 x 5
## ID SPS30 SNGCJA5 SPS30_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 17.2 13.3 17.5 -0.323
## 2 2 14.1 10.6 14.2 -0.076
## 3 3 11.6 8.67 11.7 -0.077
## 4 4 19.4 14.8 19.5 -0.071
## 5 5 23.3 17.9 23.4 -0.113
## 6 6 25.0 19.3 25.2 -0.203
## 7 7 23.9 18.4 24.0 -0.101
## 8 8 27.5 22.1 28.7 -1.23
## 9 9 28.4 22.9 29.7 -1.21
## 10 10 39.4 30.7 39.6 -0.203
## # ... with 534 more rows
#Caso 2: SNGCJA5 VS HPMA115S0
df %>% select(SNGCJA5, HPMA115S0) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
544 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
10.07 |
6.36 |
0.05 |
5.29 |
9.15 |
13.71 |
36.96 |
▇▇▃▁▁ |
| HPMA115S0 |
0 |
1 |
14.32 |
8.53 |
1.42 |
8.05 |
13.04 |
18.80 |
51.08 |
▇▇▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ HPMA115S0)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.988
ggplot(df, aes(x = SNGCJA5, y = HPMA115S0)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 HPMA115S0",
title = "Relationship between SNGCJA5 and HPMA115S0") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(HPMA115S0 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 0.972 0.106 9.2 0 0.765 1.18
## 2 SNGCJA5 1.33 0.009 149. 0 1.31 1.34
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 544 x 5
## ID HPMA115S0 SNGCJA5 HPMA115S0_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 19.3 13.3 18.6 0.776
## 2 2 14.7 10.6 15.1 -0.346
## 3 3 12.2 8.67 12.5 -0.244
## 4 4 19.8 14.8 20.7 -0.912
## 5 5 24.4 17.9 24.7 -0.352
## 6 6 25 19.3 26.6 -1.61
## 7 7 24.5 18.4 25.4 -0.908
## 8 8 29.3 22.1 30.3 -1.02
## 9 9 30.8 22.9 31.3 -0.489
## 10 10 42.3 30.7 41.7 0.549
## # ... with 534 more rows
#Caso 3: SNGCJA5 VS PMSA003
df %>% select(SNGCJA5, PMSA003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
544 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
10.07 |
6.36 |
0.05 |
5.29 |
9.15 |
13.71 |
36.96 |
▇▇▃▁▁ |
| PMSA003 |
0 |
1 |
24.69 |
16.22 |
0.04 |
12.43 |
22.70 |
34.49 |
95.43 |
▇▇▃▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.996
ggplot(df, aes(x = SNGCJA5, y = PMSA003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 PMSA003",
title = "Relationship between SNGCJA5 and PMSA003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -0.889 0.123 -7.22 0 -1.13 -0.647
## 2 SNGCJA5 2.54 0.01 246. 0 2.52 2.56
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 544 x 5
## ID PMSA003 SNGCJA5 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 31.7 13.3 32.8 -1.13
## 2 2 25.1 10.6 26.1 -0.997
## 3 3 20.1 8.67 21.1 -1.03
## 4 4 35.9 14.8 36.8 -0.91
## 5 5 44.3 17.9 44.6 -0.292
## 6 6 47.1 19.3 48.2 -1.12
## 7 7 42.2 18.4 45.9 -3.71
## 8 8 52.0 22.1 55.3 -3.33
## 9 9 55.0 22.9 57.2 -2.24
## 10 10 75.5 30.7 77.2 -1.71
## # ... with 534 more rows
#Caso 4: SNGCJA5 VS PMS7003
df %>% select(SNGCJA5, PMS7003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
544 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
10.07 |
6.36 |
0.05 |
5.29 |
9.15 |
13.71 |
36.96 |
▇▇▃▁▁ |
| PMS7003 |
0 |
1 |
24.80 |
15.38 |
0.70 |
13.55 |
22.36 |
33.59 |
87.28 |
▇▇▃▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.985
ggplot(df, aes(x = SNGCJA5, y = PMS7003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 PMS7003",
title = "Relationship between SNGCJA5 and PMS7003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 0.79 0.211 3.74 0 0.375 1.21
## 2 SNGCJA5 2.38 0.018 134. 0 2.35 2.42
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 544 x 5
## ID PMS7003 SNGCJA5 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 33.6 13.3 32.4 1.16
## 2 2 27.1 10.6 26.2 0.907
## 3 3 22.1 8.67 21.5 0.623
## 4 4 37.2 14.8 36.2 0.998
## 5 5 46.2 17.9 43.5 2.70
## 6 6 47.7 19.3 46.9 0.827
## 7 7 44.4 18.4 44.7 -0.313
## 8 8 52.9 22.1 53.5 -0.612
## 9 9 54.0 22.9 55.3 -1.37
## 10 10 73.7 30.7 74.1 -0.357
## # ... with 534 more rows
#Caso 5: SNGCJA5 VS Oficial
df %>% select(SNGCJA5, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
544 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SNGCJA5 |
0 |
1 |
10.07 |
6.36 |
0.05 |
5.29 |
9.15 |
13.71 |
36.96 |
▇▇▃▁▁ |
| Oficial |
0 |
1 |
11.63 |
5.26 |
2.06 |
7.79 |
11.01 |
14.70 |
33.11 |
▅▇▃▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SNGCJA5 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.962
ggplot(df, aes(x = SNGCJA5, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SNGCJA5", y = "PM25 Oficial",
title = "Relationship between SNGCJA5 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SNGCJA5, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 3.61 0.115 31.4 0 3.38 3.84
## 2 SNGCJA5 0.797 0.01 82.5 0 0.778 0.816
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 544 x 5
## ID Oficial SNGCJA5 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 16.0 13.3 14.2 1.81
## 2 2 12.1 10.6 12.1 0.003
## 3 3 10.3 8.67 10.5 -0.213
## 4 4 13.6 14.8 15.4 -1.80
## 5 5 16.5 17.9 17.9 -1.37
## 6 6 18.6 19.3 19.0 -0.4
## 7 7 19.0 18.4 18.3 0.679
## 8 8 22.1 22.1 21.2 0.836
## 9 9 22.6 22.9 21.8 0.751
## 10 10 28.7 30.7 28.1 0.609
## # ... with 534 more rows
#Caso 6: SPS30 VS HPMA115S0
df %>% select(SPS30, HPMA115S0) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
544 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
13.47 |
8.07 |
1.03 |
7.38 |
12.33 |
18.29 |
44.85 |
▇▇▃▁▁ |
| HPMA115S0 |
0 |
1 |
14.32 |
8.53 |
1.42 |
8.05 |
13.04 |
18.80 |
51.08 |
▇▇▂▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ HPMA115S0)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.994
ggplot(df, aes(x = SPS30, y = HPMA115S0)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 HPMA115S0",
title = "Relationship between SPS30 and HPMA115S0") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(HPMA115S0 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 0.172 0.081 2.13 0.033 0.014 0.331
## 2 SPS30 1.05 0.005 204. 0 1.04 1.06
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 544 x 5
## ID HPMA115S0 SPS30 HPMA115S0_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 19.3 17.2 18.2 1.11
## 2 2 14.7 14.1 15.0 -0.267
## 3 3 12.2 11.6 12.4 -0.161
## 4 4 19.8 19.4 20.6 -0.843
## 5 5 24.4 23.3 24.6 -0.242
## 6 6 25 25.0 26.4 -1.41
## 7 7 24.5 23.9 25.3 -0.812
## 8 8 29.3 27.5 29.0 0.261
## 9 9 30.8 28.4 30.0 0.767
## 10 10 42.3 39.4 41.5 0.737
## # ... with 534 more rows
#Caso 7: SPS30 VS PMSA003
df %>% select(SPS30, PMSA003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
544 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
13.47 |
8.07 |
1.03 |
7.38 |
12.33 |
18.29 |
44.85 |
▇▇▃▁▁ |
| PMSA003 |
0 |
1 |
24.69 |
16.22 |
0.04 |
12.43 |
22.70 |
34.49 |
95.43 |
▇▇▃▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.991
ggplot(df, aes(x = SPS30, y = PMSA003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 PMSA003",
title = "Relationship between SPS30 and PMSA003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -2.15 0.181 -11.8 0 -2.51 -1.79
## 2 SPS30 1.99 0.012 172. 0 1.97 2.02
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 544 x 5
## ID PMSA003 SPS30 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 31.7 17.2 32.1 -0.416
## 2 2 25.1 14.1 26.0 -0.833
## 3 3 20.1 11.6 21.0 -0.911
## 4 4 35.9 19.4 36.6 -0.656
## 5 5 44.3 23.3 44.2 0.117
## 6 6 47.1 25.0 47.6 -0.501
## 7 7 42.2 23.9 45.5 -3.31
## 8 8 52.0 27.5 52.6 -0.592
## 9 9 55.0 28.4 54.5 0.471
## 10 10 75.5 39.4 76.3 -0.824
## # ... with 534 more rows
#Caso 8: SPS30 VS PMS7003
df %>% select(SPS30, PMS7003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
544 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
13.47 |
8.07 |
1.03 |
7.38 |
12.33 |
18.29 |
44.85 |
▇▇▃▁▁ |
| PMS7003 |
0 |
1 |
24.80 |
15.38 |
0.70 |
13.55 |
22.36 |
33.59 |
87.28 |
▇▇▃▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.989
ggplot(df, aes(x = SPS30, y = PMS7003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 PMS7003",
title = "Relationship between SPS30 and PMS7003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -0.6 0.191 -3.14 0.002 -0.975 -0.225
## 2 SPS30 1.88 0.012 155. 0 1.86 1.91
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 544 x 5
## ID PMS7003 SPS30 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 33.6 17.2 31.8 1.78
## 2 2 27.1 14.1 26.0 1.05
## 3 3 22.1 11.6 21.3 0.765
## 4 4 37.2 19.4 36.0 1.14
## 5 5 46.2 23.3 43.2 2.94
## 6 6 47.7 25.0 46.5 1.23
## 7 7 44.4 23.9 44.5 -0.102
## 8 8 52.9 27.5 51.2 1.74
## 9 9 54.0 28.4 53.0 0.946
## 10 10 73.7 39.4 73.6 0.075
## # ... with 534 more rows
#Caso 9: SPS30 VS Oficial
df %>% select(SPS30, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
544 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| SPS30 |
0 |
1 |
13.47 |
8.07 |
1.03 |
7.38 |
12.33 |
18.29 |
44.85 |
▇▇▃▁▁ |
| Oficial |
0 |
1 |
11.63 |
5.26 |
2.06 |
7.79 |
11.01 |
14.70 |
33.11 |
▅▇▃▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = SPS30 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.955
ggplot(df, aes(x = SPS30, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 SPS30", y = "PM25 Oficial",
title = "Relationship between SPS30 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ SPS30, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 3.24 0.13 24.9 0 2.98 3.50
## 2 SPS30 0.623 0.008 75.1 0 0.607 0.639
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 544 x 5
## ID Oficial SPS30 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 16.0 17.2 13.9 2.04
## 2 2 12.1 14.1 12.0 0.056
## 3 3 10.3 11.6 10.5 -0.179
## 4 4 13.6 19.4 15.4 -1.71
## 5 5 16.5 23.3 17.7 -1.22
## 6 6 18.6 25.0 18.8 -0.183
## 7 7 19.0 23.9 18.1 0.823
## 8 8 22.1 27.5 20.3 1.72
## 9 9 22.6 28.4 21.0 1.63
## 10 10 28.7 39.4 27.8 0.936
## # ... with 534 more rows
#Caso 10: HPMA115S0 VS PMSA003
df %>% select(HPMA115S0, PMSA003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
544 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
14.32 |
8.53 |
1.42 |
8.05 |
13.04 |
18.80 |
51.08 |
▇▇▂▁▁ |
| PMSA003 |
0 |
1 |
24.69 |
16.22 |
0.04 |
12.43 |
22.70 |
34.49 |
95.43 |
▇▇▃▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = HPMA115S0 ~ PMSA003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.979
ggplot(df, aes(x = HPMA115S0, y = PMSA003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 HPMA115S0", y = "PM25 PMSA003",
title = "Relationship between HPMA115S0 and PMSA003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMSA003 ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -1.98 0.277 -7.15 0 -2.52 -1.43
## 2 HPMA115S0 1.86 0.017 112. 0 1.83 1.89
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 544 x 5
## ID PMSA003 HPMA115S0 PMSA003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 31.7 19.3 34.0 -2.35
## 2 2 25.1 14.7 25.4 -0.313
## 3 3 20.1 12.2 20.8 -0.678
## 4 4 35.9 19.8 34.8 1.13
## 5 5 44.3 24.4 43.4 0.925
## 6 6 47.1 25 44.6 2.54
## 7 7 42.2 24.5 43.6 -1.42
## 8 8 52.0 29.3 52.5 -0.568
## 9 9 55.0 30.8 55.4 -0.411
## 10 10 75.5 42.3 76.7 -1.25
## # ... with 534 more rows
#Caso 11: HPMA115S0 VS PMS7003
df %>% select(HPMA115S0, PMS7003) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
544 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
14.32 |
8.53 |
1.42 |
8.05 |
13.04 |
18.80 |
51.08 |
▇▇▂▁▁ |
| PMS7003 |
0 |
1 |
24.80 |
15.38 |
0.70 |
13.55 |
22.36 |
33.59 |
87.28 |
▇▇▃▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = HPMA115S0 ~ PMS7003)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.987
ggplot(df, aes(x = HPMA115S0, y = PMS7003)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 HPMA115S0", y = "PM25 PMS7003",
title = "Relationship between HPMA115S0 and PMS7003") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(PMS7003 ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -0.683 0.211 -3.24 0.001 -1.10 -0.269
## 2 HPMA115S0 1.78 0.013 141. 0 1.75 1.80
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 544 x 5
## ID PMS7003 HPMA115S0 PMS7003_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 33.6 19.3 33.7 -0.135
## 2 2 27.1 14.7 25.5 1.54
## 3 3 22.1 12.2 21.1 1.02
## 4 4 37.2 19.8 34.5 2.74
## 5 5 46.2 24.4 42.7 3.53
## 6 6 47.7 25 43.8 3.93
## 7 7 44.4 24.5 42.9 1.52
## 8 8 52.9 29.3 51.4 1.51
## 9 9 54.0 30.8 54.1 -0.171
## 10 10 73.7 42.3 74.5 -0.807
## # ... with 534 more rows
#Caso 12: HPMA115S0 VS Oficial
df %>% select(HPMA115S0, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
544 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| HPMA115S0 |
0 |
1 |
14.32 |
8.53 |
1.42 |
8.05 |
13.04 |
18.8 |
51.08 |
▇▇▂▁▁ |
| Oficial |
0 |
1 |
11.63 |
5.26 |
2.06 |
7.79 |
11.01 |
14.7 |
33.11 |
▅▇▃▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = HPMA115S0 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.959
ggplot(df, aes(x = HPMA115S0, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 HPMA115S0", y = "PM25 Oficial",
title = "Relationship between HPMA115S0 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ HPMA115S0, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 3.16 0.126 25.2 0 2.91 3.41
## 2 HPMA115S0 0.592 0.008 78.5 0 0.577 0.606
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 544 x 5
## ID Oficial HPMA115S0 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 16.0 19.3 14.6 1.39
## 2 2 12.1 14.7 11.9 0.215
## 3 3 10.3 12.2 10.4 -0.086
## 4 4 13.6 19.8 14.8 -1.20
## 5 5 16.5 24.4 17.6 -1.06
## 6 6 18.6 25 18.0 0.67
## 7 7 19.0 24.5 17.6 1.32
## 8 8 22.1 29.3 20.5 1.59
## 9 9 22.6 30.8 21.4 1.2
## 10 10 28.7 42.3 28.2 0.542
## # ... with 534 more rows
#Caso 13: PMSA003 VS PMS7003
# NOTE: this section is headed "PMSA003 VS PMS7003", but its code previously
# repeated the PMSA003-vs-Oficial analysis of the following section, so the
# PMSA003/PMS7003 pair was never analysed. The calls below now use the pair
# named in the heading, following the same x/y convention as the other cases
# (first sensor on x, second sensor as the regression response).
# The rendered output that used to follow each call belonged to the
# PMSA003-vs-Oficial analysis and has been removed; re-run to regenerate it.

# Summary statistics for the two series compared in this case.
df %>% select(PMSA003, PMS7003) %>% skim()

#**Pearson correlation coefficient original**
df %>%
  get_correlation(formula = PMSA003 ~ PMS7003)

# Scatter plot with a least-squares line (no confidence band).
ggplot(df, aes(x = PMSA003, y = PMS7003)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 PMSA003", y = "PM25 PMS7003",
       title = "Relationship between PMSA003 and PMS7003") +
  geom_smooth(method = "lm", se = FALSE)

#**Fit regression model original:**
# Simple linear regression with PMS7003 as the response and PMSA003 as the
# predictor.
score_model <- lm(PMS7003 ~ PMSA003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
# Per-row observed values, fitted values and residuals.
regression_points <- get_regression_points(score_model)
regression_points
#Caso 14: PMSA003 VS Oficial
df %>% select(PMSA003, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
544 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMSA003 |
0 |
1 |
24.69 |
16.22 |
0.04 |
12.43 |
22.70 |
34.49 |
95.43 |
▇▇▃▁▁ |
| Oficial |
0 |
1 |
11.63 |
5.26 |
2.06 |
7.79 |
11.01 |
14.70 |
33.11 |
▅▇▃▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = PMSA003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.954
ggplot(df, aes(x = PMSA003, y = Oficial)) +
geom_point(alpha = 0.2) +
labs(x = "PM25 PMSA003", y = "PM25 Oficial",
title = "Relationship between PMSA003 and Oficial") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMSA003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 3.99 0.123 32.4 0 3.75 4.23
## 2 PMSA003 0.31 0.004 74.3 0 0.301 0.318
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 544 x 5
## ID Oficial PMSA003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 16.0 31.7 13.8 2.19
## 2 2 12.1 25.1 11.8 0.318
## 3 3 10.3 20.1 10.2 0.092
## 4 4 13.6 35.9 15.1 -1.47
## 5 5 16.5 44.3 17.7 -1.20
## 6 6 18.6 47.1 18.6 0.043
## 7 7 19.0 42.2 17.1 1.91
## 8 8 22.1 52.0 20.1 1.99
## 9 9 22.6 55.0 21.0 1.58
## 10 10 28.7 75.5 27.4 1.35
## # ... with 534 more rows
#Caso 15: PMS7003 VS Oficial
df %>% select(PMS7003, Oficial) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
544 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| PMS7003 |
0 |
1 |
24.80 |
15.38 |
0.70 |
13.55 |
22.36 |
33.59 |
87.28 |
▇▇▃▁▁ |
| Oficial |
0 |
1 |
11.63 |
5.26 |
2.06 |
7.79 |
11.01 |
14.70 |
33.11 |
▅▇▃▁▁ |
#**Pearson correlation coefficient original**
df %>%
get_correlation(formula = PMS7003 ~ Oficial)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.952
# Scatter plot of PMS7003 against the official station with a least-squares
# line (no confidence band).
ggplot(df, aes(x = PMS7003, y = Oficial)) +
  geom_point(alpha = 0.2) +
  # The title previously named PMSA003 (copy-paste slip); the plotted sensor
  # and the x-axis label are PMS7003.
  labs(x = "PM25 PMS7003", y = "PM25 Oficial",
       title = "Relationship between PMS7003 and Oficial") +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#**Fit regression model original:**
score_model <- lm(Oficial ~ PMS7003, data = df)
#**Get regression table original:**
get_regression_table(score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 3.56 0.131 27.1 0 3.30 3.81
## 2 PMS7003 0.326 0.004 72.4 0 0.317 0.335
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 544 x 5
## ID Oficial PMS7003 Oficial_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 16.0 33.6 14.5 1.50
## 2 2 12.1 27.1 12.4 -0.281
## 3 3 10.3 22.1 10.8 -0.444
## 4 4 13.6 37.2 15.7 -2.03
## 5 5 16.5 46.2 18.6 -2.09
## 6 6 18.6 47.7 19.1 -0.482
## 7 7 19.0 44.4 18.0 0.95
## 8 8 22.1 52.9 20.8 1.28
## 9 9 22.6 54.0 21.1 1.46
## 10 10 28.7 73.7 27.6 1.14
## # ... with 534 more rows