library(readxl)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.6 v purrr 0.3.4
## v tibble 3.1.3 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(moderndive)
library(skimr)
# **EJERCICIO CIRCULOS CONCENTRICOS**
# **4 sensores torres ubicados en lugares diferentes**
# Load the measurements workbook into a tibble.
df <- read_excel("C:/Mediciones/Circulos_Concentricos.xlsx")
# View() opens a spreadsheet-style viewer window, which only makes sense in
# an interactive session; skip it when the script runs in batch mode.
if (interactive()) {
  View(df)
}
# Compact overview: one line per column with its type and first values.
glimpse(df)
## Rows: 92
## Columns: 6
## $ num <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, ~
## $ fecha <dttm> 2021-02-25 16:00:00, 2021-02-25 17:00:00, 2021-02-25 18:00:00,~
## $ estfer <dbl> 2.76, 2.61, 2.92, 3.12, 3.05, 4.35, 4.85, 4.93, 3.42, 5.38, 4.4~
## $ torres <dbl> 2.00, 2.35, 2.79, 3.43, 3.42, 5.00, 4.79, 4.66, 3.89, 6.13, 4.9~
## $ ponte <dbl> 3.14, 3.21, 3.30, 3.98, 3.51, 5.02, 4.61, 4.80, 4.15, 5.93, 5.1~
## $ sml <dbl> 3.72, 3.26, 4.67, 4.66, 7.75, 8.66, 9.63, 7.97, 6.89, 5.57, 5.4~
# Print 10 randomly chosen rows as a quick look at the data.
# slice_sample() is the current dplyr verb; sample_n() is superseded.
df %>%
  slice_sample(n = 10)
## # A tibble: 10 x 6
## num fecha estfer torres ponte sml
## <dbl> <dttm> <dbl> <dbl> <dbl> <dbl>
## 1 57 2021-02-28 00:00:00 10.0 12.2 11.4 11.1
## 2 58 2021-02-28 01:00:00 4.31 5.4 8.42 9.19
## 3 54 2021-02-27 21:00:00 11.0 13.6 11.2 14.0
## 4 73 2021-02-28 16:00:00 7.14 8.85 6.65 5.77
## 5 35 2021-02-27 02:00:00 5.35 5.59 5.72 7.94
## 6 10 2021-02-26 01:00:00 5.38 6.13 5.93 5.57
## 7 69 2021-02-28 12:00:00 1.84 1.98 2.34 2.81
## 8 3 2021-02-25 18:00:00 2.92 2.79 3.3 4.67
## 9 87 2021-03-01 06:00:00 11.3 14.5 12.7 10.7
## 10 66 2021-02-28 09:00:00 1.93 1.69 1.42 3.46
# Interactive chart with one lines+markers trace per sensor column, all
# plotted against the observation number `num`.
fig <- df %>%
  plot_ly(
    x = ~num,
    y = ~estfer,
    name = "PM2.5 Estacion Ferias",
    type = "scatter",
    mode = "lines+markers"
  ) %>%
  add_trace(
    y = ~torres,
    name = "PM2.5 Torres del Sol",
    mode = "lines+markers"
  ) %>%
  add_trace(
    y = ~ponte,
    name = "PM2.5 Pontenovo",
    mode = "lines+markers"
  ) %>%
  add_trace(
    y = ~sml,
    name = "PM2.5 Santa Maria del Lago",
    mode = "lines+markers"
  )
# Display the finished figure.
fig
# Case 1: ferias vs. torres
# Summary statistics for the two columns compared in this case.
select(df, estfer, torres) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
92 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| estfer |
0 |
1 |
7.00 |
4.91 |
0.42 |
3.10 |
6.17 |
10.48 |
22.62 |
▇▅▅▁▁ |
| torres |
0 |
1 |
7.78 |
5.65 |
0.06 |
3.43 |
6.12 |
12.31 |
25.16 |
▇▅▅▁▁ |
# Pearson correlation coefficient (original): estfer vs. torres
get_correlation(data = df, formula = estfer ~ torres)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.982
# Scatter plot of torres against estfer with a fitted straight line
# (method = "lm") and no confidence band.
ggplot(data = df, mapping = aes(x = estfer, y = torres)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between ferias and torres",
    x = "PM25 estfer",
    y = "PM25 torres"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the regression model (original): torres explained by estfer.
score_model <- lm(formula = torres ~ estfer, data = df)
# Regression table (original): estimate, standard error, test statistic,
# p-value and confidence bounds for each term.
get_regression_table(model = score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept -0.115 0.195 -0.593 0.554 -0.502 0.271
## 2 estfer 1.13 0.023 49.5 0 1.08 1.17
# Observed values, fitted values and residuals for every row; the outer
# parentheses print the tibble right after assigning it.
(regression_points <- get_regression_points(model = score_model))
## # A tibble: 92 x 5
## ID torres estfer torres_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 2 2.76 3.00 -0.999
## 2 2 2.35 2.61 2.83 -0.479
## 3 3 2.79 2.92 3.18 -0.389
## 4 4 3.43 3.12 3.40 0.025
## 5 5 3.42 3.05 3.33 0.094
## 6 6 5 4.35 4.79 0.208
## 7 7 4.79 4.85 5.36 -0.567
## 8 8 4.66 4.93 5.45 -0.787
## 9 9 3.89 3.42 3.74 0.147
## 10 10 6.13 5.38 5.96 0.175
## # ... with 82 more rows
# Case 2: ferias vs. ponte
# Summary statistics for the two columns compared in this case.
select(df, estfer, ponte) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
92 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| estfer |
0 |
1 |
7.00 |
4.91 |
0.42 |
3.10 |
6.17 |
10.48 |
22.62 |
▇▅▅▁▁ |
| ponte |
0 |
1 |
7.56 |
5.16 |
0.40 |
3.63 |
6.64 |
11.25 |
25.25 |
▇▇▃▁▁ |
# Pearson correlation coefficient (original): estfer vs. ponte
get_correlation(data = df, formula = estfer ~ ponte)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.964
# Scatter plot of ponte against estfer with a fitted straight line
# (method = "lm") and no confidence band.
ggplot(data = df, mapping = aes(x = estfer, y = ponte)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between ferias and ponte",
    x = "PM25 estfer",
    y = "PM25 ponte"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the regression model (original): ponte explained by estfer.
score_model <- lm(formula = ponte ~ estfer, data = df)
# Regression table (original): estimate, standard error, test statistic,
# p-value and confidence bounds for each term.
get_regression_table(model = score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 0.48 0.253 1.90 0.061 -0.023 0.982
## 2 estfer 1.01 0.03 34.2 0 0.953 1.07
# Observed values, fitted values and residuals for every row; the outer
# parentheses print the tibble right after assigning it.
(regression_points <- get_regression_points(model = score_model))
## # A tibble: 92 x 5
## ID ponte estfer ponte_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 3.14 2.76 3.27 -0.133
## 2 2 3.21 2.61 3.12 0.089
## 3 3 3.3 2.92 3.43 -0.134
## 4 4 3.98 3.12 3.64 0.343
## 5 5 3.51 3.05 3.57 -0.056
## 6 6 5.02 4.35 4.88 0.138
## 7 7 4.61 4.85 5.39 -0.778
## 8 8 4.8 4.93 5.47 -0.669
## 9 9 4.15 3.42 3.94 0.21
## 10 10 5.93 5.38 5.92 0.006
## # ... with 82 more rows
# Case 3: estacion ferias vs. sml
# Summary statistics for the two columns compared in this case.
select(df, estfer, sml) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
92 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| estfer |
0 |
1 |
7.00 |
4.91 |
0.42 |
3.10 |
6.17 |
10.48 |
22.62 |
▇▅▅▁▁ |
| sml |
0 |
1 |
7.68 |
4.66 |
1.16 |
3.98 |
6.99 |
10.52 |
23.53 |
▇▆▃▂▁ |
# Pearson correlation coefficient (original): estfer vs. sml
get_correlation(data = df, formula = estfer ~ sml)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.895
# Scatter plot of sml against estfer with a fitted straight line
# (method = "lm") and no confidence band.
ggplot(data = df, mapping = aes(x = estfer, y = sml)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between ferias and sml",
    x = "PM25 estfer",
    y = "PM25 sml"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the regression model (original): sml explained by estfer.
score_model <- lm(formula = sml ~ estfer, data = df)
# Regression table (original): estimate, standard error, test statistic,
# p-value and confidence bounds for each term.
get_regression_table(model = score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 1.74 0.38 4.59 0 0.99 2.50
## 2 estfer 0.848 0.044 19.1 0 0.76 0.937
# Observed values, fitted values and residuals for every row; the outer
# parentheses print the tibble right after assigning it.
(regression_points <- get_regression_points(model = score_model))
## # A tibble: 92 x 5
## ID sml estfer sml_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 3.72 2.76 4.08 -0.365
## 2 2 3.26 2.61 3.96 -0.698
## 3 3 4.67 2.92 4.22 0.449
## 4 4 4.66 3.12 4.39 0.269
## 5 5 7.75 3.05 4.33 3.42
## 6 6 8.66 4.35 5.43 3.23
## 7 7 9.63 4.85 5.86 3.77
## 8 8 7.97 4.93 5.93 2.04
## 9 9 6.89 3.42 4.64 2.24
## 10 10 5.57 5.38 6.31 -0.738
## # ... with 82 more rows
# Case 4: torres vs. pontenovo
# Summary statistics for the two columns compared in this case.
select(df, torres, ponte) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
92 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| torres |
0 |
1 |
7.78 |
5.65 |
0.06 |
3.43 |
6.12 |
12.31 |
25.16 |
▇▅▅▁▁ |
| ponte |
0 |
1 |
7.56 |
5.16 |
0.40 |
3.63 |
6.64 |
11.25 |
25.25 |
▇▇▃▁▁ |
# Pearson correlation coefficient (original): torres vs. ponte
get_correlation(data = df, formula = torres ~ ponte)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.968
# Scatter plot of ponte against torres with a fitted straight line
# (method = "lm") and no confidence band.
ggplot(data = df, mapping = aes(x = torres, y = ponte)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between torres and pontenovo",
    x = "PM25 torres",
    y = "PM25 ponte"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the regression model (original): ponte explained by torres.
score_model <- lm(formula = ponte ~ torres, data = df)
# Regression table (original): estimate, standard error, test statistic,
# p-value and confidence bounds for each term.
get_regression_table(model = score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 0.676 0.232 2.92 0.004 0.215 1.14
## 2 torres 0.885 0.024 36.6 0 0.837 0.933
# Observed values, fitted values and residuals for every row; the outer
# parentheses print the tibble right after assigning it.
(regression_points <- get_regression_points(model = score_model))
## # A tibble: 92 x 5
## ID ponte torres ponte_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 3.14 2 2.45 0.694
## 2 2 3.21 2.35 2.76 0.454
## 3 3 3.3 2.79 3.14 0.155
## 4 4 3.98 3.43 3.71 0.268
## 5 5 3.51 3.42 3.70 -0.193
## 6 6 5.02 5 5.10 -0.081
## 7 7 4.61 4.79 4.92 -0.305
## 8 8 4.8 4.66 4.8 0
## 9 9 4.15 3.89 4.12 0.031
## 10 10 5.93 6.13 6.10 -0.171
## # ... with 82 more rows
# Case 9: torres vs. sml
# Summary statistics for the two columns compared in this case.
select(df, torres, sml) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
92 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| torres |
0 |
1 |
7.78 |
5.65 |
0.06 |
3.43 |
6.12 |
12.31 |
25.16 |
▇▅▅▁▁ |
| sml |
0 |
1 |
7.68 |
4.66 |
1.16 |
3.98 |
6.99 |
10.52 |
23.53 |
▇▆▃▂▁ |
# Pearson correlation coefficient (original): torres vs. sml
get_correlation(data = df, formula = torres ~ sml)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.890
# Scatter plot of sml against torres with a fitted straight line
# (method = "lm") and no confidence band.
ggplot(data = df, mapping = aes(x = torres, y = sml)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between torres and sml",
    x = "PM25 torres",
    y = "PM25 sml"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the regression model (original): sml explained by torres.
score_model <- lm(formula = sml ~ torres, data = df)
# Regression table (original): estimate, standard error, test statistic,
# p-value and confidence bounds for each term.
get_regression_table(model = score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 1.97 0.381 5.18 0 1.22 2.73
## 2 torres 0.734 0.04 18.5 0 0.655 0.813
# Observed values, fitted values and residuals for every row; the outer
# parentheses print the tibble right after assigning it.
(regression_points <- get_regression_points(model = score_model))
## # A tibble: 92 x 5
## ID sml torres sml_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 3.72 2 3.44 0.281
## 2 2 3.26 2.35 3.70 -0.435
## 3 3 4.67 2.79 4.02 0.652
## 4 4 4.66 3.43 4.49 0.172
## 5 5 7.75 3.42 4.48 3.27
## 6 6 8.66 5 5.64 3.02
## 7 7 9.63 4.79 5.49 4.14
## 8 8 7.97 4.66 5.39 2.58
## 9 9 6.89 3.89 4.82 2.06
## 10 10 5.57 6.13 6.47 -0.899
## # ... with 82 more rows
# Case 10: pontenovo vs. sml
# Summary statistics for the two columns compared in this case.
select(df, ponte, sml) %>%
  skim()
Data summary
| Name |
Piped data |
| Number of rows |
92 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| ponte |
0 |
1 |
7.56 |
5.16 |
0.40 |
3.63 |
6.64 |
11.25 |
25.25 |
▇▇▃▁▁ |
| sml |
0 |
1 |
7.68 |
4.66 |
1.16 |
3.98 |
6.99 |
10.52 |
23.53 |
▇▆▃▂▁ |
# Pearson correlation coefficient (original): ponte vs. sml
get_correlation(data = df, formula = ponte ~ sml)
## # A tibble: 1 x 1
## cor
## <dbl>
## 1 0.924
# Scatter plot of sml against ponte with a fitted straight line
# (method = "lm") and no confidence band.
ggplot(data = df, mapping = aes(x = ponte, y = sml)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between pontenovo and sml",
    x = "PM25 ponte",
    y = "PM25 sml"
  )
## `geom_smooth()` using formula 'y ~ x'

# Fit the regression model (original): sml explained by ponte.
score_model <- lm(formula = sml ~ ponte, data = df)
# Regression table (original): estimate, standard error, test statistic,
# p-value and confidence bounds for each term.
get_regression_table(model = score_model)
## # A tibble: 2 x 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 1.38 0.332 4.15 0 0.717 2.04
## 2 ponte 0.834 0.036 23.0 0 0.762 0.906
# Observed values, fitted values and residuals for every row; the outer
# parentheses print the tibble right after assigning it.
(regression_points <- get_regression_points(model = score_model))
## # A tibble: 92 x 5
## ID sml ponte sml_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 3.72 3.14 3.99 -0.274
## 2 2 3.26 3.21 4.05 -0.792
## 3 3 4.67 3.3 4.13 0.543
## 4 4 4.66 3.98 4.69 -0.034
## 5 5 7.75 3.51 4.30 3.45
## 6 6 8.66 5.02 5.56 3.10
## 7 7 9.63 4.61 5.22 4.41
## 8 8 7.97 4.8 5.38 2.59
## 9 9 6.89 4.15 4.84 2.05
## 10 10 5.57 5.93 6.32 -0.75
## # ... with 82 more rows