library(readxl)
library(tidyverse)
## -- Attaching packages --------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.3     v dplyr   1.0.2
## v tidyr   1.1.1     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(moderndive)
library(skimr)

df <- read_excel("C:/Aire/MedicionesX5/Paiba Final/PAIBA_CONSOLIDADO_final.xlsx")

fig <- plot_ly(df, x = ~Num, y = ~SPS30, name = 'PM2.5 SPS30', type = 'scatter', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~SNGCJA5, name = 'PM2.5 SNGCJA5', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~Oficial, name = 'PM2.5 Oficial', mode = 'lines+markers')
fig
#Caso 5: SNGCJA5 VS Oficial

df %>% select(SNGCJA5, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 1241
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SNGCJA5 0 1 8.87 6.48 0.05 3.80 7.47 12.32 40.80 ▇▅▂▁▁
Oficial 0 1 15.38 8.41 2.06 9.51 13.39 19.63 62.22 ▇▅▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SNGCJA5 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.564
ggplot(df, aes(x = SNGCJA5, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SNGCJA5", y = "PM25 Oficial",
       title = "Relationship between SNGCJA5 and Oficial") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#Caso 9: SPS30 VS Oficial

df %>% select(SPS30, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 1241
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SPS30 0 1 12.25 8.36 1.03 5.78 10.41 16.59 51.67 ▇▅▂▁▁
Oficial 0 1 15.38 8.41 2.06 9.51 13.39 19.63 62.22 ▇▅▂▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SPS30 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.559
ggplot(df, aes(x = SPS30, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SPS30", y = "PM25 Oficial",
       title = "Relationship between SPS30 and Oficial") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

df <- read_excel("C:/Aire/MedicionesX5/Paiba Final/PAIBA_CONSOLIDADO_final_545.xlsx")


fig <- plot_ly(df, x = ~Num, y = ~SPS30, name = 'PM2.5 SPS30', type = 'scatter', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~SNGCJA5, name = 'PM2.5 SNGCJA5', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~Oficial, name = 'PM2.5 Oficial', mode = 'lines+markers')
fig
#Caso 5: SNGCJA5 VS Oficial

df %>% select(SNGCJA5, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 544
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SNGCJA5 0 1 10.07 6.36 0.05 5.29 9.15 13.71 36.96 ▇▇▃▁▁
Oficial 0 1 11.63 5.26 2.06 7.79 11.01 14.70 33.11 ▅▇▃▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SNGCJA5 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.962
ggplot(df, aes(x = SNGCJA5, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SNGCJA5", y = "PM25 Oficial",
       title = "Relationship between SNGCJA5 and Oficial") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#Caso 9: SPS30 VS Oficial

df %>% select(SPS30, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 544
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SPS30 0 1 13.47 8.07 1.03 7.38 12.33 18.29 44.85 ▇▇▃▁▁
Oficial 0 1 11.63 5.26 2.06 7.79 11.01 14.70 33.11 ▅▇▃▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SPS30 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.955
ggplot(df, aes(x = SPS30, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SPS30", y = "PM25 Oficial",
       title = "Relationship between SPS30 and Oficial") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

df <- read_excel("C:/Aire/MedicionesX5/Paiba Final/PAIBA_CONSOLIDADO_final_resto.xlsx")

fig <- plot_ly(df, x = ~Num, y = ~SPS30, name = 'PM2.5 SPS30', type = 'scatter', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~SNGCJA5, name = 'PM2.5 SNGCJA5', mode = 'lines+markers') 
fig <- fig %>% add_trace(y = ~Oficial, name = 'PM2.5 Oficial', mode = 'lines+markers')
fig
#Caso 5: SNGCJA5 VS Oficial

df %>% select(SNGCJA5, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 697
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SNGCJA5 0 1 7.94 6.43 0.10 2.96 6.56 10.75 40.80 ▇▃▁▁▁
Oficial 0 1 18.30 9.22 3.47 11.12 16.86 23.87 62.22 ▇▇▃▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SNGCJA5 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.606
ggplot(df, aes(x = SNGCJA5, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SNGCJA5", y = "PM25 Oficial",
       title = "Relationship between SNGCJA5 and Oficial") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#Caso 9: SPS30 VS Oficial

df %>% select(SPS30, Oficial) %>% skim()
Data summary
Name Piped data
Number of rows 697
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SPS30 0 1 11.29 8.46 1.17 4.85 9.30 15.11 51.67 ▇▃▁▁▁
Oficial 0 1 18.30 9.22 3.47 11.12 16.86 23.87 62.22 ▇▇▃▁▁
#**Pearson correlation coefficient original**

df %>% 
  get_correlation(formula = SPS30 ~ Oficial)
## # A tibble: 1 x 1
##     cor
##   <dbl>
## 1 0.573
ggplot(df, aes(x = SPS30, y = Oficial)) +
  geom_point(alpha = 0.2) +
  labs(x = "PM25 SPS30", y = "PM25 Oficial",
       title = "Relationship between SPS30 and Oficial") +  
  
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'