library(readr)
# Read the semicolon-delimited export. Latitude and Longitude are parsed as
# integers here; they are rescaled to decimal values later in the script.
contaminacion <- read_delim(
  "~/Desktop/contaminacion.csv",
  delim = ";",
  escape_double = FALSE,
  col_types = cols(
    Latitude = col_integer(),
    Longitude = col_integer()
  ),
  trim_ws = TRUE
)

# Give columns 5 to 9 short, syntactic names.
names(contaminacion)[5:9] <- c("temp", "pm1", "pm2_5", "pm10", "humedad")

# Apply the Fahrenheit-to-Celsius formula to the temperature column.
contaminacion$temp <- (contaminacion$temp - 32) * (5 / 9)

# Number of rows recorded in each session.
table(contaminacion$Session_Name)
##
## acacias canasgordas
## 2503 2652

# Preview of the first rows.
head(contaminacion)
## # A tibble: 6 x 9
## Session_Name Timestamp Latitude Longitude temp pm1 pm2_5 pm10
## <chr> <dttm> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 acacias 2022-02-17 11:17:16 33478074 -765330034 23.3 NA NA NA
## 2 acacias 2022-02-17 11:17:16 33478074 -765330034 NA NA NA NA
## 3 acacias 2022-02-17 11:17:16 33478074 -765330034 NA 14 NA NA
## 4 acacias 2022-02-17 11:17:16 33478074 -765330034 NA NA NA 18
## 5 acacias 2022-02-17 11:17:16 33478074 -765330034 NA NA 25 NA
## 6 acacias 2022-02-17 11:17:17 33478033 -765330032 23.9 NA NA NA
## # … with 1 more variable: humedad <dbl>
# Latitude and Longitude are stored as integers without a decimal point
# (e.g. 33478074 and -765330034 in the first rows). Restore the decimal point
# by dividing by the power of ten that leaves `integer_digits` digits in front
# of it.
#
# Arguments:
#   x               integer-encoded coordinate vector.
#   integer_digits  number of digits expected before the decimal point.
#   decimals        decimal-digit counts that get rescaled; values with any
#                   other digit count are returned unchanged.
# Returns: a double vector of the same length as `x`.
#
# Digits are counted on abs(x) so a minus sign is never mistaken for a digit,
# whatever the hemisphere. Missing values are passed through untouched and
# never reach the subscripted assignment.
rescale_coordinate <- function(x, integer_digits, decimals) {
  n_digits <- nchar(format(abs(x), scientific = FALSE, trim = TRUE))
  n_decimals <- n_digits - integer_digits
  fixable <- !is.na(x) & n_decimals %in% decimals
  scaled <- as.numeric(x)
  scaled[fixable] <- scaled[fixable] / 10^n_decimals[fixable]
  scaled
}

# Latitude: one integer digit, 4 to 7 decimals (divide by 1e4 .. 1e7).
contaminacion$Latitude <- rescale_coordinate(
  contaminacion$Latitude,
  integer_digits = 1L,
  decimals = 4:7
)

# Longitude: two integer digits, 5 to 7 decimals (divide by 1e5 .. 1e7).
contaminacion$Longitude <- rescale_coordinate(
  contaminacion$Longitude,
  integer_digits = 2L,
  decimals = 5:7
)
# Attach leaflet with library() rather than require(): library() stops with an
# error when the package is missing, whereas require() only warns and returns
# FALSE, which would defer the failure to the first leaflet call below.
library(leaflet)

# Interactive map: one small circle marker per row, labelled with its session
# name, plus the default tile layer as background.
leaflet() %>%
  addCircleMarkers(
    lng = contaminacion$Longitude,
    lat = contaminacion$Latitude,
    radius = 0.2,
    label = contaminacion$Session_Name
  ) %>%
  addTiles()
# Attach the plotting packages with library() so that a missing package is an
# immediate error; require() would only warn and return FALSE.
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Box plot of pm2_5 per session, rendered as an interactive plotly widget.
g1 <- ggplot(
  contaminacion,
  aes(x = Session_Name, y = pm2_5, fill = Session_Name)
) +
  geom_boxplot() +
  theme_bw()
ggplotly(g1)
## Warning: Removed 4056 rows containing non-finite values (stat_boxplot).

# Welch two-sample t-test (the t.test default) comparing mean pm2_5 between
# the two sessions.
t.test(contaminacion$pm2_5 ~ contaminacion$Session_Name)
##
## Welch Two Sample t-test
##
## data: contaminacion$pm2_5 by contaminacion$Session_Name
## t = -11.014, df = 597.9, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -8.917027 -6.218169
## sample estimates:
## mean in group acacias mean in group canasgordas
## 26.3970 33.9646
# Box plot of temp per session, rendered as an interactive plotly widget.
g2 <- ggplot(
  contaminacion,
  aes(x = Session_Name, y = temp, fill = Session_Name)
) +
  geom_boxplot() +
  theme_bw()
ggplotly(g2)
## Warning: Removed 4055 rows containing non-finite values (stat_boxplot).

# Welch two-sample t-test (the t.test default) comparing mean temp between
# the two sessions.
t.test(contaminacion$temp ~ contaminacion$Session_Name)
##
## Welch Two Sample t-test
##
## data: contaminacion$temp by contaminacion$Session_Name
## t = -67.256, df = 945.15, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -3.889097 -3.668572
## sample estimates:
## mean in group acacias mean in group canasgordas
## 25.73936 29.51819
# Mean of one measurement vector for every distinct Timestamp value, ignoring
# missing readings. A timestamp with no non-missing reading yields NaN (the
# mean of an empty vector); ggplot drops those rows with a warning.
mean_by_timestamp <- function(values) {
  tapply(values, contaminacion$Timestamp, mean, na.rm = TRUE)
}

# NOTE: despite the `_hora` suffix, the grouping key is the full Timestamp
# value, not the hour of the day.
temp_hora <- mean_by_timestamp(contaminacion$temp)
pm25_hora <- mean_by_timestamp(contaminacion$pm2_5)
pm1_hora <- mean_by_timestamp(contaminacion$pm1)
pm10_hora <- mean_by_timestamp(contaminacion$pm10)

# One row per timestamp, one column per averaged measurement.
res <- data.frame(pm25_hora, pm1_hora, pm10_hora, temp_hora)

# Scatter plot of mean pm10 against mean temperature with a smoothed trend
# (smoothing method left to geom_smooth's default selection).
g3 <- ggplot(res, aes(x = temp_hora, y = pm10_hora)) +
  geom_point() +
  theme_bw() +
  geom_smooth()
g3
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 64 rows containing non-finite values (stat_smooth).
## Warning: Removed 64 rows containing missing values (geom_point).
