library(seasonal)
library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(showtext)
## Loading required package: sysfonts
library(ggplot2)
library(scales)
library(xlsx)
## Loading required package: rJava
## Loading required package: xlsxjars

Check if there is seasonal component for the hospital admission data

 load("~/Madrid/prepareDataAndStatistics/pollutant_meteo_hospital_twitter.RData")

plot(pollutant_meteo_hospital_twitter$ICD_460.519,type="l")

plot(pollutant_meteo_hospital_twitter$ICD_410_414_420_438_440_444,type="l")

# plot(pollutant_meteo_hospital_twitter$ICD_410_414,type="l")
# plot(pollutant_meteo_hospital_twitter$ICD_493,type="l")
# plot(pollutant_meteo_hospital_twitter$ICD_428,type="l")
# plot(pollutant_meteo_hospital_twitter$ICD_430_438,type="l")
plot(pollutant_meteo_hospital_twitter$ICD_all_case_mortality,type="l")

plot(pollutant_meteo_hospital_twitter$ICD_total_admissions[1:21],type="l")

Except daily mortality, it can be seenb that the seasonal components exists for all the hospital admission data. Therefore, we used the differences of sequence two days values as new hospital admission data as inputs to remove the seasonal factor, instead of using daily hospital admission data.

diff(pollutant_meteo_hospital_twitter$ICD_460.519,1)->ICD_460_519_diff
diff(pollutant_meteo_hospital_twitter$ICD_410_414_420_438_440_444,1)->ICD_410_414_420_438_440_444_diff
diff(pollutant_meteo_hospital_twitter$ICD_410_414,1)->ICD_410_414_diff
diff(pollutant_meteo_hospital_twitter$ICD_493,1)->ICD_493_diff
diff(pollutant_meteo_hospital_twitter$ICD_428,1)->ICD_428_diff
diff(pollutant_meteo_hospital_twitter$ICD_430_438,1)->ICD_430_438_diff


pollutant_meteo_hospital_twitter[2:nrow(pollutant_meteo_hospital_twitter),]->new
new$ICD_460.519<-ICD_460_519_diff
new$ICD_493<-ICD_493_diff
new$ICD_410_414_420_438_440_444<-ICD_410_414_420_438_440_444_diff
new$ICD_410_414<-ICD_410_414_diff
new$ICD_428<-ICD_428_diff
new$ICD_430_438<-ICD_430_438_diff

##correlation of variables based on daily records
cor(new[,c(2:(ncol(new)-5))])->cor_daily
write.xlsx(cor_daily,file="cor_daily.xlsx")
##Aggreate data by month
Month<-factor(new$Month)
Year<-factor(new$Year)
aggregate(new[,c(2:(ncol(new)-5))],by=list(Year,Month),FUN="sum")->pollutant_meteo_hospital_twitter_monthly
cor(pollutant_meteo_hospital_twitter_monthly[,3:ncol(pollutant_meteo_hospital_twitter_monthly)])->cor_monthly
write.xlsx(cor_monthly,file="cor_monthly.xlsx")
##normalized the inputs
normalized<-function(x){
        (x-min(x))*0.8/(max(x)-min(x))+0.1
}
data.frame(apply(pollutant_meteo_hospital_twitter[,2:(ncol(pollutant_meteo_hospital_twitter)-5)],2,normalized))->pollutant_meteo_hospital_twitter_normalized

####selected the first 7 weeks data and plot it###
par(mfrow=c(2,1),mar=c(2,2,2,2)+0.1)
plot(data.frame(pollutant_meteo_hospital_twitter_normalized)$ICD_total_admissions[1:50],type="l",ylab="",main="Total Admissions")
plot(pollutant_meteo_hospital_twitter_normalized$contaminacion[1:50],type="l",ylab="",main="twitter key words contaminacion")

par(mfrow=c(2,1),mar=c(2,2,2,2)+0.1)
plot(pollutant_meteo_hospital_twitter_normalized$ICD_total_admissions[1:50],type="l",ylab="",main="Total Admissions")
plot(pollutant_meteo_hospital_twitter_normalized$SO2[1:50],type="l",ylab="",main="daily average SO2 levels")

par(mfrow=c(2,1),mar=c(2,2,2,2)+0.1)
plot(pollutant_meteo_hospital_twitter_normalized$PM2.5[1:50],type="l",ylab="",main="daily average PM2.5 level")
plot(pollutant_meteo_hospital_twitter_normalized$contaminacion[1:50],type="l",ylab="",main="twitter key word contaminacion")

par(mfrow=c(1,1))
plot(pollutant_meteo_hospital_twitter_normalized$PM2.5[1:50],type="l",ylab="",main="",cex.axis=2,ylim=c(0.1,0.7))
lines(pollutant_meteo_hospital_twitter_normalized$contaminacion[1:50],col="red")
lag(pollutant_meteo_hospital_twitter_normalized$contaminacion[1:50],2)->lag2_contaminacion
lines(lag2_contaminacion,col="blue")
legend(-0.1,0.72,legend=c("PM2.5","contaminacion","contaminacion 2lags"),col = c("black","red","blue"),horiz=TRUE,lty=1)

##correlation of PM2.5 and keyword contaminacion with two days lag
cor(pollutant_meteo_hospital_twitter_normalized$PM2.5[3:50],lag2_contaminacion[3:50])
## [1] 0.2808011

Analysis the holiday

cbind(pollutant_meteo_hospital_twitter_normalized$SO2,pollutant_meteo_hospital_twitter_normalized$O3,pollutant_meteo_hospital_twitter_normalized$PM2.5,pollutant_meteo_hospital_twitter$holidays_weekend)->pollution_holidays

data.frame(pollution_holidays)->pollution_holidays
colnames(pollution_holidays)<-c("SO2","O3","PM2.5","holidays")

reshape(data.frame(pollution_holidays),varying=c(1:(ncol(pollution_holidays)-1)),v.names="values",timevar = "typePollution",times=colnames(pollution_holidays[,1:(ncol(pollution_holidays)-1)]),direction="long")->pollution_holidays_reshape

pollution_holidays[pollution_holidays$holidays==1,]->pollution_holidayWeekends
pollution_holidays[pollution_holidays$holidays==0,]->pollution_workdays

par(mfrow=c(2,1),mar=c(2,2,2,2)+0.1)
plot(pollution_workdays$SO2,type="l",main="SO2 workdays")
plot(pollution_holidayWeekends$SO2,type="l",main="SO2 holidays")

plot(pollution_workdays$O3,type="l",main="O3 workdays")
plot(pollution_holidayWeekends$O3,type="l",main="O3 holidays")

plot(pollution_workdays$PM2.5,type="l",main="PM2.5 workdays")
plot(pollution_holidayWeekends$PM2.5,type="l",main="PM2.5 holidays")

plot(pollution_holidays$O3,col=as.factor(as.character(pollution_holidays$holidays)),main="O3")
plot(pollution_holidays$PM2.5,col=as.factor(as.character(pollution_holidays$holidays)),main="PM2.5")

plot(pollution_holidays$SO2,col=as.factor(as.character(pollution_holidays$holidays)),main="SO2")

plot hospitial admissions grouped by workdays and holidays

load("~/Madrid/prepareDataAndStatistics/pollutant_meteo_hospital_twitter.RData")

plot(pollutant_meteo_hospital_twitter$ICD_460.519,col=as.factor(as.character(pollutant_meteo_hospital_twitter$holidays)),main="ICD 460 519")

plot(pollutant_meteo_hospital_twitter$ICD_410_414_420_438_440_444,col=as.factor(as.character(pollutant_meteo_hospital_twitter$holidays)),main="ICD 410_414_420_438_440_444")

plot(pollutant_meteo_hospital_twitter$ICD_all_case_mortality,col=as.factor(as.character(pollutant_meteo_hospital_twitter$holidays)),main="ICD_all_case_mortality")

plot the keywords and selected the days that exceed the limitations

plot(pollutant_meteo_hospital_twitter$contaminacion,col=as.factor(as.character(pollutant_meteo_hospital_twitter$holidays)),main="contaminacion")

plot(pollutant_meteo_hospital_twitter$aire,col=as.factor(as.character(pollutant_meteo_hospital_twitter$holidays)),main="aire",type="l")

pollutant_meteo_hospital_twitter[pollutant_meteo_hospital_twitter$aire>50,"date"]->aire50_date
aire50_date
##  [1] "2014-01-04" "2014-02-10" "2014-02-25" "2014-03-25" "2014-06-11"
##  [6] "2014-06-13" "2014-06-14" "2014-07-15" "2014-07-16" "2014-07-17"
## [11] "2014-07-18" "2014-07-28" "2014-07-29" "2014-09-01" "2014-09-02"
## [16] "2014-09-03"
plot(pollutant_meteo_hospital_twitter$sick,col=as.factor(as.character(pollutant_meteo_hospital_twitter$holidays)),main="sick",type="l")

pollutant_meteo_hospital_twitter[pollutant_meteo_hospital_twitter$sick>50,"date"]->sick50_date
sick50_date
## [1] "2014-11-11"
plot(pollutant_meteo_hospital_twitter$garganta,col=as.factor(as.character(pollutant_meteo_hospital_twitter$holidays)),main="garganta",type="l")

pollutant_meteo_hospital_twitter[pollutant_meteo_hospital_twitter$garganta>20,"date"]->garganta50_date
garganta50_date
##  [1] "2014-01-02" "2014-01-08" "2014-01-19" "2014-01-22" "2014-01-25"
##  [6] "2014-01-26" "2014-02-03" "2014-02-09" "2014-02-10" "2014-02-16"
## [11] "2014-02-19" "2014-04-09" "2014-05-24" "2014-05-25" "2014-06-02"
## [16] "2014-07-11" "2014-08-25" "2014-09-14" "2014-09-21" "2014-09-22"
## [21] "2014-09-23" "2014-09-25" "2014-09-26" "2014-09-27" "2014-09-28"
## [26] "2014-09-29" "2014-09-30" "2014-10-01" "2014-10-02" "2014-10-03"
## [31] "2014-10-04" "2014-10-05" "2014-10-06" "2014-10-07" "2014-10-09"
## [36] "2014-10-11" "2014-10-12" "2014-10-15" "2014-10-20" "2014-11-03"
pollutant_meteo_hospital_twitter[2:nrow(pollutant_meteo_hospital_twitter),]->new2
new2$ICD_460.519<-ICD_460_519_diff
new2$ICD_493<-ICD_493_diff
new2$ICD_410_414_420_438_440_444<-ICD_410_414_420_438_440_444_diff
new2$ICD_410_414<-ICD_410_414_diff
new2$ICD_428<-ICD_428_diff
new2$ICD_430_438<-ICD_430_438_diff
new2[new2$holidays_weekend==1,]->new2_holidays
new2[new2$holidays_weekend==0,]->new2_weekdays
cor(new2_holidays[,2:(ncol(new2_holidays)-5)])->cor_daily_holidays
## Warning in cor(new2_holidays[, 2:(ncol(new2_holidays) - 5)]): the standard
## deviation is zero
cor(new2_weekdays[,2:(ncol(new2_weekdays)-5)])->cor_new2_weekdays

library(xlsx)
write.xlsx(cor_daily_holidays,file="cor_daily_holidays.xlsx")
write.xlsx(cor_new2_weekdays,file="cor_new2_weekdays.xlsx")