importing data

# Load the rainfall table; the relative file name is resolved against the
# current working directory.
rainfall <- read.csv(file = "RAINFALL DATA.csv")

plot GPM_IMERG data

#convert into time series
# Each GPM IMERG rainfall column is turned into a ts object with 365
# observations per cycle, spanning 2010 (position 1) to 2020 (position 365),
# and then drawn with the same stripped-down look: no grid lines, no panel
# fill, black axis lines.
# NOTE(review): that window holds 11 * 365 = 4015 slots, while the
# correlation tests on this data report at least 4017 paired rows, so ts()
# presumably drops the trailing observations -- confirm this is intended.
gpm_plot_theme <- theme(
  axis.line = element_line(colour = "black"),
  panel.background = element_blank(),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank()
)

#Maasin
GPM.ts.msn <- ts(
  rainfall$GPM.IMERG.Maasin.Rainfall,
  start = c(2010, 1), end = c(2020, 365), frequency = 365
)

autoplot(GPM.ts.msn, ylab = "Rainfall Amount", xlab = "Time") +
  gpm_plot_theme

#Tacloban
GPM.ts.tac <- ts(
  rainfall$GPM.IMERG.Tacloban.Rainfall,
  start = c(2010, 1), end = c(2020, 365), frequency = 365
)

autoplot(GPM.ts.tac, ylab = "Rainfall Amount", xlab = "Time") +
  gpm_plot_theme

#VSU
GPM.ts.vsu <- ts(
  rainfall$GPM.IMERG.VSU.Rainfall,
  start = c(2010, 1), end = c(2020, 365), frequency = 365
)

autoplot(GPM.ts.vsu, ylab = "Rainfall Amount", xlab = "Time") +
  gpm_plot_theme

#plotting GPM vs in-situ

#converting in-situ data into time series

#Maasin
# In-situ rainfall for Maasin as a ts on the same time window as GPM.ts.msn,
# so both series can share one plot.
Msn_nc2 <- ts(rainfall$Maasin.Rainfall,
              start = c(2010, 1),
              end = c(2020, 365),
              frequency = 365)

# GPM IMERG series with the in-situ series layered on top; legend hidden.
# "serif" is the registered, device-independent family name (on Windows it
# maps to Times New Roman). The previous value "TT Times New Roman" is not a
# registered family and raised "font family not found in Windows font
# database" warnings for every text element.
autoplot(GPM.ts.msn) +
  autolayer(Msn_nc2) +
  ylab("Rainfall Amount") +
  xlab("Time") +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        legend.position = "none",
        text = element_text(size = 12, family = "serif"))

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

#Tacloban
# In-situ rainfall for Tacloban as a ts on the same time window as
# GPM.ts.tac, so both series can share one plot.
Tac_nc2 <- ts(rainfall$Tacloban.Rainfall,
              start = c(2010, 1),
              end = c(2020, 365),
              frequency = 365)

# GPM IMERG series with the in-situ series layered on top; legend hidden.
# "serif" is the registered, device-independent family name (on Windows it
# maps to Times New Roman). The previous value "TT Times New Roman" is not a
# registered family and raised "font family not found in Windows font
# database" warnings for every text element.
autoplot(GPM.ts.tac) +
  autolayer(Tac_nc2) +
  ylab("Rainfall Amount") +
  xlab("Time") +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        legend.position = "none",
        text = element_text(size = 12, family = "serif"))

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

#VSU
# In-situ rainfall for VSU as a ts on the same time window as GPM.ts.vsu,
# so both series can share one plot.
VSU_nc2 <- ts(rainfall$VSU.Rainfall,
              start = c(2010, 1),
              end = c(2020, 365),
              frequency = 365)

# GPM IMERG series with the in-situ series layered on top; legend hidden.
# "serif" is the registered, device-independent family name (on Windows it
# maps to Times New Roman). The previous value "TT Times New Roman" is not a
# registered family and raised "font family not found in Windows font
# database" warnings for every text element.
autoplot(GPM.ts.vsu) +
  autolayer(VSU_nc2) +
  ylab("Rainfall Amount") +
  xlab("Time") +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        legend.position = "none",
        text = element_text(size = 12, family = "serif"))

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

univariate linear fitting

#partitioning the data set
# Fix the RNG seed so the random 70/30 split -- and every model fitted on
# it -- is identical on each run. The seed value itself is arbitrary.
set.seed(2023)
ind <- sample.split(Y = rainfall$GPM.IMERG.Maasin.Rainfall, SplitRatio = 0.7)

#subsetting into Train data (rows flagged TRUE by the split)
train <- rainfall[ind, ]

#subsetting into Test data (all remaining rows)
test <- rainfall[!ind, ]


#MAASIN

#correlation of satellite vs ground data
# Pearson correlation test between the Maasin station rainfall and the GPM
# IMERG estimate, computed on the full data set (not only the training rows).
cor.test(x = rainfall$Maasin.Rainfall,
         y = rainfall$GPM.IMERG.Maasin.Rainfall,
         method = "pearson")

## 
##  Pearson's product-moment correlation
## 
## data:  rainfall$Maasin.Rainfall and rainfall$GPM.IMERG.Maasin.Rainfall
## t = 19.274, df = 4016, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2624222 0.3190343
## sample estimates:
##       cor 
## 0.2909829

#fitting regression model
# Simple linear regression on the training rows: GPM IMERG estimate as the
# response, Maasin station rainfall as the single predictor.
msn_lm.u <- lm(
  formula = GPM.IMERG.Maasin.Rainfall ~ Maasin.Rainfall,
  data = train
)
summary(msn_lm.u)

## 
## Call:
## lm(formula = GPM.IMERG.Maasin.Rainfall ~ Maasin.Rainfall, data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -64.000  -6.259  -5.692  -0.079 230.856 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      6.25909    0.38257   16.36   <2e-16 ***
## Maasin.Rainfall  0.29001    0.01783   16.27   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 18.8 on 2810 degrees of freedom
## Multiple R-squared:  0.08609,    Adjusted R-squared:  0.08577 
## F-statistic: 264.7 on 1 and 2810 DF,  p-value: < 2.2e-16

# Scatter of the training rows (station rainfall on x, GPM IMERG on y) with
# a fitted straight line, drawn without grid lines or panel fill.
ggplot(
  data = train,
  mapping = aes(x = Maasin.Rainfall, y = GPM.IMERG.Maasin.Rainfall)
) +
  geom_point(size = 0.5) +
  geom_smooth(method = "lm") +
  xlab("Rainfall Data") +
  ylab("GPM IMERG") +
  theme(
    axis.line = element_line(colour = "black"),
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

## `geom_smooth()` using formula = 'y ~ x'

#TACLOBAN

#correlation of satellite vs ground data
# Pearson correlation test between the Tacloban station rainfall and the GPM
# IMERG estimate, computed on the full data set (not only the training rows).
cor.test(x = rainfall$Tacloban.Rainfall,
         y = rainfall$GPM.IMERG.Tacloban.Rainfall,
         method = "pearson")

## 
##  Pearson's product-moment correlation
## 
## data:  rainfall$Tacloban.Rainfall and rainfall$GPM.IMERG.Tacloban.Rainfall
## t = 25.913, df = 3743, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3625161 0.4168391
## sample estimates:
##       cor 
## 0.3900169

#fitting regression model
# Simple linear regression on the training rows: GPM IMERG estimate as the
# response, Tacloban station rainfall as the single predictor.
tac_lm.u <- lm(
  formula = GPM.IMERG.Tacloban.Rainfall ~ Tacloban.Rainfall,
  data = train
)
summary(tac_lm.u)

## 
## Call:
## lm(formula = GPM.IMERG.Tacloban.Rainfall ~ Tacloban.Rainfall, 
##     data = train)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -88.35  -6.19  -5.65  -0.14 316.54 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        6.07579    0.43664   13.91   <2e-16 ***
## Tacloban.Rainfall  0.41957    0.01854   22.63   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 20.89 on 2623 degrees of freedom
##   (187 observations deleted due to missingness)
## Multiple R-squared:  0.1633, Adjusted R-squared:  0.163 
## F-statistic:   512 on 1 and 2623 DF,  p-value: < 2.2e-16

# Scatter of the training rows (station rainfall on x, GPM IMERG on y) with
# a fitted straight line, drawn without grid lines or panel fill.
ggplot(
  data = train,
  mapping = aes(x = Tacloban.Rainfall, y = GPM.IMERG.Tacloban.Rainfall)
) +
  geom_point(size = 0.5) +
  geom_smooth(method = "lm") +
  xlab("Rainfall Data") +
  ylab("GPM IMERG") +
  theme(
    axis.line = element_line(colour = "black"),
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 187 rows containing non-finite values (`stat_smooth()`).

## Warning: Removed 187 rows containing missing values (`geom_point()`).

#VSU

#correlation of satellite vs ground data
# Pearson correlation test between the VSU station rainfall and the GPM
# IMERG estimate, computed on the full data set (not only the training rows).
cor.test(x = rainfall$VSU.Rainfall,
         y = rainfall$GPM.IMERG.VSU.Rainfall,
         method = "pearson")

## 
##  Pearson's product-moment correlation
## 
## data:  rainfall$VSU.Rainfall and rainfall$GPM.IMERG.VSU.Rainfall
## t = 19.47, df = 4015, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2652085 0.3217285
## sample estimates:
##       cor 
## 0.2937252

#fitting regression model
# Simple linear regression on the training rows: GPM IMERG estimate as the
# response, VSU station rainfall as the single predictor.
vsu_lm.u <- lm(
  formula = GPM.IMERG.VSU.Rainfall ~ VSU.Rainfall,
  data = train
)
summary(vsu_lm.u)

## 
## Call:
## lm(formula = GPM.IMERG.VSU.Rainfall ~ VSU.Rainfall, data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -60.066  -6.342  -5.880  -0.105 251.814 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   6.34242    0.39598   16.02   <2e-16 ***
## VSU.Rainfall  0.29135    0.01706   17.08   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.24 on 2810 degrees of freedom
## Multiple R-squared:  0.09402,    Adjusted R-squared:  0.09369 
## F-statistic: 291.6 on 1 and 2810 DF,  p-value: < 2.2e-16

# Scatter of the training rows (station rainfall on x, GPM IMERG on y) with
# a fitted straight line, drawn without grid lines or panel fill.
ggplot(
  data = train,
  mapping = aes(x = VSU.Rainfall, y = GPM.IMERG.VSU.Rainfall)
) +
  geom_point(size = 0.5) +
  geom_smooth(method = "lm") +
  xlab("Rainfall Data") +
  ylab("GPM IMERG") +
  theme(
    axis.line = element_line(colour = "black"),
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

## `geom_smooth()` using formula = 'y ~ x'

#prediction
# Score the held-out rows with the fitted univariate models themselves.
# Hand-typed intercepts/slopes go stale whenever the random split (and so
# the fit) changes; predict() always applies the current estimates.
# Rows whose predictor is NA yield an NA prediction, so every vector keeps
# one entry per row of `test`.
# unname() drops the row labels predict() attaches, so the data frame and
# the CSV keep plain 1..n row numbers.
msn_predict <- unname(predict(msn_lm.u, newdata = test))
tac_predict <- unname(predict(tac_lm.u, newdata = test))
vsu_predict <- unname(predict(vsu_lm.u, newdata = test))

# Predictions side by side with the observed GPM IMERG values per station.
df <- data.frame(msn_predict, test$GPM.IMERG.Maasin.Rainfall,
                 tac_predict, test$GPM.IMERG.Tacloban.Rainfall,
                 vsu_predict, test$GPM.IMERG.VSU.Rainfall)

# NOTE(review): absolute, machine-specific output path -- confirm it exists
# on the machine running this script.
write.csv(df, "C:/Users/Admin/OneDrive/Desktop/rainfall\\univariate_linear_fitting.csv")

#multivariate linear fitting

#fitting regression model
#cleaning data values in temperature
# -999 is treated as a missing-value code: recode it to NA in every column,
# then drop each row that has an NA anywhere.
rainfall[rainfall == "-999"] <- NA
rainfall <- na.omit(rainfall)

train[train == "-999"] <- NA
train <- na.omit(train)

# The held-out rows get the same treatment; otherwise their -999 codes are
# fed into the multivariate predictions as if they were real measurements.
test[test == "-999"] <- NA
test <- na.omit(test)
#MAASIN
# Multiple linear regression on the cleaned training rows: GPM IMERG
# estimate explained by station rainfall plus the MAASIN.TMEAN and
# MAASIN.RH columns.
msn_lm.m <- lm(
  formula = GPM.IMERG.Maasin.Rainfall ~
    Maasin.Rainfall + MAASIN.TMEAN + MAASIN.RH,
  data = train
)
summary(msn_lm.m)

## 
## Call:
## lm(formula = GPM.IMERG.Maasin.Rainfall ~ Maasin.Rainfall + MAASIN.TMEAN + 
##     MAASIN.RH, data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -50.245  -7.358  -4.010   0.538 170.241 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     50.41689   14.98124   3.365 0.000777 ***
## Maasin.Rainfall  0.17371    0.02075   8.373  < 2e-16 ***
## MAASIN.TMEAN    -2.07904    0.33584  -6.191 7.03e-10 ***
## MAASIN.RH        0.15439    0.09242   1.670 0.094960 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17.68 on 2401 degrees of freedom
## Multiple R-squared:  0.0962, Adjusted R-squared:  0.09507 
## F-statistic: 85.19 on 3 and 2401 DF,  p-value: < 2.2e-16

#TACLOBAN
# Multiple linear regression on the cleaned training rows: GPM IMERG
# estimate explained by station rainfall plus the TAC.TMEAN and TAC.RH
# columns.
tac_lm.m <- lm(
  formula = GPM.IMERG.Tacloban.Rainfall ~
    Tacloban.Rainfall + TAC.TMEAN + TAC.RH,
  data = train
)
summary(tac_lm.m)

## 
## Call:
## lm(formula = GPM.IMERG.Tacloban.Rainfall ~ Tacloban.Rainfall + 
##     TAC.TMEAN + TAC.RH, data = train)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -77.10  -6.88  -4.55  -0.57 318.13 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -29.91688   18.80321  -1.591 0.111730    
## Tacloban.Rainfall   0.34885    0.02159  16.161  < 2e-16 ***
## TAC.TMEAN           0.24849    0.46473   0.535 0.592913    
## TAC.RH              0.34983    0.09858   3.549 0.000395 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 21 on 2401 degrees of freedom
## Multiple R-squared:  0.1432, Adjusted R-squared:  0.1421 
## F-statistic: 133.7 on 3 and 2401 DF,  p-value: < 2.2e-16

#VSU
# Multiple linear regression on the cleaned training rows: GPM IMERG
# estimate explained by station rainfall plus the VSU.TMEAN and VSU.RH
# columns.
vsu_lm.m <- lm(
  formula = GPM.IMERG.VSU.Rainfall ~
    VSU.Rainfall + VSU.TMEAN + VSU.RH,
  data = train
)
summary(vsu_lm.m)

## 
## Call:
## lm(formula = GPM.IMERG.VSU.Rainfall ~ VSU.Rainfall + VSU.TMEAN + 
##     VSU.RH, data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -50.326  -7.260  -4.220  -0.011 251.139 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  31.47263   16.30286   1.930 0.053663 .  
## VSU.Rainfall  0.19155    0.02007   9.543  < 2e-16 ***
## VSU.TMEAN    -2.03934    0.41038  -4.969 7.19e-07 ***
## VSU.RH        0.38489    0.09901   3.887 0.000104 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.01 on 2401 degrees of freedom
## Multiple R-squared:  0.09747,    Adjusted R-squared:  0.09634 
## F-statistic: 86.43 on 3 and 2401 DF,  p-value: < 2.2e-16

#multivariate prediction
# Score the held-out rows with the fitted multivariate models themselves.
# Hand-typed coefficients go stale whenever the split or the fit changes
# (and can silently omit a term); predict() always applies every term of
# the current model.
# The predictors are taken from a copy of `test` in which the -999
# missing-value code is recoded to NA, so such rows produce an NA prediction
# instead of a value computed from the sentinel. `test` itself is untouched.
test_m <- test
test_m[test_m == "-999"] <- NA

# unname() drops the row labels predict() attaches, so the data frame and
# the CSV keep plain 1..n row numbers.
msn.m_predict <- unname(predict(msn_lm.m, newdata = test_m))
tac.m_predict <- unname(predict(tac_lm.m, newdata = test_m))
vsu.m_predict <- unname(predict(vsu_lm.m, newdata = test_m))

# Predictions side by side with the observed GPM IMERG values per station.
df <- data.frame(msn.m_predict, test$GPM.IMERG.Maasin.Rainfall,
                 tac.m_predict, test$GPM.IMERG.Tacloban.Rainfall,
                 vsu.m_predict, test$GPM.IMERG.VSU.Rainfall)

# NOTE(review): absolute, machine-specific output path -- confirm it exists
# on the machine running this script.
write.csv(df, "C:/Users/Admin/OneDrive/Desktop/rainfall\\multivariate_linear_fitting.csv")

#plotting multivariate regression results
# One added-variable plot per predictor of each multivariate model, laid
# out as 1 row x 3 columns, without a background grid or main title.
# FALSE is spelled out because `F` is an ordinary, reassignable variable,
# and the first call no longer carries a stray trailing empty argument.

avPlots(msn_lm.m, layout = c(1, 3), grid = FALSE,
        ylab = "GPM IMERG", main = "")

avPlots(tac_lm.m, layout = c(1, 3), grid = FALSE,
        ylab = "GPM IMERG", main = "")

avPlots(vsu_lm.m, layout = c(1, 3), grid = FALSE,
        ylab = "GPM IMERG", main = "")

rainfall

Otero, Kate Andrea

2023-07-13

importing data

plot GPM_IMERG data

univariate linear fitting