library(readxl)
# Load the inflow table from the Excel workbook in the working directory,
# then display it.
datainflowsumatera <- read_excel("aceh.xlsx")
datainflowsumatera
## # A tibble: 11 x 2
## Tahun Utara
## <dbl> <dbl>
## 1 2011 23238.
## 2 2012 25981.
## 3 2013 18120.
## 4 2014 30503.
## 5 2015 30254.
## 6 2016 34427.
## 7 2017 35617.
## 8 2018 41769.
## 9 2019 47112.
## 10 2020 36609.
## 11 2021 31840.
# Per-column summary statistics (min, quartiles, median, mean, max).
summary(datainflowsumatera)
## Tahun Utara
## Min. :2011 Min. :18120
## 1st Qu.:2014 1st Qu.:28117
## Median :2016 Median :31840
## Mean :2016 Mean :32315
## 3rd Qu.:2018 3rd Qu.:36113
## Max. :2021 Max. :47112
# Scatterplot matrix of the columns for a first look at the data.
pairs(datainflowsumatera)

# Inflow (Utara) against year (Tahun). The formula uses bare column names so
# that they are resolved through `data`; with `$` expressions in the formula
# the `data` argument was unused and the axes were labelled with the raw
# `datainflowsumatera$...` expressions.
plot(Utara ~ Tahun, data = datainflowsumatera)

# Correlation between inflow and year.
cor(datainflowsumatera$Utara, datainflowsumatera$Tahun)
## [1] 0.7360339
# Fit a simple linear regression with Utara as the response and Tahun as the
# predictor, then print the coefficient table and fit statistics.
# NOTE(review): the formula references the columns through `$` rather than
# `lm(Utara ~ Tahun, data = datainflowsumatera)`. That is why the recorded
# output labels the slope `datainflowsumatera$Tahun`; it presumably also
# prevents `predict(model, newdata = ...)` from working on new years.
# Consider switching (and refreshing the recorded output) -- TODO confirm.
model <- lm(datainflowsumatera$Utara ~ datainflowsumatera$Tahun)
summary(model)
##
## Call:
## lm(formula = datainflowsumatera$Utara ~ datainflowsumatera$Tahun)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9584.1 -1616.6 952.5 1971.3 9331.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3640449.3 1125972.4 -3.233 0.01027 *
## datainflowsumatera$Tahun 1821.8 558.5 3.262 0.00981 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5858 on 9 degrees of freedom
## Multiple R-squared: 0.5417, Adjusted R-squared: 0.4908
## F-statistic: 10.64 on 1 and 9 DF, p-value: 0.009809
# Analysis-of-variance table for the fitted regression.
anova(model)
## Analysis of Variance Table
##
## Response: datainflowsumatera$Utara
## Df Sum Sq Mean Sq F value Pr(>F)
## datainflowsumatera$Tahun 1 365088223 365088223 10.64 0.009809 **
## Residuals 9 308822226 34313581
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Scatter plot of inflow by year with the fitted regression line on top.
# Bare column names are used so that `data` is actually consulted and the
# axes read "Tahun" / "Utara" instead of the raw `$` expressions.
plot(
  Utara ~ Tahun,
  data = datainflowsumatera,
  col = "dodgerblue",
  pch = 20,
  cex = 1.5,
  main = "Data Inflow Sumatera Utara"
)
abline(model) # Add a regression line

# Cook's distance for each observation, plotted against observation index.
plot(cooks.distance(model), pch = 16, col = "blue")

# Built-in diagnostic plots for the fitted lm object.
plot(model)




# Akaike information criterion of the fitted model.
AIC(model)
## [1] 225.8708
# Bayesian information criterion of the fitted model.
BIC(model)
## [1] 227.0645
# Fitted values for the first 11 observations; the table has 11 rows, so this
# shows all of them.
head(predict(model), n = 11)
## 1 2 3 4 5 6 7 8
## 23206.32 25028.13 26849.93 28671.74 30493.55 32315.36 34137.17 35958.97
## 9 10 11
## 37780.78 39602.59 41424.40
# Plot every fitted value against its observation index. The previous
# `head(..., n = 10)` silently dropped the last of the 11 observations, while
# the surrounding calls all use the full 11 rows.
plot(predict(model))

# Residuals (observed minus fitted) for the first 11 observations.
head(resid(model), n = 11)
## 1 2 3 4 5 6
## 31.43721 952.51813 -8729.86506 1831.00913 -239.96924 2111.51341
## 7 8 9 10 11
## 1479.55102 5809.68687 9331.54030 -2993.27554 -9584.14622
# Estimated intercept and slope of the regression.
coef(model)
## (Intercept) datainflowsumatera$Tahun
## -3640449.271 1821.808
# Store the residual and the fitted value of each observation next to the
# original columns, then display the augmented table.
datainflowsumatera[["residuals"]] <- resid(model)
datainflowsumatera[["predicted"]] <- fitted(model)
datainflowsumatera
## # A tibble: 11 x 4
## Tahun Utara residuals predicted
## <dbl> <dbl> <dbl> <dbl>
## 1 2011 23238. 31.4 23206.
## 2 2012 25981. 953. 25028.
## 3 2013 18120. -8730. 26850.
## 4 2014 30503. 1831. 28672.
## 5 2015 30254. -240. 30494.
## 6 2016 34427. 2112. 32315.
## 7 2017 35617. 1480. 34137.
## 8 2018 41769. 5810. 35959.
## 9 2019 47112. 9332. 37781.
## 10 2020 36609. -2993. 39603.
## 11 2021 31840. -9584. 41424.
# Scatter plot of inflow (y) against year (x) with a smoothed trend curve.
# The title follows the response ~ predictor order of what is plotted.
scatter.smooth(
  x = datainflowsumatera$Tahun,
  y = datainflowsumatera$Utara,
  main = "Utara ~ Tahun"
)

# Box plot of inflow. The outlier values go into the sub-caption; passing
# them as an unnamed argument made boxplot() treat them as a second data
# group and draw a spurious extra box.
boxplot(
  datainflowsumatera$Utara,
  main = "Utara",
  sub = paste(
    "Outlier values:",
    toString(boxplot.stats(datainflowsumatera$Utara)$out)
  )
)

# Kernel density estimate of the inflow values.
plot(
  density(datainflowsumatera$Utara),
  main = "Sumatera Utara Plot: Inflow",
  ylab = "Frequency"
)

# Redraw the scatter plot, add the fitted line from the coefficient vector
# (intercept, slope), and annotate it with the fitted equation.
coefs <- coef(model)
plot(Utara ~ Tahun, data = datainflowsumatera)
abline(coefs)
# Anchor the label at the top-left of the data range. The former position
# (x = 12, y = 10) lies far outside the plotted region, so the text was never
# visible. The slope multiplies the predictor Tahun, not the response Utara.
text(
  x = min(datainflowsumatera$Tahun),
  y = max(datainflowsumatera$Utara),
  labels = paste(
    "expression = ", round(coefs[1], 2), "+", round(coefs[2], 2), "*Tahun"
  ),
  adj = c(0, 1)
)

# Significance test and confidence interval for the correlation between year
# and inflow.
cor.test(datainflowsumatera$Tahun, datainflowsumatera$Utara)
##
## Pearson's product-moment correlation
##
## data: datainflowsumatera$Tahun and datainflowsumatera$Utara
## t = 3.2619, df = 9, p-value = 0.009809
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2438063 0.9267309
## sample estimates:
## cor
## 0.7360339