Regresi Linear dengan R

library(readxl)
# Read the inflow worksheet from the Excel workbook into a tibble.
# NOTE(review): the workbook is named "aceh.xlsx" while the value column is
# `Utara` -- confirm this is the intended file.
datainflowsumatera <- read_excel("aceh.xlsx")

# Auto-print the tibble to inspect the loaded rows.
datainflowsumatera

## # A tibble: 11 x 2
##    Tahun  Utara
##    <dbl>  <dbl>
##  1  2011 23238.
##  2  2012 25981.
##  3  2013 18120.
##  4  2014 30503.
##  5  2015 30254.
##  6  2016 34427.
##  7  2017 35617.
##  8  2018 41769.
##  9  2019 47112.
## 10  2020 36609.
## 11  2021 31840.

# Per-column descriptive statistics (min, quartiles, median, mean, max).
summary(object = datainflowsumatera)

##      Tahun          Utara      
##  Min.   :2011   Min.   :18120  
##  1st Qu.:2014   1st Qu.:28117  
##  Median :2016   Median :31840  
##  Mean   :2016   Mean   :32315  
##  3rd Qu.:2018   3rd Qu.:36113  
##  Max.   :2021   Max.   :47112

# Scatter-plot matrix of every pair of columns.
pairs(datainflowsumatera)

# Inflow against year. Column names are resolved through `data`, so the axes
# are labelled "Tahun" / "Utara" instead of the full `data$column` expression.
plot(Utara ~ Tahun, data = datainflowsumatera)

# Pearson correlation between inflow and year.
cor(datainflowsumatera$Utara, datainflowsumatera$Tahun)

## [1] 0.7360339

# Fit a simple linear regression of inflow (Utara) on year (Tahun).
# NOTE(review): the formula is built from `data$column` expressions rather than
# bare column names with a `data` argument -- confirm whether predicting on new
# data is needed, since that usually requires the bare-name form.
model <- lm(formula = datainflowsumatera$Utara ~ datainflowsumatera$Tahun)

# Coefficient table, residual quantiles and goodness-of-fit statistics.
summary(object = model)

## 
## Call:
## lm(formula = datainflowsumatera$Utara ~ datainflowsumatera$Tahun)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9584.1 -1616.6   952.5  1971.3  9331.5 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)   
## (Intercept)              -3640449.3  1125972.4  -3.233  0.01027 * 
## datainflowsumatera$Tahun     1821.8      558.5   3.262  0.00981 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5858 on 9 degrees of freedom
## Multiple R-squared:  0.5417, Adjusted R-squared:  0.4908 
## F-statistic: 10.64 on 1 and 9 DF,  p-value: 0.009809

# Analysis-of-variance table for the fitted regression.
anova(object = model)

## Analysis of Variance Table
## 
## Response: datainflowsumatera$Utara
##                          Df    Sum Sq   Mean Sq F value   Pr(>F)   
## datainflowsumatera$Tahun  1 365088223 365088223   10.64 0.009809 **
## Residuals                 9 308822226  34313581                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Scatter plot of inflow per year with the fitted regression line on top.
# Bare column names are looked up in `data`, which keeps the axis labels short
# ("Tahun" / "Utara") instead of repeating the data frame name.
plot(
  Utara ~ Tahun,
  data = datainflowsumatera,
  col = "dodgerblue",
  pch = 20,
  cex = 1.5,
  main = "Data Inflow Sumatera Utara"
)
abline(model) # add the regression line

# Cook's distance per observation, to spot influential points.
plot(cooks.distance(model), pch = 16, col = "blue")

# Standard diagnostic plots for the fitted lm object.
plot(model)

# Akaike information criterion of the fitted model.
AIC(object = model)

## [1] 225.8708

# Bayesian information criterion of the fitted model.
BIC(object = model)

## [1] 227.0645

# Fitted values for the first 11 observations.
head(x = predict(object = model), n = 11)

##        1        2        3        4        5        6        7        8 
## 23206.32 25028.13 26849.93 28671.74 30493.55 32315.36 34137.17 35958.97 
##        9       10       11 
## 37780.78 39602.59 41424.40

# Plot all 11 fitted values (one per year) so the plot covers the same
# observations as the printed predictions and residuals; n = 10 left out the
# last one.
plot(head(predict(model), n = 11))

# Residuals for the first 11 observations.
head(resid(model), n = 11)

##           1           2           3           4           5           6 
##    31.43721   952.51813 -8729.86506  1831.00913  -239.96924  2111.51341 
##           7           8           9          10          11 
##  1479.55102  5809.68687  9331.54030 -2993.27554 -9584.14622

# Estimated intercept and slope of the regression.
coef(object = model)

##              (Intercept) datainflowsumatera$Tahun 
##             -3640449.271                 1821.808

# Store the model's residuals and fitted values next to the observations,
# using exact-name `[[ ]]` extraction on both the model and the data.
datainflowsumatera[["residuals"]] <- model[["residuals"]]
datainflowsumatera[["predicted"]] <- model[["fitted.values"]]

# Auto-print the augmented table.
datainflowsumatera

## # A tibble: 11 x 4
##    Tahun  Utara residuals predicted
##    <dbl>  <dbl>     <dbl>     <dbl>
##  1  2011 23238.      31.4    23206.
##  2  2012 25981.     953.     25028.
##  3  2013 18120.   -8730.     26850.
##  4  2014 30503.    1831.     28672.
##  5  2015 30254.    -240.     30494.
##  6  2016 34427.    2112.     32315.
##  7  2017 35617.    1480.     34137.
##  8  2018 41769.    5810.     35959.
##  9  2019 47112.    9332.     37781.
## 10  2020 36609.   -2993.     39603.
## 11  2021 31840.   -9584.     41424.

# Scatter plot with a smoothed trend line. The title is written as
# response ~ predictor, matching the axes (y = Utara, x = Tahun).
scatter.smooth(
  x = datainflowsumatera$Tahun,
  y = datainflowsumatera$Utara,
  main = "Utara ~ Tahun"
)

# Box plot of inflow. The outlier values are shown in the subtitle: passed as
# an unnamed argument, boxplot() would treat them as a second data group and
# draw an extra (possibly empty) box.
boxplot(
  datainflowsumatera$Utara,
  main = "Utara",
  sub = paste(
    "Outliers:",
    toString(boxplot.stats(datainflowsumatera$Utara)$out)
  )
)

# Kernel density estimate of inflow; the y-axis is a density, not a count.
plot(
  density(datainflowsumatera$Utara),
  main = "Sumatera Utara Plot: Inflow",
  ylab = "Density"
)

# Scatter plot with the fitted line and the regression equation as a label.
coefs <- coef(model)
plot(Utara ~ Tahun, data = datainflowsumatera)
abline(coefs) # coefs is c(intercept, slope)

# Anchor the label at the top-left corner of the data range so it is drawn
# inside the plot region; fixed coordinates such as (12, 10) lie far outside
# the plotted years and inflow values. The predictor in the equation is Tahun.
text(
  x = min(datainflowsumatera$Tahun),
  y = max(datainflowsumatera$Utara),
  labels = paste(
    "expression = ", round(coefs[1], 2), "+", round(coefs[2], 2), "*Tahun"
  ),
  adj = c(0, 1) # left- and top-align the text at the anchor point
)

# Pearson correlation test between year and inflow (H0: correlation is 0).
cor.test(x = datainflowsumatera$Tahun, y = datainflowsumatera$Utara)

## 
##  Pearson's product-moment correlation
## 
## data:  datainflowsumatera$Tahun and datainflowsumatera$Utara
## t = 3.2619, df = 9, p-value = 0.009809
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2438063 0.9267309
## sample estimates:
##       cor 
## 0.7360339

Regresi Linear dengan R

Prof. Dr. Suhartono, M.Kom. (UIN Maulana Malik Ibrahim Malang)

3/7/2022