#setwd("D:/Misc/EIT/Data Analytics/Data Analytics-PGDAV8.100-T4-2021/Practical")
# Load the stock-index data set; the path is relative to the working directory.
df <- read.csv(file = "StockIndexPrice.csv", header = TRUE)
# Show the number of rows and columns.
dim(df)
## [1] 24 5
# Preview the first six rows.
head(df, n = 6L)
## Year Month Iterest_Rate Unemplyment_Rate Stock_Index_Price
## 1 2017 12 2.75 5.3 1464
## 2 2017 11 2.50 5.3 1394
## 3 2017 10 2.50 5.3 1357
## 4 2017 9 2.50 5.3 1293
## 5 2017 8 2.50 5.4 1256
## 6 2017 7 2.50 5.6 1254
# Replace the column headers with short names, assigned by position: the
# third, fourth and fifth columns become IR, UR and SIP.
names(df) <- c(
  "Year",
  "Month",
  "IR",
  "UR",
  "SIP"
)
## Linear regression - single variate
# Simple linear regression of SIP on IR. The variables are named in the
# formula and looked up through `data = df` (instead of being written as
# df$SIP / df$IR), so the coefficient is labelled "IR" and
# predict(lm1, newdata = ...) can find the predictor in new data.
lm1 <- lm(SIP ~ IR, data = df)
summary(lm1)
##
## Call:
## lm(formula = SIP ~ IR, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -183.892 -30.181 4.455 56.608 101.057
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -99.46 95.21 -1.045 0.308
## IR 564.20 45.32 12.450 1.95e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 75.96 on 22 degrees of freedom
## Multiple R-squared: 0.8757, Adjusted R-squared: 0.8701
## F-statistic: 155 on 1 and 22 DF, p-value: 1.954e-11
# Scatter plot of SIP against IR (axes labelled by column name), with the
# fitted regression line overlaid in blue.
plot(SIP ~ IR, data = df)
abline(lm1, col = "blue")
Intercept: H0: the intercept is equal to 0. H1: the intercept is not equal to 0. => You cannot reject H0, because the p-value (0.308) > alpha (0.05).
Coefficient for IR: H0: the coefficient for IR is equal to 0. H1: the coefficient for IR is not equal to 0. => You can reject H0 in favour of H1, because the p-value (1.95e-11) < alpha (0.05).
Initial equation: SIP = -99.46 + 564.20 * IR
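Because the intercept is not significantly different from 0, the initial equation could be refitted as an equation with the intercept fixed at 0. Note that R computes R-squared differently for a model without an intercept, so the R-squared of the refitted model is not directly comparable with the one above.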
## Linear regression - single variate
# Regression of SIP on IR with the intercept removed (`0 +` in the formula).
# The variables are looked up through `data = df` rather than written as
# df$SIP / df$IR, so the coefficient is labelled "IR" and
# predict(lm11, newdata = ...) can find the predictor in new data.
lm11 <- lm(SIP ~ 0 + IR, data = df)
summary(lm11)
##
## Call:
## lm(formula = SIP ~ 0 + IR, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -201.614 -38.204 7.326 45.636 100.265
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## IR 517.494 7.395 69.98 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 76.12 on 23 degrees of freedom
## Multiple R-squared: 0.9953, Adjusted R-squared: 0.9951
## F-statistic: 4897 on 1 and 23 DF, p-value: < 2.2e-16
# Scatter plot of SIP against IR (axes labelled by column name), with the
# fitted no-intercept line overlaid in green.
plot(SIP ~ IR, data = df)
abline(lm11, col = "green")
# Simple linear regression of SIP on UR. The variables are looked up through
# `data = df` rather than written as df$SIP / df$UR, so the coefficient is
# labelled "UR" and predict(lm2, newdata = ...) can find the predictor in new
# data.
lm2 <- lm(SIP ~ UR, data = df)
summary(lm2)
##
## Call:
## lm(formula = SIP ~ UR, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -159.671 -41.996 2.089 72.381 151.226
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4471.3 304.2 14.7 7.41e-13 ***
## UR -589.0 52.6 -11.2 1.49e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 83.25 on 22 degrees of freedom
## Multiple R-squared: 0.8507, Adjusted R-squared: 0.8439
## F-statistic: 125.4 on 1 and 22 DF, p-value: 1.487e-10
# Scatter plot of SIP against UR (axes labelled by column name), with the
# fitted regression line overlaid in red.
plot(SIP ~ UR, data = df)
abline(lm2, col = "red")
# Multiple linear regression of SIP on both IR and UR. The variables are
# looked up through `data = df` rather than written as df$..., so the
# coefficients are labelled "IR" and "UR" and predict(lm3, newdata = ...) can
# find the predictors in new data.
lm3 <- lm(SIP ~ IR + UR, data = df)
summary(lm3)
##
## Call:
## lm(formula = SIP ~ IR + UR, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -158.205 -41.667 -6.248 57.741 118.810
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1798.4 899.2 2.000 0.05861 .
## IR 345.5 111.4 3.103 0.00539 **
## UR -250.1 117.9 -2.121 0.04601 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 70.56 on 21 degrees of freedom
## Multiple R-squared: 0.8976, Adjusted R-squared: 0.8879
## F-statistic: 92.07 on 2 and 21 DF, p-value: 4.043e-11
# Multiple linear regression of SIP on IR and UR with the intercept removed
# (`0 +` in the formula). The variables are looked up through `data = df`
# rather than written as df$..., so the coefficients are labelled "IR" and
# "UR" and predict(lm33, newdata = ...) can find the predictors in new data.
lm33 <- lm(SIP ~ 0 + IR + UR, data = df)
summary(lm33)
##
## Call:
## lm(formula = SIP ~ 0 + IR + UR, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -178.595 -31.234 4.249 56.899 99.977
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## IR 558.91 34.04 16.418 7.88e-14 ***
## UR -15.40 12.37 -1.246 0.226
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 75.22 on 22 degrees of freedom
## Multiple R-squared: 0.9956, Adjusted R-squared: 0.9952
## F-statistic: 2508 on 2 and 22 DF, p-value: < 2.2e-16
### Resources: Linear regression - http://r-statistics.co/Linear-Regression.html