# Load the data set from moneydata.csv and print summary statistics for each column.
moneydata <- read.csv(file = "moneydata.csv")
summary(object = moneydata)
## year Money_printed GDP Interest_RATE
## Min. :1970 Min. : 7374 Min. : 474131 Min. : 5.375
## 1st Qu.:1978 1st Qu.: 18646 1st Qu.: 615406 1st Qu.: 9.000
## Median :1987 Median : 58555 Median : 880267 Median :10.000
## Mean :1987 Mean :146642 Mean :1079314 Mean : 9.786
## 3rd Qu.:1996 3rd Qu.:227725 3rd Qu.:1452676 3rd Qu.:11.000
## Max. :2004 Max. :647495 Max. :2389660 Max. :13.000
## WPI
## Min. : 14.30
## 1st Qu.: 28.90
## Median : 58.20
## Mean : 77.37
## 3rd Qu.:124.40
## Max. :187.30
# Our dependent variable is the amount of money to be printed by the Central Bank.
# Our independent variables are GDP, Interest Rate and WPI.
# First, check the linear relationship between the dependent variable and the independent variables.
# Keep only the modelling variables, i.e. drop the year column. Columns are
# selected by name (rather than by positions 2:5) so that the result, and its
# column order, does not depend on how the CSV happens to be laid out.
newmoneydate <- subset(
  moneydata,
  select = c(Money_printed, GDP, Interest_RATE, WPI)
)
# Plot the pairwise relationships between all remaining variables.
# NOTE(review): kdepairs() is neither defined nor loaded in the visible part of
# this file; presumably ResourceSelection::kdepairs -- confirm the package is attached.
kdepairs(newmoneydate)

# From the above we can say that there is a strong correlation between Money vs GDP, Money vs WPI and GDP vs WPI.
# Next we will check the linearity between the independent variables. This is to detect multicollinearity.
# For this we will drop the dependent variable (Money Printed).
# Predictors only: leave out the dependent variable (Money_printed). The
# independent variables are selected by name rather than by positions 2:4, so
# the selection stays correct even if the column order of newmoneydate changes.
printmoney <- subset(newmoneydate, select = c(GDP, Interest_RATE, WPI))
# Plot the pairwise relationships between the independent variables.
# NOTE(review): kdepairs() is neither defined nor loaded in the visible part of
# this file; presumably ResourceSelection::kdepairs -- confirm the package is attached.
kdepairs(printmoney)

# From the above plot we can say that GDP and WPI have a very strong correlation, so we can remove either one of them.
# But for our initial analysis we will include both and then remove one.
# Next we will see how our model looks with 3 independent variables and then do backward elimination.
# Full model: regress Money_printed on all three candidate predictors.
# The formula uses bare column names together with `data =` (instead of
# `newmoneydate$...` terms) so the coefficients get clean labels and the fitted
# model can be used with predict(newdata = ...).
mutlireg <- lm(Money_printed ~ GDP + Interest_RATE + WPI, data = newmoneydate)
summary(mutlireg)
##
## Call:
## lm(formula = Money_printed ~ GDP + Interest_RATE + WPI, data = newmoneydate)
##
## Residuals:
## Min 1Q Median 3Q Max
## -46875 -7027 1387 15068 46249
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.759e+04 5.052e+04 -0.348 0.73008
## GDP 2.975e-01 8.787e-02 3.385 0.00195 **
## Interest_RATE -1.626e+04 2.919e+03 -5.570 4.19e-06 ***
## WPI 2.943e+01 9.038e+02 0.033 0.97423
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22770 on 31 degrees of freedom
## Multiple R-squared: 0.9848, Adjusted R-squared: 0.9834
## F-statistic: 670.4 on 3 and 31 DF, p-value: < 2.2e-16
# The p-value for WPI (0.974) is far above 0.05, so WPI is not significant and we drop it; we were already planning to remove it because of its strong correlation with GDP.
# Reduced model after backward elimination: WPI is dropped, leaving GDP and
# Interest_RATE as predictors of Money_printed.
# The formula uses bare column names together with `data =` (instead of
# `newmoneydate$...` terms) so the coefficients get clean labels and the fitted
# model can be used with predict(newdata = ...).
model2 <- lm(Money_printed ~ GDP + Interest_RATE, data = newmoneydate)
summary(model2)
##
## Call:
## lm(formula = Money_printed ~ GDP + Interest_RATE, data = newmoneydate)
##
## Residuals:
## Min 1Q Median 3Q Max
## -47055 -7168 1432 14998 46008
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.906e+04 2.192e+04 -0.870 0.391
## GDP 3.003e-01 6.913e-03 43.443 < 2e-16 ***
## Interest_RATE -1.619e+04 1.977e+03 -8.191 2.35e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22420 on 32 degrees of freedom
## Multiple R-squared: 0.9848, Adjusted R-squared: 0.9839
## F-statistic: 1038 on 2 and 32 DF, p-value: < 2.2e-16
# From the new model we can see the p value is less than 0.05 for both the independent variables.
# Next we will see how our Residuals Look like.
# Residual diagnostics for model2.
# Residuals against fitted values, with a horizontal reference line at zero.
plot(fitted(model2), resid(model2))
abline(h=0)

# Mean of the residuals.
mean(model2$residuals)
## [1] -6.82121e-13
# The mean of the residuals is effectively zero (about -6.8e-13).
# Histogram of the residuals.
hist(model2$residuals)

# Normal Q-Q plot of the residuals, with its reference line.
qqnorm(model2$residuals)
qqline(model2$residuals)

# Conclusion: In the Q-Q plot the points in the center lie roughly on the reference line, i.e. there is some normality,
# but towards the ends the residuals are not normally distributed, especially towards 0.
# We therefore cannot say with certainty that both GDP and Interest Rate should be considered when deciding how much money to print.