# Load the yearly money-supply data set from the working directory.
moneydata <- read.csv(file = "moneydata.csv")

# Five-number summary (plus mean) of every column in the data set.
summary(object = moneydata)
##       year      Money_printed         GDP          Interest_RATE   
##  Min.   :1970   Min.   :  7374   Min.   : 474131   Min.   : 5.375  
##  1st Qu.:1978   1st Qu.: 18646   1st Qu.: 615406   1st Qu.: 9.000  
##  Median :1987   Median : 58555   Median : 880267   Median :10.000  
##  Mean   :1987   Mean   :146642   Mean   :1079314   Mean   : 9.786  
##  3rd Qu.:1996   3rd Qu.:227725   3rd Qu.:1452676   3rd Qu.:11.000  
##  Max.   :2004   Max.   :647495   Max.   :2389660   Max.   :13.000  
##       WPI        
##  Min.   : 14.30  
##  1st Qu.: 28.90  
##  Median : 58.20  
##  Mean   : 77.37  
##  3rd Qu.:124.40  
##  Max.   :187.30
# Our dependent variable is the amount of money printed by the Central Bank (Money_printed).

# Our independent variables are GDP, Interest Rate and WPI.


# To check the linear relationship between Dependent Variable and Independent Variable.
# Drop the year column and keep only the modelling variables: the response
# (Money_printed) and the three candidate predictors. Columns are selected by
# name rather than by position (2:5) so the subset stays correct even if the
# column order of the CSV changes.
newmoneydate <- moneydata[
  ,
  c("Money_printed", "GDP", "Interest_RATE", "WPI"),
  drop = FALSE
]

# Scatterplot matrix of all four variables. kdepairs() is provided by the
# ResourceSelection package; the call is namespace-qualified so it does not
# depend on the package having been attached earlier in the session.
ResourceSelection::kdepairs(newmoneydate)

# From the plot above we can say that there is a strong correlation between Money and GDP, Money and WPI, and GDP and WPI.


# Next we will check the linearity between the independent variables. This is to detect multicollinearity.
# For this we will drop the dependent variable (Money_printed).

# Keep only the independent variables (drop the response Money_printed).
# Columns are selected by name rather than by position (2:4) so the subset
# does not silently change if the column order of newmoneydate changes.
printmoney <- newmoneydate[, c("GDP", "Interest_RATE", "WPI"), drop = FALSE]

# Scatterplot matrix of the predictors only, to inspect multicollinearity.
# kdepairs() is provided by the ResourceSelection package; the call is
# namespace-qualified so it does not depend on the package being attached.
ResourceSelection::kdepairs(printmoney)

# From the plot above we can say that GDP and WPI have a very strong correlation, so we can remove either one of them.
# But for our initial analysis we will include both and then remove one.

# Next we will see how our model looks with 3 independent variables and will do the backward elimination.


# Full model: regress Money_printed on all three candidate predictors.
# The formula uses bare column names together with `data =` instead of
# `newmoneydate$...` terms, so the fit is tied to the data frame and
# predict(mutlireg, newdata = ...) works as expected. The estimates are
# unchanged; only the coefficient labels differ from the summary recorded
# below (e.g. `GDP` instead of `newmoneydate$GDP`).
mutlireg <- lm(
  Money_printed ~ GDP + Interest_RATE + WPI,
  data = newmoneydate
)

summary(mutlireg)
## 
## Call:
## lm(formula = newmoneydate$Money_printed ~ newmoneydate$GDP + 
##     newmoneydate$Interest_RATE + newmoneydate$WPI)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -46875  -7027   1387  15068  46249 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                -1.759e+04  5.052e+04  -0.348  0.73008    
## newmoneydate$GDP            2.975e-01  8.787e-02   3.385  0.00195 ** 
## newmoneydate$Interest_RATE -1.626e+04  2.919e+03  -5.570 4.19e-06 ***
## newmoneydate$WPI            2.943e+01  9.038e+02   0.033  0.97423    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 22770 on 31 degrees of freedom
## Multiple R-squared:  0.9848, Adjusted R-squared:  0.9834 
## F-statistic: 670.4 on 3 and 31 DF,  p-value: < 2.2e-16
# Based on its p-value (0.974), WPI is not significant and can be dropped; we were planning to remove it anyway because of its strong correlation with GDP.

# Reduced model after backward elimination of WPI: regress Money_printed on
# GDP and Interest_RATE only. The formula uses bare column names together with
# `data =` instead of `newmoneydate$...` terms, so predict(model2, newdata = ...)
# works as expected. The estimates are unchanged; only the coefficient labels
# differ from the summary recorded below (e.g. `GDP` instead of
# `newmoneydate$GDP`).
model2 <- lm(
  Money_printed ~ GDP + Interest_RATE,
  data = newmoneydate
)

summary(model2)
## 
## Call:
## lm(formula = newmoneydate$Money_printed ~ newmoneydate$GDP + 
##     newmoneydate$Interest_RATE)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -47055  -7168   1432  14998  46008 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                -1.906e+04  2.192e+04  -0.870    0.391    
## newmoneydate$GDP            3.003e-01  6.913e-03  43.443  < 2e-16 ***
## newmoneydate$Interest_RATE -1.619e+04  1.977e+03  -8.191 2.35e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 22420 on 32 degrees of freedom
## Multiple R-squared:  0.9848, Adjusted R-squared:  0.9839 
## F-statistic:  1038 on 2 and 32 DF,  p-value: < 2.2e-16
# From the new model we can see the p value is less than 0.05 for both the independent variables.

# Next we will see how our Residuals Look like.

# Residuals against fitted values, with a horizontal reference line at zero.
plot(x = fitted(model2), y = resid(model2))
abline(h = 0)

# Average residual of the fitted model.
mean(residuals(model2))
## [1] -6.82121e-13
# The mean of the residuals is essentially zero.

# Histogram of the residuals.
hist(model2$residuals)

# Normal Q-Q plot of the residuals together with its reference line.
qqnorm(residuals(model2))
qqline(residuals(model2))

# Conclusion: In the Q-Q plot the points in the centre lie close to the reference line, i.e. there is some normality,
# but towards the ends the points move away from the line, so the residuals are not normally distributed (especially towards 0).
# We therefore cannot say with certainty that both GDP and Interest Rate should be considered when deciding how much money to print.

# References
#
# https://www.youtube.com/watch?v=px72eCYPuvc
# http://analyticspro.org/2016/03/02/r-tutorial-multiple-linear-regression/