Data Preparation

# Load the 2005 Survey of Financial Security extract.
# The CSV begins with a UTF-8 byte-order mark. Without an explicit encoding the
# BOM is folded into the first header, which is why the names() output recorded
# below shows "ï..CASEID". Reading the file as "UTF-8-BOM" strips the mark, so
# the first column is named "CASEID" on a fresh run.
financial_survey <- read.csv(
  "Data/The_Survey_of_Financial_Security_2005.csv",
  fileEncoding = "UTF-8-BOM"
)
# Preview the first rows instead of printing the entire data frame.
head(financial_survey)
names(financial_survey)
##  [1] "ï..CASEID" "weight"    "wdtotal"   "wdprmor"   "fmsz27"   
##  [6] "nbear27"   "dvfcrn"    "atinc27"   "wdstcred"  "wdsloan"  
## [11] "wnetwpg"   "dvphlv2g"  "hcsex_r"   "wdstomor"  "wdstloc"  
## [16] "ecpage"    "ecfexchr"  "watotpt"

Response

The response variable is “Household Debt”, which is numerical.
Household Debt [wdtotal]

Explanatory

The explanatory variables are of both numerical and categorical types.

Numerical/Quantitative variables:-
Family income after taxes [atinc27]
Number of family members [fmsz27]
Number of credit cards [dvfcrn]
Student debt [wdsloan]
Child related expense [ecfexchr]
Subtotal-credit card & instalment debt [wdstcred]
Assets (continue basis) [watotpt]
Mortgage on principal residence [wdprmor]
Mortgage on other residence [wdstomor]
Line of credit [wdstloc]
Net worth including pension [wnetwpg]
Age of major income earner [ecpage]

Categorical/Qualitative variables:-
Number of earners in the family [nbear27]
Level of education [dvphlv2g]
Sex of major income earner [hcsex_r]

Relevant summary statistics and linear regression

# Summarise the response (wdtotal), then examine each candidate predictor in
# turn: a scatterplot of wdtotal against the predictor, followed by their
# correlation (cor() computes Pearson's r by default).
# Lines starting with "##" are the output recorded when this was run.
summary(financial_survey$wdtotal)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2300   80400  131500  161122  208025  950000
plot(financial_survey$wdtotal ~ financial_survey$atinc27)

cor(financial_survey$wdtotal, financial_survey$atinc27)
## [1] 0.3970706
plot(financial_survey$wdtotal ~ financial_survey$fmsz27)

cor(financial_survey$wdtotal, financial_survey$fmsz27)
## [1] 0.2773952
plot(financial_survey$wdtotal ~ financial_survey$dvfcrn)

cor(financial_survey$wdtotal, financial_survey$dvfcrn)
## [1] -0.1065488
plot(financial_survey$wdtotal ~ financial_survey$wdsloan)

cor(financial_survey$wdtotal, financial_survey$wdsloan)
## [1] 0.1159679
plot(financial_survey$wdtotal ~ financial_survey$ecfexchr)

cor(financial_survey$wdtotal, financial_survey$ecfexchr)
## [1] 0.1147722
plot(financial_survey$wdtotal ~ financial_survey$wdstcred)

cor(financial_survey$wdtotal, financial_survey$wdstcred)
## [1] 0.1504003
plot(financial_survey$wdtotal ~ financial_survey$watotpt)

cor(financial_survey$wdtotal, financial_survey$watotpt)
## [1] 0.383406
# wdprmor shows by far the strongest linear association with wdtotal
# (r = 0.95) among the predictors examined here.
plot(financial_survey$wdtotal ~ financial_survey$wdprmor )

cor(financial_survey$wdtotal, financial_survey$wdprmor )
## [1] 0.94779
plot(financial_survey$wdtotal ~ financial_survey$wdstomor)

# wdstomor has zero standard deviation in this sample (see the warning below),
# i.e. it takes a single value, so its correlation with wdtotal is undefined
# and cor() returns NA.
cor(financial_survey$wdtotal, financial_survey$wdstomor)
## Warning in cor(financial_survey$wdtotal, financial_survey$wdstomor): the
## standard deviation is zero
## [1] NA
plot(financial_survey$wdtotal ~ financial_survey$wdstloc)

cor(financial_survey$wdtotal, financial_survey$wdstloc)
## [1] 0.4262347
plot(financial_survey$wdtotal ~ financial_survey$wnetwpg)

cor(financial_survey$wdtotal, financial_survey$wnetwpg)
## [1] 0.2767768
plot(financial_survey$wdtotal ~ financial_survey$ecpage )

cor(financial_survey$wdtotal, financial_survey$ecpage )
## [1] -0.1029961
# NOTE(review): nbear27, dvphlv2g and hcsex_r are listed as categorical in the
# variable description, but they are passed to plot()/cor() as numeric codes
# here - confirm that Pearson's r is the intended measure for them.
plot(financial_survey$wdtotal ~ financial_survey$nbear27 )

cor(financial_survey$wdtotal, financial_survey$nbear27 )
## [1] 0.2211976
plot(financial_survey$wdtotal ~ financial_survey$dvphlv2g)

cor(financial_survey$wdtotal, financial_survey$dvphlv2g)
## [1] 0.2656022
plot(financial_survey$wdtotal ~ financial_survey$hcsex_r)

cor(financial_survey$wdtotal, financial_survey$hcsex_r)
## [1] -0.07038076
 # Fit a multiple linear regression of total household debt (wdtotal) on the
 # candidate predictors. Columns are supplied through `data =` rather than as
 # `financial_survey$...` inside the formula, so term labels are the bare
 # column names and predict(reg_multi, newdata = ...) works as expected.
 #
 # wdstomor is left out: it is constant in this sample (cor() above reports a
 # zero standard deviation), so lm() could not estimate it and reported NA
 # "because of singularities". Dropping an aliased term leaves every other
 # estimate, the residuals and R-squared unchanged.
 #
 # NOTE(review): nbear27, dvphlv2g and hcsex_r are described as categorical but
 # enter the model as numeric codes; wrap them in factor() if that is intended.
 #
 # The summary recorded below was produced by the original
 # `financial_survey$...` call, so its term labels carry that prefix and it
 # still lists the NA row for wdstomor.
 reg_multi <- lm(
   wdtotal ~ atinc27 + fmsz27 + dvfcrn + wdsloan + ecfexchr + wdstcred +
     watotpt + wdprmor + wdstloc + wnetwpg + ecpage + nbear27 + dvphlv2g +
     hcsex_r,
   data = financial_survey
 )
 summary(reg_multi)
## 
## Call:
## lm(formula = financial_survey$wdtotal ~ financial_survey$atinc27 + 
##     financial_survey$fmsz27 + financial_survey$dvfcrn + financial_survey$wdsloan + 
##     financial_survey$ecfexchr + financial_survey$wdstcred + financial_survey$watotpt + 
##     financial_survey$wdprmor + financial_survey$wdstomor + financial_survey$wdstloc + 
##     financial_survey$wnetwpg + financial_survey$ecpage + financial_survey$nbear27 + 
##     financial_survey$dvphlv2g + financial_survey$hcsex_r)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -33515  -6983  -2865   3686  79145 
## 
## Coefficients: (1 not defined because of singularities)
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                1.057e+04  4.135e+03   2.556  0.01092 *  
## financial_survey$atinc27   2.561e-02  1.077e-02   2.377  0.01785 *  
## financial_survey$fmsz27   -9.041e+02  5.193e+02  -1.741  0.08232 .  
## financial_survey$dvfcrn   -1.836e+02  3.055e+02  -0.601  0.54809    
## financial_survey$wdsloan   7.402e-01  8.372e-02   8.841  < 2e-16 ***
## financial_survey$ecfexchr -2.929e-03  1.511e-01  -0.019  0.98454    
## financial_survey$wdstcred  7.938e-01  7.865e-02  10.093  < 2e-16 ***
## financial_survey$watotpt   3.104e-01  2.246e-02  13.819  < 2e-16 ***
## financial_survey$wdprmor   6.769e-01  2.336e-02  28.976  < 2e-16 ***
## financial_survey$wdstomor         NA         NA      NA       NA    
## financial_survey$wdstloc   7.362e-01  3.045e-02  24.180  < 2e-16 ***
## financial_survey$wnetwpg  -3.116e-01  2.248e-02 -13.861  < 2e-16 ***
## financial_survey$ecpage   -1.365e+02  5.357e+01  -2.548  0.01114 *  
## financial_survey$nbear27   2.483e+03  8.304e+02   2.990  0.00293 ** 
## financial_survey$dvphlv2g  6.099e+02  6.174e+02   0.988  0.32373    
## financial_survey$hcsex_r  -7.484e+02  1.235e+03  -0.606  0.54499    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11830 on 465 degrees of freedom
## Multiple R-squared:  0.9906, Adjusted R-squared:  0.9903 
## F-statistic:  3497 on 14 and 465 DF,  p-value: < 2.2e-16

Linear Regression Equation

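From the fitted coefficients, we can express the model as follows:
\(Y = \beta_0 + \beta_1 X_1 + \beta_2 X_2 + \beta_3 X_3 + \beta_4 X_4\)
\(\widehat{\text{wdtotal}} = 5290 + 0.9944\,(\text{wdprmor}) + 2871\,(\text{nbear27}) + 1.11\,(\text{wdsloan}) + 1.059\,(\text{wdstloc})\). This model has an R-squared value of 0.9815, which indicates a very strong linear fit: 98.15% of the variability in total household debt is explained by the four explanatory variables taken together. Note that these coefficients and this R-squared value belong to a reduced four-predictor model, not to the full model summarised above (whose R-squared is 0.9906).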