##Trying to predict what neighborhood the house is in.
library(car)
library(textir)
## Loading required package: distrom
## Loading required package: Matrix
## Loading required package: gamlr
## Loading required package: parallel
library(class)
# Load the house-price data and preview the first rows.
# stringsAsFactors = TRUE keeps the text columns (Brick, Neighborhood) as
# factors. That was the read.csv() default before R 4.0.0 and is what the
# per-level counts in summary(houses), the per-neighborhood price plot and the
# glm() fit further down rely on.
houses <- read.csv(
  "/Users/kimberlyhatlestad/Data Mining/HousePrices.csv",
  stringsAsFactors = TRUE
)
head(houses)
## HomeID Price SqFt Bedrooms Bathrooms Offers Brick Neighborhood
## 1 1 114300 1790 2 2 2 No East
## 2 2 114200 2030 4 2 3 No East
## 3 3 114800 1740 3 2 1 No East
## 4 4 94700 1980 3 2 3 No East
## 5 5 119800 2130 3 3 3 No East
## 6 6 114600 1780 3 2 2 No North
# Price distribution per neighborhood as box-and-whisker plots.
# plot(y ~ x) only draws boxplots when x is a factor and labels the axes with
# the raw "houses$..." expressions; calling boxplot() with data = houses works
# for factor or character Neighborhood and gives clean axis labels.
boxplot(
  Price ~ Neighborhood,
  data = houses,
  xlab = "Neighborhood",
  ylab = "Price"
)
library(class)
## Scratch kNN attempt with class::knn(), kept for reference only. It cannot
## run at this point in the script because `x` and `train` are not defined yet.
## Two mistakes in the original scratch lines are corrected here: the column is
## Neighborhood (R names are case-sensitive, so houses$neighborhood is NULL)
## and the last call is data.frame(), not data.fram().
##length(class)
##set.seed(1)
##nearest1 <- knn(train = x[train, ], test = x[-train, ], cl = houses$Neighborhood[train], k = 1)
##nearest5 <- knn(train = x[train, ], test = x[-train, ], cl = houses$Neighborhood[train], k = 5)
##data.frame(houses$Neighborhood[-train], nearest1, nearest5)
# Column-by-column overview: quartiles/mean for numeric columns, level counts
# for factor columns.
summary(houses)
## HomeID Price SqFt Bedrooms
## Min. : 1.00 Min. : 69100 Min. :1450 Min. :2.000
## 1st Qu.: 32.75 1st Qu.:111325 1st Qu.:1880 1st Qu.:3.000
## Median : 64.50 Median :125950 Median :2000 Median :3.000
## Mean : 64.50 Mean :130427 Mean :2001 Mean :3.023
## 3rd Qu.: 96.25 3rd Qu.:148250 3rd Qu.:2140 3rd Qu.:3.000
## Max. :128.00 Max. :211200 Max. :2590 Max. :5.000
## Bathrooms Offers Brick Neighborhood
## Min. :2.000 Min. :1.000 No :86 East :45
## 1st Qu.:2.000 1st Qu.:2.000 Yes:42 North:44
## Median :2.000 Median :3.000 West :39
## Mean :2.445 Mean :2.578
## 3rd Qu.:3.000 3rd Qu.:3.000
## Max. :4.000 Max. :6.000
# Numeric design matrix holding every column of `houses` except the response.
# model.matrix() dummy-codes factor columns (Brick becomes BrickYes) and puts
# an intercept in column 1, which is removed in the second step.
# NOTE(review): HomeID has not been dropped from `houses` yet, so it ends up
# in this matrix as a predictor - confirm that is intended.
Xcred <- model.matrix(Neighborhood ~ ., data = houses)
Xcred <- Xcred[, -1]
# Peek at the first three rows.
head(Xcred, n = 3L)
## HomeID Price SqFt Bedrooms Bathrooms Offers BrickYes
## 1 1 114300 1790 2 2 2 0
## 2 2 114200 2030 4 2 3 0
## 3 3 114800 1740 3 2 1 0
## Template copied from the class credit-default example (kept for reference):
##set.seed(1)
##train <- sample(1:1000,900)
##xtrain <- Xcred[train,]
##xnew <- Xcred[-train,]
##ytrain <- credit$Default[train]
##ynew <- credit$Default[-train]
##credglm=glm(Default~.,family=binomial,data=data.frame(Default=ytrain,xtrain))
##summary(credglm)

# Train/test split: 100 randomly chosen rows are used for fitting and the
# remaining rows are held out. Row indices come from nrow(houses) instead of a
# hard-coded 1:128 so the split keeps covering the whole table if the file
# changes size; for the current 128-row file the draw is identical.
set.seed(1)
train <- sample(seq_len(nrow(houses)), 100)
xtrain <- Xcred[train, ]
xnew <- Xcred[-train, ]
# factor() leaves an existing factor unchanged and converts a character column
# (what read.csv() returns by default on R >= 4.0) so glm() accepts it.
ytrain <- factor(houses$Neighborhood)[train]
ynew <- factor(houses$Neighborhood)[-train]

# Logistic regression of Neighborhood on every column of xtrain.
# NOTE(review): with family = binomial a factor response is split into
# "first level" vs "all other levels", so this fit models East vs (North or
# West); it does not predict the three neighborhoods separately.
# NOTE(review): xtrain still contains HomeID (it appears in the coefficient
# table), presumably just a row identifier - consider excluding it.
housesglm <- glm(
  Neighborhood ~ .,
  family = binomial,
  data = data.frame(Neighborhood = ytrain, xtrain)
)
summary(housesglm)
##
## Call:
## glm(formula = Neighborhood ~ ., family = binomial, data = data.frame(Neighborhood = ytrain,
## xtrain))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.7511 -0.7087 0.4374 0.6775 2.1025
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 6.033e+00 2.567e+00 2.350 0.018754 *
## HomeID 1.099e-02 7.639e-03 1.439 0.150242
## Price 1.158e-04 2.754e-05 4.206 2.60e-05 ***
## SqFt -9.665e-03 2.577e-03 -3.750 0.000177 ***
## Bedrooms -6.131e-01 4.855e-01 -1.263 0.206696
## Bathrooms -9.907e-01 6.608e-01 -1.499 0.133785
## Offers 1.509e+00 4.446e-01 3.394 0.000689 ***
## BrickYes -3.108e+00 7.915e-01 -3.926 8.63e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 123.82 on 99 degrees of freedom
## Residual deviance: 91.17 on 92 degrees of freedom
## AIC: 107.17
##
## Number of Fisher Scoring iterations: 5
# Remove the HomeID column by name. houses[-1] removes whichever column is
# first, so running that line a second time would silently drop Price as well;
# assigning NULL by name is safe to re-run.
# Xcred and the xtrain/xnew matrices were built before this point and still
# contain HomeID.
houses$HomeID <- NULL
head(houses)
## Price SqFt Bedrooms Bathrooms Offers Brick Neighborhood
## 1 114300 1790 2 2 2 No East
## 2 114200 2030 4 2 3 No East
## 3 114800 1740 3 2 1 No East
## 4 94700 1980 3 2 3 No East
## 5 119800 2130 3 3 3 No East
## 6 114600 1780 3 2 2 No North
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
# Train/test data frames for caret.
# Why the earlier attempt failed: the response column was named "Default"
# (left over from the credit example) while the formula asked for
# Neighborhood, so train() could not find the response in the data.
# The frames are now taken straight from `houses`: the response keeps its real
# name, Brick is dummy-coded by the formula interface, and HomeID is excluded
# by name so it is never used as a distance feature.
knn_columns <- setdiff(names(houses), "HomeID")
houses_knn <- houses[, knn_columns]
# Classification needs a factor response; convert before splitting so the
# training and test parts share the same set of levels.
houses_knn$Neighborhood <- factor(houses_knn$Neighborhood)
training_data_subset <- houses_knn[train, ]
testing_data_subset <- houses_knn[-train, ]

# k-nearest-neighbours classifier. Predictors are scaled so that columns with
# large raw values (Price, SqFt) do not dominate the distance. The index
# vector named `train` does not clash with caret's train(): R skips
# non-function objects when resolving a function call.
set.seed(1)  # caret resamples when tuning k; fix the seed for repeatability
houses.model <- train(
  Neighborhood ~ .,
  data = training_data_subset,
  method = "knn",
  preProcess = "scale"
)
houses.model

# Predict the held-out rows and compare against the true neighborhoods.
houses.prediction <- predict(
  houses.model,
  newdata = testing_data_subset,
  type = "raw"
)
confusionMatrix(houses.prediction, testing_data_subset[["Neighborhood"]])
## Assignment, part 2: The second dataset is attached. It is a significantly
## abbreviated version of the NHIS data from Chopped, with some variables and
## most of the cases taken out. KNN can be difficult with masses of missing
## data, so this set should have values in place for all cases and variables.
## Try to predict FSRUNOUT groups using a SUBSET of the variables available.
## Check to make sure there is some variation in each variable that you select
## for your desired subset.
## Due Monday night at midnight on RPubs.
## Part 2: predict FSRUNOUT using a subset of the available variables.
# Read the reduced food-insecurity extract and show its first six rows.
food <- read.csv(file = "/Users/kimberlyhatlestad/Data Mining/ReducedFoodInsec.csv")
head(food, n = 6L)
## WTFA_FAM FINT_M_P TELN_FLG CURWRKN TELCELN WRKCELN PHONEUSE FLNGINTV
## 1 1749 12 1 1 1 1 3 1
## 2 2407 8 1 1 1 2 3 1
## 3 18260 4 1 1 1 2 1 1
## 4 1145 11 1 1 1 5 1 1
## 5 5571 2 1 1 1 4 1 1
## 6 233 6 1 1 1 3 1 1
## FM_SIZE FM_KIDS FM_ELDR FM_TYPE FM_STRCP FM_STRP FM_EDUC1 FCHLMYN
## 1 3 1 0 4 41 42 8 2
## 2 12 8 1 4 45 45 5 1
## 3 3 1 0 4 41 41 6 2
## 4 6 2 0 4 44 44 8 2
## 5 4 1 0 4 44 44 8 2
## 6 4 2 0 4 42 43 6 2
## FCHLMCT FSPEDYN FSPEDCT FLAADLYN FLAADLCT FLIADLYN FLIADLCT FWKLIMYN
## 1 0 2 0 2 0 2 0 2
## 2 1 2 0 2 0 1 1 1
## 3 0 2 0 2 0 2 0 2
## 4 0 2 0 2 0 1 1 1
## 5 0 2 0 2 0 2 0 1
## 6 0 1 1 2 0 2 0 2
## FWKLIMCT FWALKYN FWALKCT FREMEMYN FREMEMCT FANYLYN FANYLCT FHSTATEX
## 1 0 2 0 2 0 2 0 3
## 2 1 1 1 2 0 1 2 0
## 3 0 2 0 2 0 2 0 0
## 4 1 2 0 2 0 1 1 6
## 5 1 2 0 2 0 1 1 3
## 6 0 2 0 2 0 1 1 4
## FHSTATVG FHSTATG FHSTATFR FHSTATPR FSRUNOUT FSLAST FSBALANC FDMEDYN
## 1 0 0 0 0 3 3 3 2
## 2 3 8 1 0 2 3 3 2
## 3 3 0 0 0 2 2 2 1
## 4 0 0 0 0 2 3 3 2
## 5 1 0 0 0 3 3 3 2
## 6 0 0 0 0 3 3 3 2
## FDMEDCT FNMEDYN FNMEDCT FHOSP2YN FHOSP2CT FHCHMYN FHCHMCT FHCPHRYN
## 1 0 2 0 1 1 2 0 2
## 2 0 1 2 1 1 2 0 1
## 3 1 2 0 1 2 2 0 2
## 4 0 2 0 2 0 2 0 2
## 5 0 2 0 1 1 2 0 2
## 6 0 2 0 2 0 2 0 2
## FHCPHRCT FHCDVYN FHCDVCT F10DVYN F10DVCT FHICOVYN FHICOVCT FHIPRVCT
## 1 0 2 0 2 0 1 3 1
## 2 1 1 2 2 0 1 12 3
## 3 0 1 2 2 0 1 3 3
## 4 0 2 0 2 0 1 5 3
## 5 0 2 0 2 0 1 4 3
## 6 0 1 1 2 0 1 4 4
## FHIEXCT FHISINCT FHICARCT FHICADCT FHICHPCT FHIMILCT FHIIHSCT FHIPUBCT
## 1 0 1 0 2 0 0 0 0
## 2 1 0 0 9 0 0 0 0
## 3 0 3 0 0 0 0 0 0
## 4 0 4 0 2 0 0 0 0
## 5 0 3 0 1 0 0 0 0
## 6 0 4 0 0 0 0 0 0
## FHIOGVCT FPRCOOH FHIEBCCT FHICOST FMEDBILL FMEDBPAY FSAF FHDSTCT
## 1 0 1 1 2 1 1 2 0
## 2 0 1 2 3 1 1 2 2
## 3 0 1 3 4 1 1 2 0
## 4 0 1 3 2 1 1 2 1
## 5 0 1 3 1 2 2 2 0
## 6 0 1 4 1 2 2 2 0
## FDGLWCT1 FDGLWCT2 FSALYN FSALCT FSEINCYN FSEINCCT FSSRRYN FSSRRCT
## 1 1 0 1 1 2 0 2 0
## 2 2 0 1 1 2 0 2 0
## 3 1 0 1 1 2 0 2 0
## 4 3 0 1 2 2 0 1 1
## 5 2 0 1 1 1 1 2 0
## 6 2 0 1 2 2 0 2 0
## FPENSYN FPENSCT FOPENSYN FOPENSCT FSSIYN FSSICT FTANFYN FTANFCT FOWBENYN
## 1 2 0 2 0 2 0 2 0 2
## 2 2 0 1 1 1 1 2 0 2
## 3 2 0 2 0 2 0 2 0 2
## 4 2 0 2 0 2 0 2 0 2
## 5 2 0 2 0 2 0 1 1 2
## 6 2 0 2 0 2 0 2 0 2
## FOWBENCT FINTR1YN FINTR1CT FDIVDYN FDIVDCT FCHSPYN FCHSPCT FINCOTYN
## 1 0 2 0 2 0 2 0 2
## 2 0 2 0 2 0 2 0 2
## 3 0 2 0 2 0 2 0 2
## 4 0 2 0 2 0 2 0 2
## 5 0 1 2 2 0 2 0 2
## 6 0 1 2 2 0 2 0 2
## FINCOTCT INCGRP4 INCGRP5 RAT_CAT4 RAT_CAT5 HOUSEOWN FSSAPLYN FSSAPLCT
## 1 0 2 2 8 8 2 2 0
## 2 0 2 2 3 3 1 1 1
## 3 0 4 3 13 13 1 2 0
## 4 0 3 2 7 7 1 1 1
## 5 0 5 4 14 14 1 1 1
## 6 0 5 4 14 14 1 2 0
## FSDAPLYN FSDAPLCT FSNAP FWICYN FWICCT
## 1 2 0 2 2 0
## 2 2 0 1 1 1
## 3 2 0 2 2 0
## 4 1 1 1 1 1
## 5 1 1 1 1 1
## 6 2 0 2 2 0
# Quartiles and mean for every column of `food`; useful for spotting columns
# with no variation (min equal to max) before choosing predictors.
# NOTE(review): several columns top out at 9 or 99, which look like
# "unknown/refused" codes rather than real values - confirm against the
# codebook before treating them as numeric.
summary(object = food)
## WTFA_FAM FINT_M_P TELN_FLG CURWRKN TELCELN
## Min. : 74 Min. : 1.000 Min. :1.000 Min. :1 Min. :1
## 1st Qu.: 1258 1st Qu.: 3.000 1st Qu.:1.000 1st Qu.:1 1st Qu.:1
## Median : 2504 Median : 6.000 Median :1.000 Median :1 Median :1
## Mean : 2668 Mean : 6.396 Mean :1.161 Mean :1 Mean :1
## 3rd Qu.: 3886 3rd Qu.: 9.000 3rd Qu.:1.000 3rd Qu.:1 3rd Qu.:1
## Max. :18260 Max. :12.000 Max. :9.000 Max. :1 Max. :1
## WRKCELN PHONEUSE FLNGINTV FM_SIZE
## Min. : 1.000 Min. :1.000 Min. :1.000 Min. : 2.000
## 1st Qu.: 2.000 1st Qu.:1.000 1st Qu.:1.000 1st Qu.: 4.000
## Median : 2.000 Median :1.000 Median :1.000 Median : 4.000
## Mean : 2.625 Mean :1.592 Mean :1.066 Mean : 4.537
## 3rd Qu.: 3.000 3rd Qu.:2.000 3rd Qu.:1.000 3rd Qu.: 5.000
## Max. :99.000 Max. :3.000 Max. :4.000 Max. :12.000
## FM_KIDS FM_ELDR FM_TYPE FM_STRCP
## Min. :1.000 Min. :0.00000 Min. :3.000 Min. :31.00
## 1st Qu.:1.000 1st Qu.:0.00000 1st Qu.:4.000 1st Qu.:41.00
## Median :2.000 Median :0.00000 Median :4.000 Median :41.00
## Mean :2.166 Mean :0.09932 Mean :3.967 Mean :41.86
## 3rd Qu.:3.000 3rd Qu.:0.00000 3rd Qu.:4.000 3rd Qu.:44.00
## Max. :9.000 Max. :2.00000 Max. :4.000 Max. :99.00
## FM_STRP FM_EDUC1 FCHLMYN FCHLMCT
## Min. :31.00 Min. : 1.000 Min. :1.000 Min. :0.00000
## 1st Qu.:41.00 1st Qu.: 5.000 1st Qu.:2.000 1st Qu.:0.00000
## Median :41.00 Median : 8.000 Median :2.000 Median :0.00000
## Mean :41.91 Mean : 7.256 Mean :1.985 Mean :0.01541
## 3rd Qu.:44.00 3rd Qu.: 9.000 3rd Qu.:2.000 3rd Qu.:0.00000
## Max. :99.00 Max. :99.000 Max. :2.000 Max. :1.00000
## FSPEDYN FSPEDCT FLAADLYN FLAADLCT
## Min. :1.000 Min. :0.00000 Min. :1.00 Min. :0.00000
## 1st Qu.:2.000 1st Qu.:0.00000 1st Qu.:2.00 1st Qu.:0.00000
## Median :2.000 Median :0.00000 Median :2.00 Median :0.00000
## Mean :1.918 Mean :0.09418 Mean :1.98 Mean :0.02226
## 3rd Qu.:2.000 3rd Qu.:0.00000 3rd Qu.:2.00 3rd Qu.:0.00000
## Max. :2.000 Max. :4.00000 Max. :2.00 Max. :2.00000
## FLIADLYN FLIADLCT FWKLIMYN FWKLIMCT
## Min. :1.000 Min. :0.00000 Min. :1.000 Min. :0.00000
## 1st Qu.:2.000 1st Qu.:0.00000 1st Qu.:2.000 1st Qu.:0.00000
## Median :2.000 Median :0.00000 Median :2.000 Median :0.00000
## Mean :1.978 Mean :0.02397 Mean :1.926 Mean :0.08305
## 3rd Qu.:2.000 3rd Qu.:0.00000 3rd Qu.:2.000 3rd Qu.:0.00000
## Max. :2.000 Max. :3.00000 Max. :2.000 Max. :3.00000
## FWALKYN FWALKCT FREMEMYN FREMEMCT
## Min. :1.000 Min. :0.00000 Min. :1.000 Min. :0.00000
## 1st Qu.:2.000 1st Qu.:0.00000 1st Qu.:2.000 1st Qu.:0.00000
## Median :2.000 Median :0.00000 Median :2.000 Median :0.00000
## Mean :1.965 Mean :0.03682 Mean :1.984 Mean :0.02055
## 3rd Qu.:2.000 3rd Qu.:0.00000 3rd Qu.:2.000 3rd Qu.:0.00000
## Max. :2.000 Max. :2.00000 Max. :7.000 Max. :1.00000
## FANYLYN FANYLCT FHSTATEX FHSTATVG
## Min. :1.00 Min. :0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.:2.00 1st Qu.:0.000 1st Qu.: 0.000 1st Qu.: 0.000
## Median :2.00 Median :0.000 Median : 2.000 Median : 0.000
## Mean :1.83 Mean :0.214 Mean : 2.383 Mean : 1.307
## 3rd Qu.:2.00 3rd Qu.:0.000 3rd Qu.: 4.000 3rd Qu.: 2.000
## Max. :2.00 Max. :9.000 Max. :12.000 Max. :12.000
## FHSTATG FHSTATFR FHSTATPR FSRUNOUT
## Min. :0.0000 Min. :0.000 Min. :0.00000 Min. :1.000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.00000 1st Qu.:3.000
## Median :0.0000 Median :0.000 Median :0.00000 Median :3.000
## Mean :0.6918 Mean :0.131 Mean :0.02226 Mean :2.874
## 3rd Qu.:1.0000 3rd Qu.:0.000 3rd Qu.:0.00000 3rd Qu.:3.000
## Max. :8.0000 Max. :8.000 Max. :2.00000 Max. :3.000
## FSLAST FSBALANC FDMEDYN FDMEDCT
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :0.000
## 1st Qu.:3.000 1st Qu.:3.000 1st Qu.:2.000 1st Qu.:0.000
## Median :3.000 Median :3.000 Median :2.000 Median :0.000
## Mean :2.917 Mean :2.927 Mean :1.902 Mean :0.149
## 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:2.000 3rd Qu.:0.000
## Max. :3.000 Max. :3.000 Max. :2.000 Max. :7.000
## FNMEDYN FNMEDCT FHOSP2YN FHOSP2CT
## Min. :1.000 Min. :0.00000 Min. :1.000 Min. :0.000
## 1st Qu.:2.000 1st Qu.:0.00000 1st Qu.:1.000 1st Qu.:0.000
## Median :2.000 Median :0.00000 Median :2.000 Median :0.000
## Mean :1.935 Mean :0.08733 Mean :1.748 Mean :0.369
## 3rd Qu.:2.000 3rd Qu.:0.00000 3rd Qu.:2.000 3rd Qu.:1.000
## Max. :2.000 Max. :7.00000 Max. :2.000 Max. :4.000
## FHCHMYN FHCHMCT FHCPHRYN FHCPHRCT
## Min. :1.000 Min. :0.00000 Min. :1.000 Min. :0.0000
## 1st Qu.:2.000 1st Qu.:0.00000 1st Qu.:2.000 1st Qu.:0.0000
## Median :2.000 Median :0.00000 Median :2.000 Median :0.0000
## Mean :1.988 Mean :0.01284 Mean :1.905 Mean :0.1293
## 3rd Qu.:2.000 3rd Qu.:0.00000 3rd Qu.:2.000 3rd Qu.:0.0000
## Max. :2.000 Max. :2.00000 Max. :9.000 Max. :3.0000
## FHCDVYN FHCDVCT F10DVYN F10DVCT
## Min. :1.000 Min. :0.0000 Min. :1.000 Min. :0.0000
## 1st Qu.:1.000 1st Qu.:0.0000 1st Qu.:2.000 1st Qu.:0.0000
## Median :2.000 Median :0.0000 Median :2.000 Median :0.0000
## Mean :1.545 Mean :0.6027 Mean :1.781 Mean :0.2671
## 3rd Qu.:2.000 3rd Qu.:1.0000 3rd Qu.:2.000 3rd Qu.:0.0000
## Max. :2.000 Max. :5.0000 Max. :9.000 Max. :5.0000
## FHICOVYN FHICOVCT FHIPRVCT FHIEXCT
## Min. :1 Min. : 1.00 Min. : 1.000 Min. :0.00000
## 1st Qu.:1 1st Qu.: 3.00 1st Qu.: 3.000 1st Qu.:0.00000
## Median :1 Median : 4.00 Median : 4.000 Median :0.00000
## Mean :1 Mean : 4.36 Mean : 3.714 Mean :0.06764
## 3rd Qu.:1 3rd Qu.: 5.00 3rd Qu.: 5.000 3rd Qu.:0.00000
## Max. :1 Max. :12.00 Max. :10.000 Max. :6.00000
## FHISINCT FHICARCT FHICADCT FHICHPCT
## Min. : 0.000 Min. :0.00000 Min. : 0.0000 Min. :0.00000
## 1st Qu.: 0.000 1st Qu.:0.00000 1st Qu.: 0.0000 1st Qu.:0.00000
## Median : 2.000 Median :0.00000 Median : 0.0000 Median :0.00000
## Mean : 2.073 Mean :0.09247 Mean : 0.5334 Mean :0.07534
## 3rd Qu.: 4.000 3rd Qu.:0.00000 3rd Qu.: 0.0000 3rd Qu.:0.00000
## Max. :11.000 Max. :3.00000 Max. :10.0000 Max. :4.00000
## FHIMILCT FHIIHSCT FHIPUBCT FHIOGVCT
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.0000000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.0000000
## Mean :0.03425 Mean :0.01627 Mean :0.03596 Mean :0.0008562
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000000
## Max. :5.00000 Max. :4.00000 Max. :9.00000 Max. :1.0000000
## FPRCOOH FHIEBCCT FHICOST FMEDBILL
## Min. :1.000 Min. : 0.000 Min. :0.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.: 2.000 1st Qu.:1.000 1st Qu.:2.000
## Median :2.000 Median : 4.000 Median :2.000 Median :2.000
## Mean :1.969 Mean : 3.366 Mean :2.241 Mean :1.854
## 3rd Qu.:2.000 3rd Qu.: 4.000 3rd Qu.:3.000 3rd Qu.:2.000
## Max. :2.000 Max. :10.000 Max. :9.000 Max. :9.000
## FMEDBPAY FSAF FHDSTCT FDGLWCT1
## Min. :1.000 Min. :1.000 Min. :0.0000 Min. :0.000
## 1st Qu.:1.000 1st Qu.:2.000 1st Qu.:0.0000 1st Qu.:1.000
## Median :2.000 Median :2.000 Median :0.0000 Median :2.000
## Mean :1.754 Mean :1.878 Mean :0.3005 Mean :1.783
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:0.0000 3rd Qu.:2.000
## Max. :9.000 Max. :9.000 Max. :7.0000 Max. :6.000
## FDGLWCT2 FSALYN FSALCT FSEINCYN
## Min. :0.00000 Min. :1.000 Min. :0.000 Min. :1.000
## 1st Qu.:0.00000 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:2.000
## Median :0.00000 Median :1.000 Median :2.000 Median :2.000
## Mean :0.06164 Mean :1.139 Mean :1.741 Mean :1.943
## 3rd Qu.:0.00000 3rd Qu.:1.000 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :2.00000 Max. :9.000 Max. :6.000 Max. :9.000
## FSEINCCT FSSRRYN FSSRRCT FPENSYN
## Min. :0.0000 Min. :1.000 Min. :0.0000 Min. :1.000
## 1st Qu.:0.0000 1st Qu.:2.000 1st Qu.:0.0000 1st Qu.:2.000
## Median :0.0000 Median :2.000 Median :0.0000 Median :2.000
## Mean :0.1798 Mean :2.015 Mean :0.1045 Mean :2.077
## 3rd Qu.:0.0000 3rd Qu.:2.000 3rd Qu.:0.0000 3rd Qu.:2.000
## Max. :3.0000 Max. :9.000 Max. :3.0000 Max. :9.000
## FPENSCT FOPENSYN FOPENSCT FSSIYN
## Min. :0.00000 Min. :1.000 Min. :0.00000 Min. :1.000
## 1st Qu.:0.00000 1st Qu.:2.000 1st Qu.:0.00000 1st Qu.:2.000
## Median :0.00000 Median :2.000 Median :0.00000 Median :2.000
## Mean :0.02568 Mean :2.066 Mean :0.03853 Mean :2.077
## 3rd Qu.:0.00000 3rd Qu.:2.000 3rd Qu.:0.00000 3rd Qu.:2.000
## Max. :2.00000 Max. :9.000 Max. :2.00000 Max. :9.000
## FSSICT FTANFYN FTANFCT FOWBENYN
## Min. :0.00000 Min. :1.000 Min. :0.00000 Min. :1.000
## 1st Qu.:0.00000 1st Qu.:2.000 1st Qu.:0.00000 1st Qu.:2.000
## Median :0.00000 Median :2.000 Median :0.00000 Median :2.000
## Mean :0.03082 Mean :2.076 Mean :0.02911 Mean :2.082
## 3rd Qu.:0.00000 3rd Qu.:2.000 3rd Qu.:0.00000 3rd Qu.:2.000
## Max. :2.00000 Max. :9.000 Max. :4.00000 Max. :9.000
## FOWBENCT FINTR1YN FINTR1CT FDIVDYN
## Min. :0.00000 Min. :1.000 Min. :0.00 Min. :1.000
## 1st Qu.:0.00000 1st Qu.:2.000 1st Qu.:0.00 1st Qu.:2.000
## Median :0.00000 Median :2.000 Median :0.00 Median :2.000
## Mean :0.02825 Mean :1.957 Mean :0.47 Mean :2.021
## 3rd Qu.:0.00000 3rd Qu.:2.000 3rd Qu.:0.00 3rd Qu.:2.000
## Max. :4.00000 Max. :9.000 Max. :7.00 Max. :9.000
## FDIVDCT FCHSPYN FCHSPCT FINCOTYN
## Min. :0.0000 Min. :1.000 Min. :0.00000 Min. :1.000
## 1st Qu.:0.0000 1st Qu.:2.000 1st Qu.:0.00000 1st Qu.:2.000
## Median :0.0000 Median :2.000 Median :0.00000 Median :2.000
## Mean :0.2543 Mean :2.045 Mean :0.07705 Mean :2.048
## 3rd Qu.:0.0000 3rd Qu.:2.000 3rd Qu.:0.00000 3rd Qu.:2.000
## Max. :6.0000 Max. :9.000 Max. :3.00000 Max. :9.000
## FINCOTCT INCGRP4 INCGRP5 RAT_CAT4
## Min. :0.0000 Min. : 1.0 Min. : 1.00 Min. : 1.00
## 1st Qu.:0.0000 1st Qu.: 3.0 1st Qu.: 2.00 1st Qu.: 9.00
## Median :0.0000 Median : 5.0 Median : 4.00 Median :12.00
## Mean :0.0899 Mean :12.9 Mean :13.81 Mean :16.96
## 3rd Qu.:0.0000 3rd Qu.: 5.0 3rd Qu.: 4.00 3rd Qu.:14.00
## Max. :9.0000 Max. :99.0 Max. :99.00 Max. :99.00
## RAT_CAT5 HOUSEOWN FSSAPLYN FSSAPLCT
## Min. : 1.00 Min. :1.000 Min. :1.000 Min. :0.00000
## 1st Qu.: 9.00 1st Qu.:1.000 1st Qu.:2.000 1st Qu.:0.00000
## Median :12.00 Median :1.000 Median :2.000 Median :0.00000
## Mean :18.29 Mean :1.364 Mean :2.058 Mean :0.05223
## 3rd Qu.:14.00 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:0.00000
## Max. :99.00 Max. :8.000 Max. :9.000 Max. :3.00000
## FSDAPLYN FSDAPLCT FSNAP FWICYN
## Min. :1.000 Min. :0.00000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:0.00000 1st Qu.:2.000 1st Qu.:2.000
## Median :2.000 Median :0.00000 Median :2.000 Median :2.000
## Mean :2.058 Mean :0.05908 Mean :1.988 Mean :1.942
## 3rd Qu.:2.000 3rd Qu.:0.00000 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :9.000 Max. :2.00000 Max. :9.000 Max. :9.000
## FWICCT
## Min. :0.0000
## 1st Qu.:0.0000
## Median :0.0000
## Mean :0.2414
## 3rd Qu.:0.0000
## Max. :4.0000
# Numeric design matrix of every `food` column except the response FSRUNOUT.
# model.matrix() puts an intercept in column 1; it is removed in the second
# step so only the predictors remain.
Xfood <- model.matrix(FSRUNOUT ~ ., data = food)
Xfood <- Xfood[, -1]
# Peek at the first three rows.
head(Xfood, n = 3L)
## WTFA_FAM FINT_M_P TELN_FLG CURWRKN TELCELN WRKCELN PHONEUSE FLNGINTV
## 1 1749 12 1 1 1 1 3 1
## 2 2407 8 1 1 1 2 3 1
## 3 18260 4 1 1 1 2 1 1
## FM_SIZE FM_KIDS FM_ELDR FM_TYPE FM_STRCP FM_STRP FM_EDUC1 FCHLMYN
## 1 3 1 0 4 41 42 8 2
## 2 12 8 1 4 45 45 5 1
## 3 3 1 0 4 41 41 6 2
## FCHLMCT FSPEDYN FSPEDCT FLAADLYN FLAADLCT FLIADLYN FLIADLCT FWKLIMYN
## 1 0 2 0 2 0 2 0 2
## 2 1 2 0 2 0 1 1 1
## 3 0 2 0 2 0 2 0 2
## FWKLIMCT FWALKYN FWALKCT FREMEMYN FREMEMCT FANYLYN FANYLCT FHSTATEX
## 1 0 2 0 2 0 2 0 3
## 2 1 1 1 2 0 1 2 0
## 3 0 2 0 2 0 2 0 0
## FHSTATVG FHSTATG FHSTATFR FHSTATPR FSLAST FSBALANC FDMEDYN FDMEDCT
## 1 0 0 0 0 3 3 2 0
## 2 3 8 1 0 3 3 2 0
## 3 3 0 0 0 2 2 1 1
## FNMEDYN FNMEDCT FHOSP2YN FHOSP2CT FHCHMYN FHCHMCT FHCPHRYN FHCPHRCT
## 1 2 0 1 1 2 0 2 0
## 2 1 2 1 1 2 0 1 1
## 3 2 0 1 2 2 0 2 0
## FHCDVYN FHCDVCT F10DVYN F10DVCT FHICOVYN FHICOVCT FHIPRVCT FHIEXCT
## 1 2 0 2 0 1 3 1 0
## 2 1 2 2 0 1 12 3 1
## 3 1 2 2 0 1 3 3 0
## FHISINCT FHICARCT FHICADCT FHICHPCT FHIMILCT FHIIHSCT FHIPUBCT FHIOGVCT
## 1 1 0 2 0 0 0 0 0
## 2 0 0 9 0 0 0 0 0
## 3 3 0 0 0 0 0 0 0
## FPRCOOH FHIEBCCT FHICOST FMEDBILL FMEDBPAY FSAF FHDSTCT FDGLWCT1
## 1 1 1 2 1 1 2 0 1
## 2 1 2 3 1 1 2 2 2
## 3 1 3 4 1 1 2 0 1
## FDGLWCT2 FSALYN FSALCT FSEINCYN FSEINCCT FSSRRYN FSSRRCT FPENSYN FPENSCT
## 1 0 1 1 2 0 2 0 2 0
## 2 0 1 1 2 0 2 0 2 0
## 3 0 1 1 2 0 2 0 2 0
## FOPENSYN FOPENSCT FSSIYN FSSICT FTANFYN FTANFCT FOWBENYN FOWBENCT
## 1 2 0 2 0 2 0 2 0
## 2 1 1 1 1 2 0 2 0
## 3 2 0 2 0 2 0 2 0
## FINTR1YN FINTR1CT FDIVDYN FDIVDCT FCHSPYN FCHSPCT FINCOTYN FINCOTCT
## 1 2 0 2 0 2 0 2 0
## 2 2 0 2 0 2 0 2 0
## 3 2 0 2 0 2 0 2 0
## INCGRP4 INCGRP5 RAT_CAT4 RAT_CAT5 HOUSEOWN FSSAPLYN FSSAPLCT FSDAPLYN
## 1 2 2 8 8 2 2 0 2
## 2 2 2 3 3 1 1 1 2
## 3 4 3 13 13 1 2 0 2
## FSDAPLCT FSNAP FWICYN FWICCT
## 1 0 2 2 0
## 2 0 1 1 1
## 3 0 2 2 0
# Train/test split for the food data.
# The original sample(1:128, 100) was copied from the houses analysis: it only
# ever drew training rows from the first 128 rows of `food` and left every
# other row in the test set. Rows are now drawn from the whole table, with
# about 80% used for training.
set.seed(1)
n_food <- nrow(food)
train <- sample(seq_len(n_food), round(0.8 * n_food))
xtrain <- Xfood[train, , drop = FALSE]
xnew <- Xfood[-train, , drop = FALSE]
# FSRUNOUT is stored as integer codes. Converting to a factor (before
# splitting, so both parts share the same levels) makes caret treat this as
# classification into groups instead of regression on the code values.
food_groups <- factor(food$FSRUNOUT)
ytrain <- food_groups[train]
ynew <- food_groups[-train]
## A binomial glm() does not apply here: FSRUNOUT has more than two groups.
##foodglm <- glm(FSRUNOUT ~.,family=binomial,data=data.frame(FSRUNOUT=ytrain,xtrain))
##summary(foodglm)
library(caret)
# The response column must carry the name used in the formula (FSRUNOUT);
# naming it "Default" is what made the earlier train() call fail.
training_data_subset <- data.frame(xtrain, FSRUNOUT = ytrain)
testing_data_subset <- data.frame(xnew, FSRUNOUT = ynew)

# k-nearest-neighbours on a subset of the predictors: "nzv" drops columns with
# zero or near-zero variance (summary(food) shows several constant columns,
# e.g. CURWRKN and TELCELN, which cannot be scaled), then "scale" puts the
# remaining columns on a comparable footing for the distance calculation.
food.model <- train(
  FSRUNOUT ~ .,
  data = training_data_subset,
  method = "knn",
  preProcess = c("nzv", "scale")
)
food.model