##Trying to predict what neighborhood the house is in. 

library(car)
library(textir)
## Loading required package: distrom
## Loading required package: Matrix
## Loading required package: gamlr
## Loading required package: parallel
library(class)

## Load the house-price data and preview the first rows.
## Brick and Neighborhood are used as factors further down (plot by
## neighborhood, level counts in summary(), classification outcome).
## read.csv() stopped converting strings to factors by default in R 4.0.0,
## so the conversion is requested explicitly.
houses <- read.csv(
  "/Users/kimberlyhatlestad/Data Mining/HousePrices.csv",
  stringsAsFactors = TRUE
)
head(houses)
##   HomeID  Price SqFt Bedrooms Bathrooms Offers Brick Neighborhood
## 1      1 114300 1790        2         2      2    No         East
## 2      2 114200 2030        4         2      3    No         East
## 3      3 114800 1740        3         2      1    No         East
## 4      4  94700 1980        3         2      3    No         East
## 5      5 119800 2130        3         3      3    No         East
## 6      6 114600 1780        3         2      2    No        North
## Plot Price against Neighborhood using the formula interface (response ~ group).
plot(houses$Price~houses$Neighborhood)

library(class)

##length(class)
##set.seed(1)
##nearest1<- knn(train=x[train,],test=x[-train,],cl=houses$Neighborhood[train],k=1)
##nearest5<- knn(train=x[train,],test=x[-train,],cl=houses$Neighborhood[train],k=5)
##data.frame(houses$Neighborhood[-train],nearest1,nearest5)
## Per-column summary of the house data (quantiles for numeric columns,
## level counts for factor columns).
summary(object = houses)
##      HomeID           Price             SqFt         Bedrooms    
##  Min.   :  1.00   Min.   : 69100   Min.   :1450   Min.   :2.000  
##  1st Qu.: 32.75   1st Qu.:111325   1st Qu.:1880   1st Qu.:3.000  
##  Median : 64.50   Median :125950   Median :2000   Median :3.000  
##  Mean   : 64.50   Mean   :130427   Mean   :2001   Mean   :3.023  
##  3rd Qu.: 96.25   3rd Qu.:148250   3rd Qu.:2140   3rd Qu.:3.000  
##  Max.   :128.00   Max.   :211200   Max.   :2590   Max.   :5.000  
##    Bathrooms         Offers      Brick    Neighborhood
##  Min.   :2.000   Min.   :1.000   No :86   East :45    
##  1st Qu.:2.000   1st Qu.:2.000   Yes:42   North:44    
##  Median :2.000   Median :3.000            West :39    
##  Mean   :2.445   Mean   :2.578                        
##  3rd Qu.:3.000   3rd Qu.:3.000                        
##  Max.   :4.000   Max.   :6.000
## Numeric design matrix of every predictor of Neighborhood; the first
## column of the model matrix (the intercept) is removed.
## NOTE(review): HomeID is still part of `houses` at this point, so it ends
## up in the design matrix as a predictor -- confirm that is intended.
Xcred <- model.matrix(object = Neighborhood ~ ., data = houses)[, -1]
## Show the first three rows as a sanity check.
head(Xcred, n = 3L)
##   HomeID  Price SqFt Bedrooms Bathrooms Offers BrickYes
## 1      1 114300 1790        2         2      2        0
## 2      2 114200 2030        4         2      3        0
## 3      3 114800 1740        3         2      1        0
##set.seed(1)
##train <- sample(1:1000,900)
##xtrain <- Xcred[train,]
##xnew <- Xcred[-train,]
##ytrain <- credit$Default[train]
##ynew <- credit$Default[-train]
##credglm=glm(Default~.,family=binomial,data=data.frame(Default=ytrain,xtrain))
##summary(credglm)
## Reproducible split: 100 rows for training, the remaining rows held out.
## The row indices are derived from nrow(Xcred) rather than a hard-coded
## 1:128, so the split stays valid if the number of rows changes.
set.seed(1)
train <- sample(seq_len(nrow(Xcred)), 100)
xtrain <- Xcred[train, ]
xnew <- Xcred[-train, ]
ytrain <- houses$Neighborhood[train]
ynew <- houses$Neighborhood[-train]

## Logistic regression of Neighborhood on all columns of the design matrix.
## NOTE(review): Neighborhood has three levels (East, North, West). With
## family = binomial a factor response is read as "first level" vs. "any
## other level", so this fit models East vs. not-East, not all three
## neighborhoods.
housesglm <- glm(
  Neighborhood ~ .,
  family = binomial,
  data = data.frame(Neighborhood = ytrain, xtrain)
)
summary(housesglm)
## 
## Call:
## glm(formula = Neighborhood ~ ., family = binomial, data = data.frame(Neighborhood = ytrain, 
##     xtrain))
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.7511  -0.7087   0.4374   0.6775   2.1025  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  6.033e+00  2.567e+00   2.350 0.018754 *  
## HomeID       1.099e-02  7.639e-03   1.439 0.150242    
## Price        1.158e-04  2.754e-05   4.206 2.60e-05 ***
## SqFt        -9.665e-03  2.577e-03  -3.750 0.000177 ***
## Bedrooms    -6.131e-01  4.855e-01  -1.263 0.206696    
## Bathrooms   -9.907e-01  6.608e-01  -1.499 0.133785    
## Offers       1.509e+00  4.446e-01   3.394 0.000689 ***
## BrickYes    -3.108e+00  7.915e-01  -3.926 8.63e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 123.82  on 99  degrees of freedom
## Residual deviance:  91.17  on 92  degrees of freedom
## AIC: 107.17
## 
## Number of Fisher Scoring iterations: 5
## Remove the HomeID identifier column. It is dropped by name rather than by
## position so that re-running this line cannot silently remove Price (the
## next column) as well.
houses$HomeID <- NULL
head(houses)
##    Price SqFt Bedrooms Bathrooms Offers Brick Neighborhood
## 1 114300 1790        2         2      2    No         East
## 2 114200 2030        4         2      3    No         East
## 3 114800 1740        3         2      1    No         East
## 4  94700 1980        3         2      3    No         East
## 5 119800 2130        3         3      3    No         East
## 6 114600 1780        3         2      2    No        North
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
## Training / hold-out data frames for caret.
## The outcome column must carry the same name that is used on the left-hand
## side of the train() formula. It was previously called `Default` while the
## formula asked for `Neighborhood`, so train() could not find the outcome.
## The outcome is stored as a factor so caret treats this as classification;
## the hold-out labels reuse the training levels.
train_neighborhood <- factor(ytrain)
training_data_subset <- data.frame(xtrain, Neighborhood = train_neighborhood)
testing_data_subset <- data.frame(
  xnew,
  Neighborhood = factor(ynew, levels = levels(train_neighborhood))
)

## k-nearest-neighbours classifier on scaled predictors. The seed makes
## caret's resampling reproducible.
set.seed(1)
houses.model <- train(
  Neighborhood ~ .,
  preProcess = "scale",
  data = training_data_subset,
  method = "knn"
)

houses.model

## Predicted neighborhood for the held-out rows, compared with the truth.
houses.prediction <- predict(houses.model, newdata = testing_data_subset, type = "raw")

confusionMatrix(houses.prediction, testing_data_subset[["Neighborhood"]])

The second dataset is attached. It is a significantly abbreviated version of the NHIS data from Chopped, with some variables and most of the cases taken out. KNN can be difficult with masses of missing data, so this set should have values in place for all cases and variables. Try to predict FSRUNOUT groups using a SUBSET of the variables available. Check to make sure there is some variation in each variable that you select for your desired subset.

Due Monday night at midnight on RPubs.

##Trying to predict FSRUNOUT using a SUBSET OF VARIABLES AVAILABLE

## Load the reduced food-insecurity data set and preview its first six rows.
food <- read.csv(file = "/Users/kimberlyhatlestad/Data Mining/ReducedFoodInsec.csv")
head(food, n = 6L)
##   WTFA_FAM FINT_M_P TELN_FLG CURWRKN TELCELN WRKCELN PHONEUSE FLNGINTV
## 1     1749       12        1       1       1       1        3        1
## 2     2407        8        1       1       1       2        3        1
## 3    18260        4        1       1       1       2        1        1
## 4     1145       11        1       1       1       5        1        1
## 5     5571        2        1       1       1       4        1        1
## 6      233        6        1       1       1       3        1        1
##   FM_SIZE FM_KIDS FM_ELDR FM_TYPE FM_STRCP FM_STRP FM_EDUC1 FCHLMYN
## 1       3       1       0       4       41      42        8       2
## 2      12       8       1       4       45      45        5       1
## 3       3       1       0       4       41      41        6       2
## 4       6       2       0       4       44      44        8       2
## 5       4       1       0       4       44      44        8       2
## 6       4       2       0       4       42      43        6       2
##   FCHLMCT FSPEDYN FSPEDCT FLAADLYN FLAADLCT FLIADLYN FLIADLCT FWKLIMYN
## 1       0       2       0        2        0        2        0        2
## 2       1       2       0        2        0        1        1        1
## 3       0       2       0        2        0        2        0        2
## 4       0       2       0        2        0        1        1        1
## 5       0       2       0        2        0        2        0        1
## 6       0       1       1        2        0        2        0        2
##   FWKLIMCT FWALKYN FWALKCT FREMEMYN FREMEMCT FANYLYN FANYLCT FHSTATEX
## 1        0       2       0        2        0       2       0        3
## 2        1       1       1        2        0       1       2        0
## 3        0       2       0        2        0       2       0        0
## 4        1       2       0        2        0       1       1        6
## 5        1       2       0        2        0       1       1        3
## 6        0       2       0        2        0       1       1        4
##   FHSTATVG FHSTATG FHSTATFR FHSTATPR FSRUNOUT FSLAST FSBALANC FDMEDYN
## 1        0       0        0        0        3      3        3       2
## 2        3       8        1        0        2      3        3       2
## 3        3       0        0        0        2      2        2       1
## 4        0       0        0        0        2      3        3       2
## 5        1       0        0        0        3      3        3       2
## 6        0       0        0        0        3      3        3       2
##   FDMEDCT FNMEDYN FNMEDCT FHOSP2YN FHOSP2CT FHCHMYN FHCHMCT FHCPHRYN
## 1       0       2       0        1        1       2       0        2
## 2       0       1       2        1        1       2       0        1
## 3       1       2       0        1        2       2       0        2
## 4       0       2       0        2        0       2       0        2
## 5       0       2       0        1        1       2       0        2
## 6       0       2       0        2        0       2       0        2
##   FHCPHRCT FHCDVYN FHCDVCT F10DVYN F10DVCT FHICOVYN FHICOVCT FHIPRVCT
## 1        0       2       0       2       0        1        3        1
## 2        1       1       2       2       0        1       12        3
## 3        0       1       2       2       0        1        3        3
## 4        0       2       0       2       0        1        5        3
## 5        0       2       0       2       0        1        4        3
## 6        0       1       1       2       0        1        4        4
##   FHIEXCT FHISINCT FHICARCT FHICADCT FHICHPCT FHIMILCT FHIIHSCT FHIPUBCT
## 1       0        1        0        2        0        0        0        0
## 2       1        0        0        9        0        0        0        0
## 3       0        3        0        0        0        0        0        0
## 4       0        4        0        2        0        0        0        0
## 5       0        3        0        1        0        0        0        0
## 6       0        4        0        0        0        0        0        0
##   FHIOGVCT FPRCOOH FHIEBCCT FHICOST FMEDBILL FMEDBPAY FSAF FHDSTCT
## 1        0       1        1       2        1        1    2       0
## 2        0       1        2       3        1        1    2       2
## 3        0       1        3       4        1        1    2       0
## 4        0       1        3       2        1        1    2       1
## 5        0       1        3       1        2        2    2       0
## 6        0       1        4       1        2        2    2       0
##   FDGLWCT1 FDGLWCT2 FSALYN FSALCT FSEINCYN FSEINCCT FSSRRYN FSSRRCT
## 1        1        0      1      1        2        0       2       0
## 2        2        0      1      1        2        0       2       0
## 3        1        0      1      1        2        0       2       0
## 4        3        0      1      2        2        0       1       1
## 5        2        0      1      1        1        1       2       0
## 6        2        0      1      2        2        0       2       0
##   FPENSYN FPENSCT FOPENSYN FOPENSCT FSSIYN FSSICT FTANFYN FTANFCT FOWBENYN
## 1       2       0        2        0      2      0       2       0        2
## 2       2       0        1        1      1      1       2       0        2
## 3       2       0        2        0      2      0       2       0        2
## 4       2       0        2        0      2      0       2       0        2
## 5       2       0        2        0      2      0       1       1        2
## 6       2       0        2        0      2      0       2       0        2
##   FOWBENCT FINTR1YN FINTR1CT FDIVDYN FDIVDCT FCHSPYN FCHSPCT FINCOTYN
## 1        0        2        0       2       0       2       0        2
## 2        0        2        0       2       0       2       0        2
## 3        0        2        0       2       0       2       0        2
## 4        0        2        0       2       0       2       0        2
## 5        0        1        2       2       0       2       0        2
## 6        0        1        2       2       0       2       0        2
##   FINCOTCT INCGRP4 INCGRP5 RAT_CAT4 RAT_CAT5 HOUSEOWN FSSAPLYN FSSAPLCT
## 1        0       2       2        8        8        2        2        0
## 2        0       2       2        3        3        1        1        1
## 3        0       4       3       13       13        1        2        0
## 4        0       3       2        7        7        1        1        1
## 5        0       5       4       14       14        1        1        1
## 6        0       5       4       14       14        1        2        0
##   FSDAPLYN FSDAPLCT FSNAP FWICYN FWICCT
## 1        2        0     2      2      0
## 2        2        0     1      1      1
## 3        2        0     2      2      0
## 4        1        1     1      1      1
## 5        1        1     1      1      1
## 6        2        0     2      2      0
## Per-column summary of the food data; useful for spotting columns with no
## variation (identical Min. and Max.).
summary(object = food)
##     WTFA_FAM        FINT_M_P         TELN_FLG        CURWRKN     TELCELN 
##  Min.   :   74   Min.   : 1.000   Min.   :1.000   Min.   :1   Min.   :1  
##  1st Qu.: 1258   1st Qu.: 3.000   1st Qu.:1.000   1st Qu.:1   1st Qu.:1  
##  Median : 2504   Median : 6.000   Median :1.000   Median :1   Median :1  
##  Mean   : 2668   Mean   : 6.396   Mean   :1.161   Mean   :1   Mean   :1  
##  3rd Qu.: 3886   3rd Qu.: 9.000   3rd Qu.:1.000   3rd Qu.:1   3rd Qu.:1  
##  Max.   :18260   Max.   :12.000   Max.   :9.000   Max.   :1   Max.   :1  
##     WRKCELN          PHONEUSE        FLNGINTV        FM_SIZE      
##  Min.   : 1.000   Min.   :1.000   Min.   :1.000   Min.   : 2.000  
##  1st Qu.: 2.000   1st Qu.:1.000   1st Qu.:1.000   1st Qu.: 4.000  
##  Median : 2.000   Median :1.000   Median :1.000   Median : 4.000  
##  Mean   : 2.625   Mean   :1.592   Mean   :1.066   Mean   : 4.537  
##  3rd Qu.: 3.000   3rd Qu.:2.000   3rd Qu.:1.000   3rd Qu.: 5.000  
##  Max.   :99.000   Max.   :3.000   Max.   :4.000   Max.   :12.000  
##     FM_KIDS         FM_ELDR           FM_TYPE         FM_STRCP    
##  Min.   :1.000   Min.   :0.00000   Min.   :3.000   Min.   :31.00  
##  1st Qu.:1.000   1st Qu.:0.00000   1st Qu.:4.000   1st Qu.:41.00  
##  Median :2.000   Median :0.00000   Median :4.000   Median :41.00  
##  Mean   :2.166   Mean   :0.09932   Mean   :3.967   Mean   :41.86  
##  3rd Qu.:3.000   3rd Qu.:0.00000   3rd Qu.:4.000   3rd Qu.:44.00  
##  Max.   :9.000   Max.   :2.00000   Max.   :4.000   Max.   :99.00  
##     FM_STRP         FM_EDUC1         FCHLMYN         FCHLMCT       
##  Min.   :31.00   Min.   : 1.000   Min.   :1.000   Min.   :0.00000  
##  1st Qu.:41.00   1st Qu.: 5.000   1st Qu.:2.000   1st Qu.:0.00000  
##  Median :41.00   Median : 8.000   Median :2.000   Median :0.00000  
##  Mean   :41.91   Mean   : 7.256   Mean   :1.985   Mean   :0.01541  
##  3rd Qu.:44.00   3rd Qu.: 9.000   3rd Qu.:2.000   3rd Qu.:0.00000  
##  Max.   :99.00   Max.   :99.000   Max.   :2.000   Max.   :1.00000  
##     FSPEDYN         FSPEDCT           FLAADLYN       FLAADLCT      
##  Min.   :1.000   Min.   :0.00000   Min.   :1.00   Min.   :0.00000  
##  1st Qu.:2.000   1st Qu.:0.00000   1st Qu.:2.00   1st Qu.:0.00000  
##  Median :2.000   Median :0.00000   Median :2.00   Median :0.00000  
##  Mean   :1.918   Mean   :0.09418   Mean   :1.98   Mean   :0.02226  
##  3rd Qu.:2.000   3rd Qu.:0.00000   3rd Qu.:2.00   3rd Qu.:0.00000  
##  Max.   :2.000   Max.   :4.00000   Max.   :2.00   Max.   :2.00000  
##     FLIADLYN        FLIADLCT          FWKLIMYN        FWKLIMCT      
##  Min.   :1.000   Min.   :0.00000   Min.   :1.000   Min.   :0.00000  
##  1st Qu.:2.000   1st Qu.:0.00000   1st Qu.:2.000   1st Qu.:0.00000  
##  Median :2.000   Median :0.00000   Median :2.000   Median :0.00000  
##  Mean   :1.978   Mean   :0.02397   Mean   :1.926   Mean   :0.08305  
##  3rd Qu.:2.000   3rd Qu.:0.00000   3rd Qu.:2.000   3rd Qu.:0.00000  
##  Max.   :2.000   Max.   :3.00000   Max.   :2.000   Max.   :3.00000  
##     FWALKYN         FWALKCT           FREMEMYN        FREMEMCT      
##  Min.   :1.000   Min.   :0.00000   Min.   :1.000   Min.   :0.00000  
##  1st Qu.:2.000   1st Qu.:0.00000   1st Qu.:2.000   1st Qu.:0.00000  
##  Median :2.000   Median :0.00000   Median :2.000   Median :0.00000  
##  Mean   :1.965   Mean   :0.03682   Mean   :1.984   Mean   :0.02055  
##  3rd Qu.:2.000   3rd Qu.:0.00000   3rd Qu.:2.000   3rd Qu.:0.00000  
##  Max.   :2.000   Max.   :2.00000   Max.   :7.000   Max.   :1.00000  
##     FANYLYN        FANYLCT         FHSTATEX         FHSTATVG     
##  Min.   :1.00   Min.   :0.000   Min.   : 0.000   Min.   : 0.000  
##  1st Qu.:2.00   1st Qu.:0.000   1st Qu.: 0.000   1st Qu.: 0.000  
##  Median :2.00   Median :0.000   Median : 2.000   Median : 0.000  
##  Mean   :1.83   Mean   :0.214   Mean   : 2.383   Mean   : 1.307  
##  3rd Qu.:2.00   3rd Qu.:0.000   3rd Qu.: 4.000   3rd Qu.: 2.000  
##  Max.   :2.00   Max.   :9.000   Max.   :12.000   Max.   :12.000  
##     FHSTATG          FHSTATFR        FHSTATPR          FSRUNOUT    
##  Min.   :0.0000   Min.   :0.000   Min.   :0.00000   Min.   :1.000  
##  1st Qu.:0.0000   1st Qu.:0.000   1st Qu.:0.00000   1st Qu.:3.000  
##  Median :0.0000   Median :0.000   Median :0.00000   Median :3.000  
##  Mean   :0.6918   Mean   :0.131   Mean   :0.02226   Mean   :2.874  
##  3rd Qu.:1.0000   3rd Qu.:0.000   3rd Qu.:0.00000   3rd Qu.:3.000  
##  Max.   :8.0000   Max.   :8.000   Max.   :2.00000   Max.   :3.000  
##      FSLAST         FSBALANC        FDMEDYN         FDMEDCT     
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :0.000  
##  1st Qu.:3.000   1st Qu.:3.000   1st Qu.:2.000   1st Qu.:0.000  
##  Median :3.000   Median :3.000   Median :2.000   Median :0.000  
##  Mean   :2.917   Mean   :2.927   Mean   :1.902   Mean   :0.149  
##  3rd Qu.:3.000   3rd Qu.:3.000   3rd Qu.:2.000   3rd Qu.:0.000  
##  Max.   :3.000   Max.   :3.000   Max.   :2.000   Max.   :7.000  
##     FNMEDYN         FNMEDCT           FHOSP2YN        FHOSP2CT    
##  Min.   :1.000   Min.   :0.00000   Min.   :1.000   Min.   :0.000  
##  1st Qu.:2.000   1st Qu.:0.00000   1st Qu.:1.000   1st Qu.:0.000  
##  Median :2.000   Median :0.00000   Median :2.000   Median :0.000  
##  Mean   :1.935   Mean   :0.08733   Mean   :1.748   Mean   :0.369  
##  3rd Qu.:2.000   3rd Qu.:0.00000   3rd Qu.:2.000   3rd Qu.:1.000  
##  Max.   :2.000   Max.   :7.00000   Max.   :2.000   Max.   :4.000  
##     FHCHMYN         FHCHMCT           FHCPHRYN        FHCPHRCT     
##  Min.   :1.000   Min.   :0.00000   Min.   :1.000   Min.   :0.0000  
##  1st Qu.:2.000   1st Qu.:0.00000   1st Qu.:2.000   1st Qu.:0.0000  
##  Median :2.000   Median :0.00000   Median :2.000   Median :0.0000  
##  Mean   :1.988   Mean   :0.01284   Mean   :1.905   Mean   :0.1293  
##  3rd Qu.:2.000   3rd Qu.:0.00000   3rd Qu.:2.000   3rd Qu.:0.0000  
##  Max.   :2.000   Max.   :2.00000   Max.   :9.000   Max.   :3.0000  
##     FHCDVYN         FHCDVCT          F10DVYN         F10DVCT      
##  Min.   :1.000   Min.   :0.0000   Min.   :1.000   Min.   :0.0000  
##  1st Qu.:1.000   1st Qu.:0.0000   1st Qu.:2.000   1st Qu.:0.0000  
##  Median :2.000   Median :0.0000   Median :2.000   Median :0.0000  
##  Mean   :1.545   Mean   :0.6027   Mean   :1.781   Mean   :0.2671  
##  3rd Qu.:2.000   3rd Qu.:1.0000   3rd Qu.:2.000   3rd Qu.:0.0000  
##  Max.   :2.000   Max.   :5.0000   Max.   :9.000   Max.   :5.0000  
##     FHICOVYN    FHICOVCT        FHIPRVCT         FHIEXCT       
##  Min.   :1   Min.   : 1.00   Min.   : 1.000   Min.   :0.00000  
##  1st Qu.:1   1st Qu.: 3.00   1st Qu.: 3.000   1st Qu.:0.00000  
##  Median :1   Median : 4.00   Median : 4.000   Median :0.00000  
##  Mean   :1   Mean   : 4.36   Mean   : 3.714   Mean   :0.06764  
##  3rd Qu.:1   3rd Qu.: 5.00   3rd Qu.: 5.000   3rd Qu.:0.00000  
##  Max.   :1   Max.   :12.00   Max.   :10.000   Max.   :6.00000  
##     FHISINCT         FHICARCT          FHICADCT          FHICHPCT      
##  Min.   : 0.000   Min.   :0.00000   Min.   : 0.0000   Min.   :0.00000  
##  1st Qu.: 0.000   1st Qu.:0.00000   1st Qu.: 0.0000   1st Qu.:0.00000  
##  Median : 2.000   Median :0.00000   Median : 0.0000   Median :0.00000  
##  Mean   : 2.073   Mean   :0.09247   Mean   : 0.5334   Mean   :0.07534  
##  3rd Qu.: 4.000   3rd Qu.:0.00000   3rd Qu.: 0.0000   3rd Qu.:0.00000  
##  Max.   :11.000   Max.   :3.00000   Max.   :10.0000   Max.   :4.00000  
##     FHIMILCT          FHIIHSCT          FHIPUBCT          FHIOGVCT        
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.0000000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.0000000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.0000000  
##  Mean   :0.03425   Mean   :0.01627   Mean   :0.03596   Mean   :0.0008562  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.0000000  
##  Max.   :5.00000   Max.   :4.00000   Max.   :9.00000   Max.   :1.0000000  
##     FPRCOOH         FHIEBCCT         FHICOST         FMEDBILL    
##  Min.   :1.000   Min.   : 0.000   Min.   :0.000   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.: 2.000   1st Qu.:1.000   1st Qu.:2.000  
##  Median :2.000   Median : 4.000   Median :2.000   Median :2.000  
##  Mean   :1.969   Mean   : 3.366   Mean   :2.241   Mean   :1.854  
##  3rd Qu.:2.000   3rd Qu.: 4.000   3rd Qu.:3.000   3rd Qu.:2.000  
##  Max.   :2.000   Max.   :10.000   Max.   :9.000   Max.   :9.000  
##     FMEDBPAY          FSAF          FHDSTCT          FDGLWCT1    
##  Min.   :1.000   Min.   :1.000   Min.   :0.0000   Min.   :0.000  
##  1st Qu.:1.000   1st Qu.:2.000   1st Qu.:0.0000   1st Qu.:1.000  
##  Median :2.000   Median :2.000   Median :0.0000   Median :2.000  
##  Mean   :1.754   Mean   :1.878   Mean   :0.3005   Mean   :1.783  
##  3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:0.0000   3rd Qu.:2.000  
##  Max.   :9.000   Max.   :9.000   Max.   :7.0000   Max.   :6.000  
##     FDGLWCT2           FSALYN          FSALCT         FSEINCYN    
##  Min.   :0.00000   Min.   :1.000   Min.   :0.000   Min.   :1.000  
##  1st Qu.:0.00000   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:2.000  
##  Median :0.00000   Median :1.000   Median :2.000   Median :2.000  
##  Mean   :0.06164   Mean   :1.139   Mean   :1.741   Mean   :1.943  
##  3rd Qu.:0.00000   3rd Qu.:1.000   3rd Qu.:2.000   3rd Qu.:2.000  
##  Max.   :2.00000   Max.   :9.000   Max.   :6.000   Max.   :9.000  
##     FSEINCCT         FSSRRYN         FSSRRCT          FPENSYN     
##  Min.   :0.0000   Min.   :1.000   Min.   :0.0000   Min.   :1.000  
##  1st Qu.:0.0000   1st Qu.:2.000   1st Qu.:0.0000   1st Qu.:2.000  
##  Median :0.0000   Median :2.000   Median :0.0000   Median :2.000  
##  Mean   :0.1798   Mean   :2.015   Mean   :0.1045   Mean   :2.077  
##  3rd Qu.:0.0000   3rd Qu.:2.000   3rd Qu.:0.0000   3rd Qu.:2.000  
##  Max.   :3.0000   Max.   :9.000   Max.   :3.0000   Max.   :9.000  
##     FPENSCT           FOPENSYN        FOPENSCT           FSSIYN     
##  Min.   :0.00000   Min.   :1.000   Min.   :0.00000   Min.   :1.000  
##  1st Qu.:0.00000   1st Qu.:2.000   1st Qu.:0.00000   1st Qu.:2.000  
##  Median :0.00000   Median :2.000   Median :0.00000   Median :2.000  
##  Mean   :0.02568   Mean   :2.066   Mean   :0.03853   Mean   :2.077  
##  3rd Qu.:0.00000   3rd Qu.:2.000   3rd Qu.:0.00000   3rd Qu.:2.000  
##  Max.   :2.00000   Max.   :9.000   Max.   :2.00000   Max.   :9.000  
##      FSSICT           FTANFYN         FTANFCT           FOWBENYN    
##  Min.   :0.00000   Min.   :1.000   Min.   :0.00000   Min.   :1.000  
##  1st Qu.:0.00000   1st Qu.:2.000   1st Qu.:0.00000   1st Qu.:2.000  
##  Median :0.00000   Median :2.000   Median :0.00000   Median :2.000  
##  Mean   :0.03082   Mean   :2.076   Mean   :0.02911   Mean   :2.082  
##  3rd Qu.:0.00000   3rd Qu.:2.000   3rd Qu.:0.00000   3rd Qu.:2.000  
##  Max.   :2.00000   Max.   :9.000   Max.   :4.00000   Max.   :9.000  
##     FOWBENCT          FINTR1YN        FINTR1CT       FDIVDYN     
##  Min.   :0.00000   Min.   :1.000   Min.   :0.00   Min.   :1.000  
##  1st Qu.:0.00000   1st Qu.:2.000   1st Qu.:0.00   1st Qu.:2.000  
##  Median :0.00000   Median :2.000   Median :0.00   Median :2.000  
##  Mean   :0.02825   Mean   :1.957   Mean   :0.47   Mean   :2.021  
##  3rd Qu.:0.00000   3rd Qu.:2.000   3rd Qu.:0.00   3rd Qu.:2.000  
##  Max.   :4.00000   Max.   :9.000   Max.   :7.00   Max.   :9.000  
##     FDIVDCT          FCHSPYN         FCHSPCT           FINCOTYN    
##  Min.   :0.0000   Min.   :1.000   Min.   :0.00000   Min.   :1.000  
##  1st Qu.:0.0000   1st Qu.:2.000   1st Qu.:0.00000   1st Qu.:2.000  
##  Median :0.0000   Median :2.000   Median :0.00000   Median :2.000  
##  Mean   :0.2543   Mean   :2.045   Mean   :0.07705   Mean   :2.048  
##  3rd Qu.:0.0000   3rd Qu.:2.000   3rd Qu.:0.00000   3rd Qu.:2.000  
##  Max.   :6.0000   Max.   :9.000   Max.   :3.00000   Max.   :9.000  
##     FINCOTCT         INCGRP4        INCGRP5         RAT_CAT4    
##  Min.   :0.0000   Min.   : 1.0   Min.   : 1.00   Min.   : 1.00  
##  1st Qu.:0.0000   1st Qu.: 3.0   1st Qu.: 2.00   1st Qu.: 9.00  
##  Median :0.0000   Median : 5.0   Median : 4.00   Median :12.00  
##  Mean   :0.0899   Mean   :12.9   Mean   :13.81   Mean   :16.96  
##  3rd Qu.:0.0000   3rd Qu.: 5.0   3rd Qu.: 4.00   3rd Qu.:14.00  
##  Max.   :9.0000   Max.   :99.0   Max.   :99.00   Max.   :99.00  
##     RAT_CAT5        HOUSEOWN        FSSAPLYN        FSSAPLCT      
##  Min.   : 1.00   Min.   :1.000   Min.   :1.000   Min.   :0.00000  
##  1st Qu.: 9.00   1st Qu.:1.000   1st Qu.:2.000   1st Qu.:0.00000  
##  Median :12.00   Median :1.000   Median :2.000   Median :0.00000  
##  Mean   :18.29   Mean   :1.364   Mean   :2.058   Mean   :0.05223  
##  3rd Qu.:14.00   3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:0.00000  
##  Max.   :99.00   Max.   :8.000   Max.   :9.000   Max.   :3.00000  
##     FSDAPLYN        FSDAPLCT           FSNAP           FWICYN     
##  Min.   :1.000   Min.   :0.00000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.:0.00000   1st Qu.:2.000   1st Qu.:2.000  
##  Median :2.000   Median :0.00000   Median :2.000   Median :2.000  
##  Mean   :2.058   Mean   :0.05908   Mean   :1.988   Mean   :1.942  
##  3rd Qu.:2.000   3rd Qu.:0.00000   3rd Qu.:2.000   3rd Qu.:2.000  
##  Max.   :9.000   Max.   :2.00000   Max.   :9.000   Max.   :9.000  
##      FWICCT      
##  Min.   :0.0000  
##  1st Qu.:0.0000  
##  Median :0.0000  
##  Mean   :0.2414  
##  3rd Qu.:0.0000  
##  Max.   :4.0000
## Numeric design matrix of every predictor of FSRUNOUT; the first column of
## the model matrix (the intercept) is removed.
Xfood <- model.matrix(object = FSRUNOUT ~ ., data = food)[, -1]
## Show the first three rows as a sanity check.
head(Xfood, n = 3L)
##   WTFA_FAM FINT_M_P TELN_FLG CURWRKN TELCELN WRKCELN PHONEUSE FLNGINTV
## 1     1749       12        1       1       1       1        3        1
## 2     2407        8        1       1       1       2        3        1
## 3    18260        4        1       1       1       2        1        1
##   FM_SIZE FM_KIDS FM_ELDR FM_TYPE FM_STRCP FM_STRP FM_EDUC1 FCHLMYN
## 1       3       1       0       4       41      42        8       2
## 2      12       8       1       4       45      45        5       1
## 3       3       1       0       4       41      41        6       2
##   FCHLMCT FSPEDYN FSPEDCT FLAADLYN FLAADLCT FLIADLYN FLIADLCT FWKLIMYN
## 1       0       2       0        2        0        2        0        2
## 2       1       2       0        2        0        1        1        1
## 3       0       2       0        2        0        2        0        2
##   FWKLIMCT FWALKYN FWALKCT FREMEMYN FREMEMCT FANYLYN FANYLCT FHSTATEX
## 1        0       2       0        2        0       2       0        3
## 2        1       1       1        2        0       1       2        0
## 3        0       2       0        2        0       2       0        0
##   FHSTATVG FHSTATG FHSTATFR FHSTATPR FSLAST FSBALANC FDMEDYN FDMEDCT
## 1        0       0        0        0      3        3       2       0
## 2        3       8        1        0      3        3       2       0
## 3        3       0        0        0      2        2       1       1
##   FNMEDYN FNMEDCT FHOSP2YN FHOSP2CT FHCHMYN FHCHMCT FHCPHRYN FHCPHRCT
## 1       2       0        1        1       2       0        2        0
## 2       1       2        1        1       2       0        1        1
## 3       2       0        1        2       2       0        2        0
##   FHCDVYN FHCDVCT F10DVYN F10DVCT FHICOVYN FHICOVCT FHIPRVCT FHIEXCT
## 1       2       0       2       0        1        3        1       0
## 2       1       2       2       0        1       12        3       1
## 3       1       2       2       0        1        3        3       0
##   FHISINCT FHICARCT FHICADCT FHICHPCT FHIMILCT FHIIHSCT FHIPUBCT FHIOGVCT
## 1        1        0        2        0        0        0        0        0
## 2        0        0        9        0        0        0        0        0
## 3        3        0        0        0        0        0        0        0
##   FPRCOOH FHIEBCCT FHICOST FMEDBILL FMEDBPAY FSAF FHDSTCT FDGLWCT1
## 1       1        1       2        1        1    2       0        1
## 2       1        2       3        1        1    2       2        2
## 3       1        3       4        1        1    2       0        1
##   FDGLWCT2 FSALYN FSALCT FSEINCYN FSEINCCT FSSRRYN FSSRRCT FPENSYN FPENSCT
## 1        0      1      1        2        0       2       0       2       0
## 2        0      1      1        2        0       2       0       2       0
## 3        0      1      1        2        0       2       0       2       0
##   FOPENSYN FOPENSCT FSSIYN FSSICT FTANFYN FTANFCT FOWBENYN FOWBENCT
## 1        2        0      2      0       2       0        2        0
## 2        1        1      1      1       2       0        2        0
## 3        2        0      2      0       2       0        2        0
##   FINTR1YN FINTR1CT FDIVDYN FDIVDCT FCHSPYN FCHSPCT FINCOTYN FINCOTCT
## 1        2        0       2       0       2       0        2        0
## 2        2        0       2       0       2       0        2        0
## 3        2        0       2       0       2       0        2        0
##   INCGRP4 INCGRP5 RAT_CAT4 RAT_CAT5 HOUSEOWN FSSAPLYN FSSAPLCT FSDAPLYN
## 1       2       2        8        8        2        2        0        2
## 2       2       2        3        3        1        1        1        2
## 3       4       3       13       13        1        2        0        2
##   FSDAPLCT FSNAP FWICYN FWICCT
## 1        0     2      2      0
## 2        0     1      1      1
## 3        0     2      2      0
## Reproducible train / hold-out split of the food data.
## The previous sample(1:128, 100) reused the row count of the house data:
## it could only ever pick training rows from the first 128 rows of `food`,
## and every other row landed in the hold-out set. Indices are now drawn
## from all rows, with 80% used for training.
set.seed(1)
n_food <- nrow(Xfood)
train <- sample(seq_len(n_food), round(0.8 * n_food))
xtrain <- Xfood[train, ]
xnew <- Xfood[-train, ]
ytrain <- food$FSRUNOUT[train]
ynew <- food$FSRUNOUT[-train]
##foodglm <- glm(FSRUNOUT ~.,family=binomial,data=data.frame(FSRUNOUT=ytrain,xtrain))
##summary(foodglm)

library(caret)
## Training / hold-out data frames for caret.
## The outcome column must carry the same name that is used on the left-hand
## side of the train() formula. It was previously called `Default` while the
## formula asked for `FSRUNOUT`, so train() could not find the outcome.
## FSRUNOUT is stored as numeric codes; it is converted to a factor so caret
## fits a classifier instead of a regression. The hold-out labels reuse the
## training levels.
train_fsrunout <- factor(ytrain)
training_data_subset <- data.frame(xtrain, FSRUNOUT = train_fsrunout)
testing_data_subset <- data.frame(
  xnew,
  FSRUNOUT = factor(ynew, levels = levels(train_fsrunout))
)

## k-nearest-neighbours classifier on scaled predictors. "zv" drops
## predictors with a single unique value before scaling; summary(food) shows
## several such columns (e.g. CURWRKN, TELCELN, FHICOVYN have Min. == Max.),
## and they cannot be scaled.
## NOTE(review): the assignment asks for a hand-picked SUBSET of predictors;
## this uses every remaining column -- narrow xtrain/xnew if required.
set.seed(1)
food.model <- train(
  FSRUNOUT ~ .,
  preProcess = c("zv", "scale"),
  data = training_data_subset,
  method = "knn"
)

food.model

## Predicted FSRUNOUT group for the held-out rows, compared with the truth.
food.prediction <- predict(food.model, newdata = testing_data_subset, type = "raw")

confusionMatrix(food.prediction, testing_data_subset[["FSRUNOUT"]])

This is what I copied from your code when I was in your office, so I’m not sure where I’m going wrong.

training data

training_data_subset<-data.frame(xtrain,VARIABLE=ytrain)

testing data

testing_data_subset<-data.frame(xnew,VARIABLE=ynew)

train(VARIABLE ~ ., preProcess = "scale", data = training_data_subset, method = "knn")