Load Data Frame from website

# library() stops with an error if data.table is missing, whereas require()
# only returns FALSE and lets the script fail later at setkey()/data.table().
library(data.table)

theUrl <- "http://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data"

# Clear results left over from a previous run. Only objects that actually
# exist are removed, so a fresh session no longer emits
# "object '...' not found" warnings.
rm(list = intersect(
  c("df_census", "df_census.select", "dt_census", "dt_select"),
  ls()
))

## Decode column labels while reading
# The file has no header row: with header = TRUE the first record was
# consumed as column names (X39, State.gov, X77516, ...) and dropped from the
# data. Reading with header = FALSE keeps that record, and the labels are
# supplied directly instead of being mapped from the mangled names afterwards.
# check.names = FALSE keeps the hyphenated labels exactly as written, so they
# must be back-quoted when used as symbols (e.g. `capital-gain`).
# String values are stored as they appear in the file, including the leading
# space after each comma (e.g. " Male", " >50K").
df_census <- read.table(
  file = theUrl,
  header = FALSE,
  sep = ",",
  check.names = FALSE,
  col.names = c(
    "age",
    "workclass",
    "fnlwgt",
    "education",
    # NOTE(review): numeric column (13 in the first record); the UCI
    # description appears to call it "education-num". Label kept unchanged
    # so existing references still resolve -- confirm before renaming.
    "doctorate",
    "marital-status",
    "occupation",
    "relationship",
    "race",
    "sex",
    "capital-gain",
    "capital-loss",
    "hours-per-week",
    "native-country",
    # Two-level income class (" <=50K" / " >50K").
    "socio-status"
  )
)
# Carry each record's row label along as an explicit leading `id` column.
id <- rownames(df_census)
df_census <- cbind(id = id, df_census)

# Quick look at the age distribution before filtering.
hist(df_census$age)

# Keep the records with age <= 65 (rows with a missing age are dropped), then
# copy them into a data.table sorted and keyed by id.
df_census.select <- df_census[which(df_census$age <= 65), ]
dt_census <- as.data.table(df_census.select)
setkeyv(dt_census, "id")

Load a result-set data.table with a subset of interesting columns. Filter the census data on `capital-gain` to look at a filtered set of data.

# Rows reporting a positive capital gain, narrowed to the columns of interest.
dt_select <- dt_census[
  `capital-gain` > 0,
  list(
    id, age, workclass, occupation, sex, education,
    `hours-per-week`, `socio-status`
  )
]

# Print the structure of the result to check column types and row count.
str(dt_select)
## Classes 'data.table' and 'data.frame':   2511 obs. of  8 variables:
##  $ id            : Factor w/ 32560 levels "1","10","100",..: 41 54 92 95 114 128 129 131 137 140 ...
##  $ age           : int  28 20 54 34 44 29 18 43 50 50 ...
##  $ workclass     : Factor w/ 9 levels " ?"," Federal-gov",..: 5 5 7 5 5 5 5 8 5 3 ...
##  $ occupation    : Factor w/ 15 levels " ?"," Adm-clerical",..: 13 9 4 11 5 4 13 11 8 11 ...
##  $ sex           : Factor w/ 2 levels " Female"," Male": 2 2 2 1 2 2 1 2 2 1 ...
##  $ education     : Factor w/ 16 levels " 10th"," 11th",..: 12 16 12 10 10 2 12 11 12 13 ...
##  $ hours-per-week: int  40 25 52 36 60 50 30 64 40 40 ...
##  $ socio-status  : Factor w/ 2 levels " <=50K"," >50K": 2 1 2 2 2 1 1 2 2 2 ...
##  - attr(*, ".internal.selfref")=<externalptr> 
##  - attr(*, "sorted")= chr "id"