Reading Data

Sameer Mathur

Reading, Processing and Describing Data

Using data.table()

---

READING DATA

Method 1: Importing Data from CSV using read.csv

# Read the CSV file from the working directory into a data frame.
# The file name is passed directly; wrapping a single literal in paste() is a no-op.
csv.df <- read.csv("DefaultData.csv")

# Show the first six rows
head(csv.df)
  default student   balance    income
1      No      No  729.5265 44361.625
2      No     Yes  817.1804 12106.135
3      No      No 1073.5492 31767.139
4      No      No  529.2506 35704.494
5      No      No  785.6559 38463.496
6      No     Yes  919.5885  7491.559

Method 2: Importing Data from CSV using data.table::fread

# Load data.table, which provides the file reader fread()
library(data.table)
# Read the same CSV file; the result prints as a data.table
dt <- fread(input = "DefaultData.csv")
# Show the first six rows
head(dt, n = 6L)
   default student   balance    income
1:      No      No  729.5265 44361.625
2:      No     Yes  817.1804 12106.135
3:      No      No 1073.5492 31767.139
4:      No      No  529.2506 35704.494
5:      No      No  785.6559 38463.496
6:      No     Yes  919.5885  7491.559

Method 3: Importing Data Built into a Package

The ISLR package includes a built-in dataset called Default, which records credit card default among consumers.

# Attach ISLR, the package that ships the Default dataset
library(ISLR)
# Make the Default data frame available in the workspace
data("Default")
# Show the first six rows
head(Default, n = 6L)
  default student   balance    income
1      No      No  729.5265 44361.625
2      No     Yes  817.1804 12106.135
3      No      No 1073.5492 31767.139
4      No      No  529.2506 35704.494
5      No      No  785.6559 38463.496
6      No     Yes  919.5885  7491.559

# Method 4: Importing Inbuilt Data as a Data.Table

# Packages: ISLR supplies the Default dataset, data.table supplies the conversion
library(ISLR)
library(data.table)
# Convert the built-in Default data frame into a data.table
default.dt <- as.data.table(Default)

# Converting data.table to dataframe

# ISLR is already loaded by the earlier snippets; repeated here so this snippet stands alone
library(ISLR)
# convert the data.table back to a plain data frame; the result is stored in
# default.df, and default.dt is still used as a data.table afterwards
default.df <- as.data.frame(default.dt)

Number of Rows and Columns

# dimensions of the data table: number of rows, then number of columns
dim(default.dt)
[1] 10000     4

Data Structure

# compact overview of the data table: its classes, size, and the type of each column
str(default.dt)
Classes 'data.table' and 'data.frame':  10000 obs. of  4 variables:
 $ default: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
 $ student: Factor w/ 2 levels "No","Yes": 1 2 1 1 1 2 1 2 1 1 ...
 $ balance: num  730 817 1074 529 786 ...
 $ income : num  44362 12106 31767 35704 38463 ...
 - attr(*, ".internal.selfref")=<externalptr> 

Name of Data Columns

# names of the data columns, returned as a character vector
colnames(default.dt)
[1] "default" "student" "balance" "income" 

First Few Rows of Data Table

# first six rows of the data table (six is head()'s default)
head(default.dt, n = 6L)
   default student   balance    income
1:      No      No  729.5265 44361.625
2:      No     Yes  817.1804 12106.135
3:      No      No 1073.5492 31767.139
4:      No      No  529.2506 35704.494
5:      No      No  785.6559 38463.496
6:      No     Yes  919.5885  7491.559

Last Few Rows of Data Table

# last six rows of the data table (six is tail()'s default)
tail(default.dt, n = 6L)
   default student   balance   income
1:      No     Yes  172.4130 14955.94
2:      No      No  711.5550 52992.38
3:      No      No  757.9629 19660.72
4:      No      No  845.4120 58636.16
5:      No      No 1569.0091 36669.11
6:      No     Yes  200.9222 16862.95

Random Few Rows of Data Table

# random few rows
# some() is provided by the car package; it shows a random sample of rows
# (ten in the output shown here), so the rows displayed differ from run to run
library(car)
some(default.dt)
    default student   balance   income
 1:      No      No  748.9371 44186.62
 2:      No     Yes  953.6263 18363.07
 3:      No      No  396.5136 41969.75
 4:      No      No  832.6460 25106.37
 5:      No     Yes  700.3352 15905.21
 6:      No      No 1943.9323 24193.61
 7:      No      No    0.0000 43033.49
 8:      No     Yes  584.5762 16639.66
 9:      No      No  752.7394 38671.79
10:      No     Yes  536.0102 23587.49

Data Structure

# attaching data columns
# NOTE(review): attach() places a copy of the columns on the search path, so bare
# names such as `balance` resolve without a `default.dt$` prefix. Later changes to
# default.dt are not reflected in the attached copy, and attached names can mask or
# be masked by other objects. Consider with(default.dt, ...) or default.dt[, ...]
# instead — confirm that nothing further down relies on the attach before removing it.
attach(default.dt)
# data types of the data columns
str(default.dt)
Classes 'data.table' and 'data.frame':  10000 obs. of  4 variables:
 $ default: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
 $ student: Factor w/ 2 levels "No","Yes": 1 2 1 1 1 2 1 2 1 1 ...
 $ balance: num  730 817 1074 529 786 ...
 $ income : num  44362 12106 31767 35704 38463 ...
 - attr(*, ".internal.selfref")=<externalptr>