This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Template example: five-number summary plus mean for each column of the
# built-in `cars` data set (referenced through its package namespace).
summary(datasets::cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.
# Q1a. Read the data into the data frame `df.df`.
# stringsAsFactors = TRUE keeps `default` and `student` as factors on
# R >= 4.0 as well; the str() and describe() results shown further down
# rely on these two columns being factors.
df.df <- read.csv("DefaultData.csv", header = TRUE, stringsAsFactors = TRUE)
# Q1b. Build the data.table `dt` from the same data.
# library() stops with an error when data.table is not installed, whereas
# require() would only return FALSE and let the next line fail less clearly.
library(data.table)
dt <- data.table(df.df)
# Q2. Number of rows and columns of the data frame
c(nrow(df.df), ncol(df.df))
## [1] 10000 4
# Q3. Column names of the data frame
names(df.df)
## [1] "default" "student" "balance" "income"
#Q 4. Attaching the dataframe
# attach() places the columns of df.df on the search path, so the statements
# that follow can refer to default, student, balance and income by bare name.
# NOTE(review): packages attached afterwards can mask these names; the psych
# start-up message further down reports `income` being masked from 'df.df'.
attach(df.df)
# Q5a. Structure (class and type of every column) of the data frame `df.df`
utils::str(df.df)
## 'data.frame': 10000 obs. of 4 variables:
## $ default: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ student: Factor w/ 2 levels "No","Yes": 1 2 1 1 1 2 1 2 1 1 ...
## $ balance: num 730 817 1074 529 786 ...
## $ income : num 44362 12106 31767 35704 38463 ...
# Q5b. Structure (class and type of every column) of the data.table `dt`
utils::str(dt)
## Classes 'data.table' and 'data.frame': 10000 obs. of 4 variables:
## $ default: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ student: Factor w/ 2 levels "No","Yes": 1 2 1 1 1 2 1 2 1 1 ...
## $ balance: num 730 817 1074 529 786 ...
## $ income : num 44362 12106 31767 35704 38463 ...
## - attr(*, ".internal.selfref")=<externalptr>
# Q6. Number of consumers who did / did not default on their loan.
# with() looks `default` up inside df.df itself, so the count does not depend
# on attach() or on whatever else is on the search path. The table keeps the
# "default" header because the argument is still the bare column name.
with(df.df, table(default))
## default
## No Yes
## 9667 333
# Q7. Number of defaulters and non-defaulters, split by student status.
# The cross-tabulation is built once, from the columns of df.df (no reliance
# on attach()), and reused for Q8.
con_table <- with(df.df, table(default, student))
con_table
## student
## default No Yes
## No 6850 2817
## Yes 206 127
# Q8. Complete contingency table: the same counts with row and column totals.
addmargins(con_table)
## student
## default No Yes Sum
## No 6850 2817 9667
## Yes 206 127 333
## Sum 7056 2944 10000
# Q9. Percentage of defaulters and non-defaulters, rounded to 1 decimal place.
# prop.table() turns the counts into shares; `default` is read from df.df
# directly rather than from the attach()ed search path.
round(prop.table(with(df.df, table(default))) * 100, 1)
## default
## No Yes
## 96.7 3.3
# Q10. Mean, standard deviation and variance of income (one-row summary)
dt[, list(
  mean = mean(income),
  standard_deviation = sd(income),
  variance = var(income)
)]
## mean standard_deviation variance
## 1: 33516.98 13336.64 177865955
# Q11. Minimum and maximum income, each rounded to 2 decimal places.
# The digits argument belongs to round(), not to max(): max(income, 2) would
# take the maximum of income and the number 2 and then round to 0 decimals.
dt[, .(min = round(min(income), 2), max = round(max(income), 2))]
## min max
## 1: 771.97 73554.23
# Q12a. Descriptive statistics for every column (psych::describe()).
# library() stops with an error if psych is not installed; require() would
# only return FALSE and let describe() fail afterwards.
library(psych)
##
## Attaching package: 'psych'
## The following object is masked from 'df.df':
##
## income
# Select the statistics by name rather than by position, so the call does not
# depend on the column order of describe()'s result.
describe(df.df)[, c("vars", "n", "mean", "sd", "median", "min", "max")]
## vars n mean sd median min max
## default* 1 10000 1.03 0.18 1.00 1.00 2.00
## student* 2 10000 1.29 0.46 1.00 1.00 2.00
## balance 3 10000 835.37 483.71 823.64 0.00 2654.32
## income 4 10000 33516.98 13336.64 34552.64 771.97 73554.23
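#Q 12b. In the above output, interpret the meaning of the 1.29 reported as the mean of the student column.
#Answer: describe() marks factor columns with * and summarises their integer level codes, here No = 1 and Yes = 2. A mean of 1.29 therefore means that about 29% of the consumers are students (2944 of 10000 in the contingency table of Q8).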
# Q13a. Average balance for non-defaulters and defaulters
dt[, list(balance = mean(balance)), by = "default"]
## default balance
## 1: No 803.9438
## 2: Yes 1747.8217
# Q13b. Histogram of balance.
# The column is taken from df.df explicitly instead of relying on attach();
# main and xlab spell out the labels hist() would derive from a bare
# `balance` argument, so the plot is unchanged.
hist(df.df$balance,
     col = "lightblue",
     main = "Histogram of balance",
     xlab = "balance")
# Q14. Group size, mean and standard deviation of balance for every
# combination of default status and student status (2 decimal places)
dt[, list(
  N = .N,
  MeanBalance = round(mean(balance), 2),
  SDBalance = round(sd(balance), 2)
), by = c("default", "student")]
## default student N MeanBalance SDBalance
## 1: No No 6850 744.50 445.52
## 2: No Yes 2817 948.48 450.55
## 3: Yes Yes 127 1860.38 328.74
## 4: Yes No 206 1678.43 330.91
# Q15. Horizontal box plot of credit card balance.
# TRUE is spelled out because T is an ordinary variable that can be
# reassigned; balance is read from df.df directly instead of via attach().
boxplot(df.df$balance,
        horizontal = TRUE,
        col = "lightblue",
        main = "Boxplot for balance",
        xlab = "balance")
# Q16. Box plots of credit card balance, one box per student status.
# The formula groups by `student`, so the title names that variable; the
# columns are resolved through data = df.df rather than the search path.
boxplot(balance ~ student,
        data = df.df,
        col = c("gray", "lightblue"),
        main = "Boxplot for balance, grouped by student (Yes/No)",
        xlab = "", ylab = "")