R Quiz for Batches 1 & 2: Answer the following questions, publish your answers on RPubs, and share the link.
Part 1
Q1) How many data structures does R language have?
Answer: 6 (vectors, lists, data frames, matrices, arrays, and factors).
Q2) What is the value of f(5) for the following R code?
# Global b; the function below never reads it.
b <- 4
# f ignores its argument `a`, assigns a local b of 3, and returns b^3.
# Answer: f(5) is 27, because the local `b <- 3` shadows the global `b <- 4`.
f <- function (a)
{
b <- 3
b^3
}
Q3) Fix the error in the following code.
# Print a one-line description of `a`: missing, negative, or non-negative.
# The NA check comes first because `NA < 0` is itself NA and cannot be used
# as an `if` condition.
printmessage <- function(a) {
  msg <- if (is.na(a)) {
    "a is a missing value!"
  } else if (a < 0) {
    "a is less than zero"
  } else {
    "a is greater than or equal to zero"
  }
  # print() returns its argument invisibly, so the message is also the result.
  print(msg)
}
printmessage(NA)
## [1] "a is a missing value!"
Q4) What is the difference between data frame and a matrix in R?
Answer: In a data frame, different columns can hold different types of data, whereas in a matrix all elements must be of the same type.
Q5) Two vectors X and Y are defined as follows: X <- c(3, 2, 4) and Y <- c(1, 2). What will be the output of the vector Z, defined as Z <- X*Y?
# X has 3 elements and Y has 2, so R recycles Y to c(1, 2, 1) for the product.
X <- c(3, 2, 4)
Y <- c(1, 2)
# Elementwise product: c(3*1, 2*2, 4*1). Because 3 is not a multiple of 2,
# R also emits the recycling warning shown below.
Z <- X*Y
## Warning in X * Y: longer object length is not a multiple of shorter object
## length
print(Z)
## [1] 3 4 4
Q6) Drop the variables v2 and v3 from the data frame below.
df<-data.frame(v1=c(1:5),v2=c(2:6),v3=c(3:7),v4=c(4:8))
df <- df[, !(names(df) %in% c("v2", "v3"))]
Q7) What will be the output of the following R programming code?
# 5 %% 2 is 1 (the remainder after dividing by 2), so the condition is FALSE
# and the else branch runs.
x<-5
if(x%%2==0) {
print("X is an even number")
} else {
print("X is an odd number")
}
## [1] "X is an odd number"
Q8) I have a string “contact@boston.in”. Which string function can be used to split the string into two different strings “contact@boston” and “in”?
# fixed = TRUE makes strsplit() treat "." as a literal dot rather than as the
# regular-expression wildcard that matches any character.
st <- "contact@boston.in"
strsplit(st, split = ".", fixed = TRUE)
## [[1]]
## [1] "contact@boston" "in"
Q9) Write an R program to count the occurrences of each unique value in the given vector.
# table() counts how many times each distinct value of tt occurs.
tt <- c("a", "b", "a", "a", "b", "c", "a1", "a1", "a1")
table(tt)
## tt
## a a1 b c
## 3 3 2 1
Q10) Write an R program to find the cumulative frequency for the given vector.
# Cumulative frequency: tabulate each distinct sales value, then accumulate
# the counts in ascending order of the value.
Sales <- c(
  10, 2, 40, 13, 34, 12, 35,
  67, 12, 56, 14, 56, 134
)
freq <- table(Sales)
cumsum(freq)
## 2 10 12 13 14 34 35 40 56 67 134
## 1 2 4 5 6 7 8 9 11 12 13
Part 2
For the given dataset, carry out the following tasks. Dataset: the dataset is given in the link.
1) Find the missing values in the dataset.
2) Impute the missing values in the dataset using the missForest package.
3) Find summary statistics using the dplyr package.
4) What is the average amount spent by males and females?
5) Using the ggplot2 library, build 5 graphs.
# Load the German credit risk data set into a data frame.
# NOTE(review): this is an absolute path on one specific Windows machine, so
# the script will not run elsewhere as written — consider a relative path.
data <- read.csv("C:\\Users\\kanch\\OneDrive\\Desktop\\R test\\german_credit_data_risk.csv")
# Per-column overview: quartiles and mean for numeric columns, length and
# class for character columns.
summary(data)
## X Age Sex Job
## Min. : 0.0 Min. :19.00 Length:1000 Min. :0.000
## 1st Qu.:249.8 1st Qu.:27.00 Class :character 1st Qu.:2.000
## Median :499.5 Median :33.00 Mode :character Median :2.000
## Mean :499.5 Mean :35.55 Mean :1.904
## 3rd Qu.:749.2 3rd Qu.:42.00 3rd Qu.:2.000
## Max. :999.0 Max. :75.00 Max. :3.000
## Housing Saving.accounts Checking.account Credit.amount
## Length:1000 Length:1000 Length:1000 Min. : 250
## Class :character Class :character Class :character 1st Qu.: 1366
## Mode :character Mode :character Mode :character Median : 2320
## Mean : 3271
## 3rd Qu.: 3972
## Max. :18424
## Duration Purpose Risk
## Min. : 4.0 Length:1000 Length:1000
## 1st Qu.:12.0 Class :character Class :character
## Median :18.0 Mode :character Mode :character
## Mean :20.9
## 3rd Qu.:24.0
## Max. :72.0
# Task 1: number of missing values in each column. In the output below only
# Saving.accounts and Checking.account contain NAs.
colSums(is.na(data))
## X Age Sex Job
## 0 0 0 0
## Housing Saving.accounts Checking.account Credit.amount
## 0 183 394 0
## Duration Purpose Risk
## 0 0 0
# Total number of missing cells across the whole data frame.
sum(is.na(data))
## [1] 577
library(missForest)
## Loading required package: randomForest
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
## Loading required package: foreach
## Loading required package: itertools
## Loading required package: iterators
# Task 2: impute the missing values with missForest.
# The imputed copy is stored in `data_imputed`; `data` itself is left
# untouched so the rest of the analysis is unaffected.
# NOTE(review): X looks like a row index (0-999 in the summary above), so it
# is left out of the imputation model — confirm.
impute_input <- data[, setdiff(names(data), "X"), drop = FALSE]
# missForest() works on numeric and factor columns, so character columns are
# converted to factors first.
chr_cols <- vapply(impute_input, is.character, logical(1))
impute_input[chr_cols] <- lapply(impute_input[chr_cols], factor)
# The algorithm is built on random forests; fix the seed for reproducibility.
set.seed(42)
imputed <- missForest(impute_input)
data_imputed <- imputed$ximp
# Check the result: every column should now report 0 missing values.
colSums(is.na(data_imputed))
3) Find summary statistics using the dplyr package
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:randomForest':
##
## combine
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Task 3: summary statistics of Credit.amount for each sex.
# The first column holds the mean credit amount (it was previously mislabelled
# Mean_age), and quantile()'s `probs` argument is now spelled out in full
# instead of relying on partial matching of `prob`.
data %>%
  group_by(Sex) %>%
  summarise(
    Mean_credit = mean(Credit.amount),
    StndDev = sd(Credit.amount),
    Max_credit = max(Credit.amount),
    Min_credit = min(Credit.amount),
    Quartile_1 = quantile(Credit.amount, probs = 0.25),
    Median = quantile(Credit.amount, probs = 0.50),
    Quartile_3 = quantile(Credit.amount, probs = 0.75)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 8
## Sex Mean_credit StndDev Max_credit Min_credit Quartile_1 Median Quartile_3
## <chr> <dbl> <dbl> <int> <int> <dbl> <dbl> <dbl>
## 1 female 2878. 2603. 18424 250 1248. 1959 3606.
## 2 male 3448. 2900. 15945 276 1442. 2444. 4266.
# Task 4: average credit amount for each sex.
data %>%
  group_by(Sex) %>%
  summarise(Avg_cr_amt = mean(Credit.amount))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 2
## Sex Avg_cr_amt
## <chr> <dbl>
## 1 female 2878.
## 2 male 3448.
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:randomForest':
##
## margin
# Plot 1: horizontal bar chart of the number of records per sex.
ggplot(data, aes(x = Sex)) +
  geom_bar() +
  coord_flip() +
  labs(title = "Sex vs Count :Bar Plot")
# Plot 2: histogram of Age using bins 4 units wide.
ggplot(data, aes(x = Age)) +
  geom_histogram(binwidth = 4, colour = "black", fill = "green") +
  labs(title = "Plot of Age", x = "Age", y = "Frequency")
# Plot 3: bar chart of Housing, one fill colour per category.
# The x-axis text and ticks are hidden, so the legend identifies the bars.
# NOTE(review): the legend labels are matched to the fill levels by position —
# confirm that the sorted Housing values really are free / own / rent.
ggplot(data, aes(x = Housing, fill = as.factor(Housing))) +
  geom_bar() +
  scale_fill_discrete(
    name = "Housing",
    labels = c("Free", "Own", "Rent")
  ) +
  labs(title = "Plot of Housing", x = "Housing", y = "Frequency") +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )
# Plot 4: bar chart of Saving.accounts, one fill colour per category.
# NOTE(review): the legend labels are matched to the fill levels by position,
# with the fifth label ("NA") presumably meant for the missing values in this
# column — confirm the order against the actual category values.
ggplot(data, aes(x = Saving.accounts, fill = as.factor(Saving.accounts))) +
  geom_bar() +
  scale_fill_discrete(
    name = "Saving Accounts",
    labels = c("Little", "Moderate", "Quite Rich", "Rich", "NA")
  ) +
  labs(
    title = "Plot of Saving Accounts",
    x = "Saving Accounts",
    y = "Frequency"
  )
# Plot 5: bar chart of loan Purpose, one fill colour per category.
# The x-axis text and ticks are hidden, so the legend identifies the bars.
# NOTE(review): the legend labels are matched to the fill levels by position —
# confirm the order against the sorted Purpose values.
ggplot(data, aes(x = Purpose, fill = as.factor(Purpose))) +
  geom_bar() +
  scale_fill_discrete(
    name = "Purpose of Loan",
    labels = c(
      "Business", "Car", "Domestic Appliances", "Education",
      "Furniture/Equipment", "Radio/TV", "Repairs", "Vacation/Others"
    )
  ) +
  labs(
    title = "Plot of Loan Purpose",
    x = "Purpose of Loan",
    y = "Frequency"
  ) +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )