# Load the data set and take a first look ----
# Each read.csv() call below overwrites `data`; they demonstrate three
# alternative ways of locating the same file, and the last one wins.

# Show the current working directory.
getwd()

# Option 1: change the working directory, then read by relative path.
# NOTE(review): setwd() with an absolute path makes the script
# machine-specific; kept because the relative read below depends on it.
setwd("C:/XLRI/R data set")
getwd()
data <- read.csv("DATASET.csv")

# Option 2: read by absolute path.
data <- read.csv("C:/XLRI/R data set/DATASET.csv")

# Option 3: pick the file interactively through a file dialog.
data <- read.csv(file.choose())

# Open the data frame in the spreadsheet-style viewer.
View(data)

# First rows, last rows, column structure, and per-column summary.
head(data)
tail(data)
str(data)
summary(data)
# Missing values: inspect and impute ----

# Logical matrix marking every missing cell.
is.na(data)

# Calculate the number of missing values in each column.
colSums(is.na(data))

# Distribution and summary of the two numeric columns that get imputed.
hist(data$Age)
hist(data$SCR)
summary(data$Age)
summary(data$SCR)

# Treat Gender as a categorical variable.
data$Gender <- factor(data$Gender)
str(data)

# Column means ignoring missing values (these are the imputation values).
mean(data$Age, na.rm = TRUE)
mean(data$SCR, na.rm = TRUE)

# Show the Age entries that are currently missing.
data$Age[is.na(data$Age)]

# Replace missing Age / SCR values with the respective column mean.
data$Age[is.na(data$Age)] <- mean(data$Age, na.rm = TRUE)
data$SCR[is.na(data$SCR)] <- mean(data$SCR, na.rm = TRUE)

# Re-check: Age and SCR should now report zero missing values.
is.na(data)
colSums(is.na(data))
# Exploratory plots with ggplot2 ----
library(ggplot2)

max(data$Age)

# Histogram of Age with blue bars. The colour is set on the geom: putting
# fill = "blue" inside aes() would map the constant string to the fill
# scale (default palette colour plus a legend) instead of painting bars blue.
ggplot(data, aes(Age)) +
  geom_histogram(fill = "blue")

# Histogram of Age, bars coloured by Gender.
ggplot(data, aes(Age, fill = Gender)) +
  geom_histogram()

# Histogram of Age, one panel (row) per Gender.
ggplot(data, aes(Age)) +
  geom_histogram() +
  facet_grid(Gender ~ .)

# Based on Occupation: one panel (row) per Occupation.
ggplot(data, aes(Age)) +
  geom_histogram() +
  facet_grid(Occupation ~ .)

# Same plot, with the visible y-axis limited to 0-500 (zoom only; no data
# is dropped by coord_cartesian()).
ggplot(data, aes(Age)) +
  geom_histogram() +
  facet_grid(Occupation ~ .) +
  coord_cartesian(ylim = c(0, 500))

# Same plot again, with descriptive axis labels.
ggplot(data, aes(Age)) +
  geom_histogram() +
  facet_grid(Occupation ~ .) +
  coord_cartesian(ylim = c(0, 500)) +
  xlab("Age of Customers") +
  ylab("No. of Customers")

# Count of rows per Occupation, stacked by Gender.
ggplot(data, aes(Occupation, fill = Gender)) +
  geom_bar()

# Box plots of SCR by Occupation, by Gender, and by both.
ggplot(data, aes(Occupation, SCR)) +
  geom_boxplot()

ggplot(data, aes(Gender, SCR)) +
  geom_boxplot()

ggplot(data, aes(Occupation, SCR, fill = Gender)) +
  geom_boxplot()