#' Rank-frequency table and log-log plot for one column.
#'
#' Tabulates the values of `data[, column]`, orders them from most to least
#' frequent, numbers them 1..k in that order and plots frequency against rank
#' on log-log axes (points joined by lines, titled with `column`).
#'
#' @param data Object indexable as `data[, column]` (e.g. a data frame).
#' @param column Column to tabulate; also used as the plot title.
#' @param min Currently unused: the frequency filter that would consume it is
#'   disabled (see the commented line below). Kept so existing calls that
#'   pass it remain valid.
#' @return The frequency table as a data frame, sorted by decreasing
#'   frequency, with an added integer `Rank` column.
rankfreq <- function(data, column, min = 1) {
  df <- as.data.frame(sort(table(data[, column]), decreasing = TRUE))
  # df <- df[df$Freq >= min, ]

  # seq_len() gives an empty rank vector for an empty table; seq(length(.))
  # would give c(1, 0) and make the assignment fail.
  df$Rank <- seq_len(nrow(df))

  # A log-log plot needs at least one point; skip it for empty input.
  if (nrow(df) > 0) {
    plot(df$Rank, df$Freq, log = "xy", type = "b", main = column)
  }
  df
}
#' Binned frequency table and plot for a numeric column.
#'
#' Bins `data[, column]` with `hist()` and draws either the histogram itself
#' (`flag == 1`) or a spike plot of the bin counts on a logarithmic y axis
#' (any other `flag`).
#'
#' @param data Object indexable as `data[, column]` (e.g. a data frame).
#' @param column Column to bin; must be accepted by `hist()`.
#' @param breaks Passed unchanged to `hist()`.
#' @param flag `1` draws the histogram; any other value plots counts against
#'   bin index with `log = "y"` (zero counts are omitted by the log axis).
#' @return A data frame with one row per bin: `breaks` (factor holding the
#'   bin's interval label) and `counts`.
contfreq <- function(data, column, breaks, flag) {
  h <- hist(data[, column], breaks = breaks, plot = FALSE)

  # Label each bin with its own interval by locating the bin midpoint among
  # the histogram breaks. Running cut() over the break points themselves and
  # keeping the first n - 1 results mislabels bins: the first two break
  # points land in the same cut() interval, so labels are duplicated and
  # shifted relative to the counts.
  bin_labels <- cut(h$mids, breaks = h$breaks, include.lowest = TRUE)
  binned <- data.frame(breaks = bin_labels, counts = h$counts)

  if (flag == 1) {
    # Draw the histogram already computed instead of binning a second time.
    plot(h)
  } else {
    plot(seq_along(h$counts), h$counts, log = "y", type = "h",
         xlab = "Bin index", ylab = "Count")
  }
  binned
}
#' Bar plot and frequency table for a categorical column.
#'
#' @param data Object indexable as `data[, column]` (e.g. a data frame).
#' @param column Column to summarise; also used as the plot title.
#' @param levels Optional category order for the bar plot. When omitted,
#'   `factor()` chooses its default levels. It does not influence the
#'   returned table.
#' @return `table(data[, column])` converted to a data frame.
ordfreq <- function(data, column, levels) {
  categories <- if (missing(levels)) {
    factor(data[, column])
  } else {
    factor(data[, column], levels = levels)
  }
  plot(categories, main = column)

  counts <- table(data[, column])
  as.data.frame(counts)
}
# Exploratory frequency summaries, one column at a time ----
#
# NOTE(review): this section was recovered from a rendered notebook. Lines
# prefixed with "rr" were garbled echoes (smart quotes, "\(" escapes) of code
# that already appears in this file -- the three function definitions and
# several of the calls below -- and have been dropped. Console warnings that
# had been pasted in as bare text are kept as comments so the script parses.
# `data` is not defined in this section and must exist before it runs.

contfreq(data, 'Serial.Number', breaks = 'FD', flag = 1)
contfreq(data, 'Serial.Number', breaks = 'FD', flag = 0)
as.data.frame(table(data[, 1], useNA = "always"))
rankfreq(data, 'Serial.Number')

rankfreq(data, 'Name.of.the.Candidate')

rankfreq(data, 'Postal.Address')
as.data.frame(table(data[, 4]))

rankfreq(data, 'Date.of.Birth')

# contfreq(data,'Work.Experience..Years.',breaks = 'FD',flag = 1)
# as.data.frame(table(data[,9]))
# ordfreq() is called without `levels` here, so factor() picks its defaults.
ordfreq(data, 'Work.Experience..Years.')
# contfreq(data,'Work.Experience..Months.',breaks = 'FD',flag = 1)
ordfreq(data, 'Work.Experience..Months.')

contfreq(data, 'Work.Experience', breaks = 200, flag = 1)
contfreq(data, 'Work.Experience', breaks = 'FD', flag = 0)
# Observed warning: 7 y values <= 0 omitted from logarithmic plot
ordfreq(data, 'Work.Experience')
rankfreq(data, 'Work.Experience')

rankfreq(data, 'Current.Location')
p <- rankfreq(data, 'Preferred.Location')
p
# p<-as.data.frame(table(data[,14]))

rankfreq(data, 'Currency.type')

# as.data.frame() names the single column after the deparsed expression, which
# is why that same text is passed to contfreq() as the column name below.
x <- as.data.frame(as.numeric(as.character(data$Annually.salary.in.lacs.)))
# Observed warning: NAs introduced by coercion
q <- as.data.frame(table(data[, 18]))
# rankfreq(data,'Annually.salary.in.lacs.')
contfreq(x, 'as.numeric(as.character(data$Annually.salary.in.lacs.))', breaks = 200, flag = 1)
contfreq(x, 'as.numeric(as.character(data$Annually.salary.in.lacs.))', breaks = 200, flag = 0)
# Observed warning: 138 y values <= 0 omitted from logarithmic plot
q

rankfreq(data, 'U.G..Course')
rankfreq(data, 'P.G.Course')
rankfreq(data, 'Post.P..G..Course')
rankfreq(data, 'Last.Active.Date')