Initialization

Set up libraries and read in the data.

library(foreign)
library(MASS)
library(fitdistrplus)

## Loading required package: survival Loading required package: splines

library(lattice)
library(poweRlaw)
library(truncnorm)
options(warn = -1)

setwd("~/Dropbox/wrk/promotion")
spssdata <- data.frame(read.spss("EE Promo Leader Ratings.sav", use.value.labels = TRUE))

Take an initial look as how the data were read in.

str(spssdata)

## 'data.frame':    2625 obs. of  88 variables:
##  $ ID      : num  1 2 3 4 5 6 7 8 9 10 ...
##  $ OFFICE  : Factor w/ 298 levels "       ","A      ",..: 245 294 4 261 1 239 237 248 237 97 ...
##  $ OFFICE2 : Factor w/ 13 levels " ","A","B","C",..: 11 13 2 11 1 11 11 11 11 8 ...
##  $ JOBCODE : Factor w/ 396 levels "      ","0001R1",..: 286 22 109 257 1 195 199 202 199 290 ...
##  $ WR      : Factor w/ 160 levels "    ","0001",..: 104 9 39 93 1 68 69 70 69 105 ...
##  $ BAND    : num  2 5 4 2 NA 3 4 3 4 4 ...
##  $ SUPLEV  : Factor w/ 16 levels "        ","0       ",..: 10 16 10 10 1 10 10 10 10 10 ...
##  $ SUPLEV2 : Factor w/ 2 levels "Non-Supervisor",..: 1 2 1 1 1 1 1 1 1 1 ...
##  $ SUPLEV3 : Factor w/ 2 levels "Non Super","Team Lead, Super, or Chief": 1 2 1 1 1 1 1 1 1 1 ...
##  $ WRTITLE : Factor w/ 232 levels "                                ",..: 146 25 31 77 1 133 23 59 23 47 ...
##  $ CITY    : Factor w/ 56 levels "                       ",..: 55 6 44 7 1 6 6 48 6 6 ...
##  $ STATE   : Factor w/ 27 levels "   ","AUS","AZ ",..: 7 17 27 7 1 17 17 18 17 17 ...
##  $ ETHNIC  : Factor w/ 8 levels " ","A","B","C",..: 4 6 6 6 1 6 6 6 6 4 ...
##  $ ETHNIC2 : Factor w/ 2 levels "Black","White": 1 2 2 2 NA 2 2 2 2 1 ...
##  $ SEX     : Factor w/ 3 levels " ","F","M": 2 3 3 3 1 3 2 3 3 3 ...
##  $ SEX2    : Factor w/ 2 levels "Female","Male": 1 2 2 2 NA 2 1 2 2 2 ...
##  $ BIRTH   : num  1.15e+10 1.17e+10 1.22e+10 1.19e+10 NA ...
##  $ RETIRE  : num  1.32e+10 1.35e+10 1.40e+10 1.37e+10 NA ...
##  $ S_CM    : Factor w/ 2 levels "   ","CIV": 2 2 2 2 1 2 2 2 2 2 ...
##  $ SID     : Factor w/ 380 levels "    ","S100",..: 97 218 45 99 1 19 355 228 355 313 ...
##  $ SID2    : num  198 317 141 20 NA 116 73 327 73 412 ...
##  $ SSTATE  : Factor w/ 14 levels "   ","AZ ","CA ",..: 4 9 14 4 1 8 8 9 8 8 ...
##  $ SCITY   : Factor w/ 27 levels "                      ",..: 26 23 21 5 1 4 4 23 4 4 ...
##  $ SETHNIC : Factor w/ 7 levels " ","A","B","C",..: 6 6 6 6 1 6 6 6 6 6 ...
##  $ SETHNIC2: Factor w/ 2 levels "Black","White": 2 2 2 2 NA 2 2 2 2 2 ...
##  $ SSEX    : Factor w/ 3 levels " ","F","M": 3 2 2 3 1 3 3 3 3 2 ...
##  $ SSEX2   : Factor w/ 2 levels "Female","Male": 2 1 1 2 NA 2 2 2 2 1 ...
##  $ SBDATE  : num  1.21e+10 1.17e+10 1.18e+10 1.19e+10 NA ...
##  $ SRETIRE : num  1.38e+10 1.35e+10 1.35e+10 1.37e+10 NA ...
##  $ R_CM    : Factor w/ 3 levels "   ","CIV","MIL": 2 2 2 3 1 2 2 2 2 2 ...
##  $ RID     : Factor w/ 174 levels "    ","R1  ",..: 144 150 28 20 1 112 90 42 90 140 ...
##  $ PROMO   : Factor w/ 5 levels "               ",..: 2 4 2 2 1 2 4 4 3 2 ...
##  $ PROMO2  : Factor w/ 4 levels "EE Declines",..: 2 3 2 2 NA 2 3 3 1 2 ...
##  $ PROMO3  : Factor w/ 2 levels "Decline or Do Not Rec",..: 1 2 1 1 NA 1 2 2 1 1 ...
##  $ PROMO4  : Factor w/ 2 levels "Recommend","Strongly Recommend": NA 1 NA NA NA NA 1 1 NA NA ...
##  $ ACCOUNT : num  0 0 0 0 NA 0 0 0 0 0 ...
##  $ CONFLICT: num  0 0 1 0 NA 0 0 0 0 0 ...
##  $ CONTINU : num  0 0 0 0 NA 4 0 0 0 0 ...
##  $ CREATIV : num  0 0 0 0 NA 0 0 0 0 0 ...
##  $ CULTURAL: num  0 0 0 0 NA 0 0 0 0 0 ...
##  $ CUSTOMER: num  0 0 0 0 NA 0 1 0 0 0 ...
##  $ DECISIVE: num  0 0 0 0 NA 0 0 0 0 0 ...
##  $ ENTREPRE: num  0 0 1 0 NA 0 0 0 0 0 ...
##  $ EXTERNAL: num  0 0 0 0 NA 0 0 0 0 0 ...
##  $ FINANCIA: num  0 0 1 1 NA 1 0 0 0 0 ...
##  $ FLEXIBIL: num  0 0 1 1 NA 0 0 0 0 0 ...
##  $ HUMAN_RE: num  0 0 0 0 NA 2 1 0 0 0 ...
##  $ INFLUENC: num  0 0 1 0 NA 0 1 0 0 0 ...
##  $ INTEGRIT: num  0 0 0 0 NA 0 1 0 0 0 ...
##  $ INTERPER: num  0 0 0 0 NA 0 0 0 0 0 ...
##  $ ORAL_COM: num  0 0 0 0 NA 0 0 0 0 0 ...
##  $ PARTNERI: num  0 0 0 0 NA 0 0 0 0 0 ...
##  $ POLITICA: num  0 0 0 0 NA 0 0 0 0 0 ...
##  $ PROBLEM : num  0 0 0 0 NA 0 0 0 0 0 ...
##  $ RESILIEN: num  0 0 0 0 NA 0 0 0 0 0 ...
##  $ SERVICE : num  0 0 1 0 NA 3 0 0 0 0 ...
##  $ STRATEGI: num  0 0 0 0 NA 0 0 0 0 0 ...
##  $ TEAM_BUI: num  0 0 2 0 NA 1 0 0 0 0 ...
##  $ TECH_CR : num  0 0 1 0 NA 2 1 0 0 0 ...
##  $ TECH_MGT: num  0 0 0 0 NA 0 0 0 0 0 ...
##  $ VISION  : num  0 0 0 0 NA 0 0 0 0 0 ...
##  $ WRITTEN : num  0 0 0 0 NA 0 0 0 0 0 ...
##  $ LEAD_CH : num  0 0 2 1 NA 7 0 0 0 0 ...
##  $ LEAD_P  : num  0 0 3 0 NA 1 1 0 0 0 ...
##  $ RESULTS : num  0 0 2 0 NA 2 2 0 0 0 ...
##  $ BUSINESS: num  0 0 1 1 NA 3 1 0 0 0 ...
##  $ BUILDING: num  0 0 1 0 NA 0 1 0 0 0 ...
##  $ PEOPLE  : num  0 0 4 0 NA 1 2 0 0 0 ...
##  $ TASK    : num  0 0 3 1 NA 5 3 0 0 0 ...
##  $ TASK2   : num  0 0 3 1 NA 5 2 0 0 0 ...
##  $ PEOPLE2 : num  0 0 4 0 NA 1 3 0 0 0 ...
##  $ P_LD_CH : num  NA 0 0.0328 0.0714 NA ...
##  $ P_LD_P  : num  NA 0 0.0492 0 NA ...
##  $ P_RESULT: num  NA 0 0.0328 0 NA ...
##  $ P_BUSIN : num  NA 0 0.0164 0.0714 NA ...
##  $ P_BUILD : num  NA 0 0.0164 0 NA ...
##  $ WRDCOUNT: num  0 3 61 14 90 116 48 9 12 25 ...
##  $ LEADERSH: num  0 0 9 2 NA 13 5 0 0 0 ...
##  $ ETH_SETH: num  0 1 1 1 NA 1 1 1 1 0 ...
##  $ SEX_SEX2: num  0 0 0 1 NA 1 0 1 1 0 ...
##  $ FILTER_.: Factor w/ 2 levels "Not Selected",..: 1 2 1 1 1 1 1 1 1 1 ...
##  $ O_A     : num  0 0 1 0 0 0 0 0 0 0 ...
##  $ O_F     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ O_H     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ O_I     : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ O_O     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ O_P     : num  1 0 0 1 0 1 1 1 1 0 ...
##  $ O_S     : num  0 0 0 0 0 0 0 0 0 0 ...

Plots

Graphical displays of the data seem to be a helpful start to figuring out what is going on with the data.

Distribution of Recommendations

First, how are the promotion recommendations distributed?

promo.count <- table(spssdata$PROMO2)
## windows()
barplot(promo.count, ylim = c(0, max(promo.count + 75)))
text(seq(0.75, 4.5, by = 1.2), promo.count + 50, promo.count)

plot of chunk unnamed-chunk-3

Total Wordcount

There is an interesting comparison to be made of the overall distribution of word counts, which I think is The total number of words supervisors used in their descriptions of the person they are recommending for promotion, and the four conditional distributions.

hist(spssdata$WRDCOUNT)

plot of chunk unnamed-chunk-4

histogram(~WRDCOUNT | PROMO2, spssdata)

plot of chunk unnamed-chunk-4

Number of Leadership Words

histogram(~LEADERSH, spssdata, subset = !is.na(LEADERSH))

plot of chunk unnamed-chunk-5

histogram(~LEADERSH | PROMO2, spssdata, subset = !is.na(LEADERSH))

plot of chunk unnamed-chunk-5

Cullen and Frey Graphs

Overall

l.dat <- spssdata$LEADERSH[!is.na(spssdata$LEADERSH)]
descdist(l.dat, boot = 1000)

## summary statistics
## ------
## min:  0   max:  68 
## median:  6 
## mean:  8.74 
## estimated sd:  9.557 
## estimated skewness:  1.08 
## estimated kurtosis:  4.081

plot of chunk unnamed-chunk-6

## Fit normal distribution and plot fit
fn <- fitdist(l.dat, "norm", method = "mle")
plot(fn)

plot of chunk unnamed-chunk-7

summary(fn)

## Fitting of the distribution ' norm ' by maximum likelihood 
## Parameters : 
##      estimate Std. Error
## mean    8.740     0.1959
## sd      9.555     0.1385
## Loglikelihood:  -8741   AIC:  17487   BIC:  17499 
## Correlation matrix:
##           mean        sd
## mean 1.000e+00 6.172e-09
## sd   6.172e-09 1.000e+00

EE Declines

If the EE Declines, there is essentially no distribution, so it doesn't make send to try the graph.

with(subset(spssdata, PROMO2 == "EE Declines"), table(LEADERSH, useNA = "ifany"))

## LEADERSH
##   0 
## 437

Do Not Rec

If they are not recommended, then it seems more like an exponential or power law distribution.

l.dat <- with(spssdata, LEADERSH[!is.na(LEADERSH) & PROMO2 == "Do Not Rec"])
descdist(l.dat, boot = 1000)

## summary statistics
## ------
## min:  0   max:  40 
## median:  3 
## mean:  6.062 
## estimated sd:  7.5 
## estimated skewness:  1.366 
## estimated kurtosis:  4.274

$plot of chunk unnamed-chunk-9$

## Fit normal distribution and plot fit
fn <- fitdist(l.dat, "norm", method = "mle")
plot(fn)

plot of chunk unnamed-chunk-10

summary(fn)

## Fitting of the distribution ' norm ' by maximum likelihood 
## Parameters : 
##      estimate Std. Error
## mean    6.062     0.2555
## sd      7.496     0.1806
## Loglikelihood:  -2956   AIC:  5916   BIC:  5926 
## Correlation matrix:
##      mean sd
## mean    1  0
## sd      0  1

The plot does not seem to agree with my eyes, however.

Recommend

Those who are recommended look to my eye as if they are normally distributed but truncated. I am not sure how to test for that although we could either find something or make something up.

l.dat <- with(spssdata, LEADERSH[!is.na(LEADERSH) & PROMO2 == "Recommend"])
descdist(l.dat, boot = 1000)

## summary statistics
## ------
## min:  0   max:  39 
## median:  12 
## mean:  12.57 
## estimated sd:  8.67 
## estimated skewness:  0.5539 
## estimated kurtosis:  2.719

plot of chunk unnamed-chunk-11

## Fit normal distribution and plot fit
fn <- fitdist(l.dat, "norm", method = "mle")
plot(fn)

plot of chunk unnamed-chunk-12

summary(fn)

## Fitting of the distribution ' norm ' by maximum likelihood 
## Parameters : 
##      estimate Std. Error
## mean   12.572     0.3245
## sd      8.664     0.2294
## Loglikelihood:  -2551   AIC:  5106   BIC:  5115 
## Correlation matrix:
##      mean sd
## mean    1  0
## sd      0  1

The figure looks like it is consistent with this hypothesis. If you truncate a normal distribution, you might lower kurtosis and increase skew.

Strongly Recommend

Those who are recommended look normal.

l.dat <- with(spssdata, LEADERSH[!is.na(LEADERSH) & PROMO2 == "Strongly Rec"])
descdist(l.dat, boot = 1000)

## summary statistics
## ------
## min:  0   max:  68 
## median:  18 
## mean:  17.98 
## estimated sd:  9.612 
## estimated skewness:  0.9349 
## estimated kurtosis:  6.142

plot of chunk unnamed-chunk-13

## Fit normal distribution and plot fit
fn <- fitdist(l.dat, "norm", method = "mle")
plot(fn)

plot of chunk unnamed-chunk-14

summary(fn)

## Fitting of the distribution ' norm ' by maximum likelihood 
## Parameters : 
##      estimate Std. Error
## mean   17.984     0.5011
## sd      9.599     0.3543
## Loglikelihood:  -1351   AIC:  2706   BIC:  2713 
## Correlation matrix:
##      mean sd
## mean    1  0
## sd      0  1

Visually, I missed the right tail.

Tom's Suggested Outline

Select cases where promo2 is not equal to 1.

Thus, we will only use do not recommend, recommend, and strongly recommend ratings.

ldr.nd <- subset(spssdata, subset = (PROMO2 != "EE Declines"), select = c(LEAD_CH, 
    LEAD_P, RESULTS, BUSINESS, BUILDING, LEADERSH, WRDCOUNT))

Create barplots or histograms, which ever looks better, using the following variables (w/ the labels): leading change, leading people, results driven, business acumen, team building, sum leadership vars, and total number of words.

These are the broader competencies that are made up of “sub-competencies.” This will keep the counts at a reasonable level. I checked the OPM ECQ website and they have changed since the original analysis was done, but I can explain this. A single lattice plot with the plots would probably work well.

ldr.nd.l <- stack(ldr.nd)
histogram(~values | ind, ldr.nd.l)

plot of chunk unnamed-chunk-16

count.hist <- function(X, name) {
    hist(X, col = "gray", main = name, xlab = "", freq = TRUE)
}

count.hist(ldr.nd$LEAD_CH, "Leading Change")

plot of chunk unnamed-chunk-16

count.hist(ldr.nd$LEAD_P, "Leading People")

plot of chunk unnamed-chunk-16

count.hist(ldr.nd$RESULTS, "Results Driven")

plot of chunk unnamed-chunk-16

count.hist(ldr.nd$BUSINESS, "Business Acumen")

plot of chunk unnamed-chunk-16

count.hist(ldr.nd$BUILDING, "Team Building")

plot of chunk unnamed-chunk-16

count.hist(ldr.nd$LEADERSH, "Sum of Leadership")

plot of chunk unnamed-chunk-16

count.hist(ldr.nd$WRDCOUNT, "Total Words")

plot of chunk unnamed-chunk-16

Fit various distributions to the variables in number 2. I think a normal distribution, truncated normal, power/pareto, and any other that you think would be applicable. Use the chi-square and one other fit indice that you think is best. O’Boyle and Agunius used chi-square so it keeps it consistent with them and everyone understands it. To present the results a table like this would work well.

do.plfit <- function(X, title = "") {
    fit.pl <- displ(X)
    estimate_pars(fit.pl)
    est = estimate_xmin(fit.pl)
    fit.pl$setXmin(est)
    plot(fit.pl, main = title)
    pred <- lines(fit.pl, col = "blue")
    return(list(fit = fit.pl, pred = pred))
}

# wrdcount.plfit <- do.plfit(ldr.nd$WRDCOUNT + 1, 'Wordcount')
lead.ch.plfit <- do.plfit(ldr.nd$LEAD_CH + 1, "Leading Change")

plot of chunk unnamed-chunk-17

lead.p.plfit <- do.plfit(ldr.nd$LEAD_P + 1, "Leading People")

plot of chunk unnamed-chunk-17

results.plfit <- do.plfit(ldr.nd$RESULTS + 1, "Results Driven")

plot of chunk unnamed-chunk-17

business.plfit <- do.plfit(ldr.nd$BUSINESS + 1, "Business Acumen")

plot of chunk unnamed-chunk-17

building.plfit <- do.plfit(ldr.nd$BUILDING + 1, "Team Building")

plot of chunk unnamed-chunk-17

leadersh.plfit <- do.plfit(ldr.nd$LEADERSH + 1, "Sum of Leadership")

plot of chunk unnamed-chunk-17

wrdcount.plfit <- do.plfit(ldr.nd$WRDCOUNT + 1, "Total Words")

plot of chunk unnamed-chunk-17


fit.dists.bic <- function(X) {
    X <- X + 0.5
    norm.fd <- fitdist(X, pnorm, method = "mle", start = list(mean = mean(X), 
        sd = sd(X)))
    norm.fd$ks <- ks.test(X, pnorm, norm.fd$estimate["mean"], norm.fd$estimate["sd"])$statistic

    tnorm.fd <- fitdist(X, ptruncnorm, method = "mle", start = list(mean = mean(X)/2, 
        sd = sd(X)), fix.arg = list(a = 0, b = Inf))
    tnorm.fd$ks <- ks.test(X, ptruncnorm, 0, Inf, tnorm.fd$estimate["mean"], 
        tnorm.fd$estimate["sd"])$statistic

    lnorm.fd <- fitdist(X, plnorm, method = "mle", start = list(meanlog = log(mean(X)), 
        sdlog = log(sd(X))))
    lnorm.fd$ks <- ks.test(X, plnorm, lnorm.fd$estimate["meanlog"], lnorm.fd$estimate["sdlog"])$statistic

    pl.fd <- fitdist(X, pplcon, method = "mle", start = list(alpha = mean(X)), 
        fix.arg = list(xmin = 0.5))
    pl.fd$ks <- ks.test(X, pplcon, as.numeric(pl.fd$fix.arg["xmin"]), pl.fd$estimate["alpha"])$statistic

    exp.fd <- fitdist(X, pexp, method = "mle", start = list(rate = 1/mean(X - 
        1)))
    exp.fd$ks <- ks.test(X, pexp, exp.fd$estimate["rate"])$statistic

    return(list(norm.fd = norm.fd, tnorm.fd = tnorm.fd, lnorm.fd = lnorm.fd, 
        pl.fd = pl.fd, exp.fd = exp.fd))
}


fit.dists.ks <- function(X) {
    norm.fd <- fitdist(X, pnorm, method = "mge", gof = "KS", start = list(mean = mean(X), 
        sd = sd(X)))
    norm.fd$ks <- ks.test(X, pnorm, norm.fd$estimate["mean"], norm.fd$estimate["sd"])$statistic

    tnorm.fd <- fitdist(X + 0.5, ptruncnorm, method = "mge", gof = "KS", start = list(mean = mean(X), 
        sd = sd(X)), fix.arg = list(a = -1, b = Inf))
    tnorm.fd$ks <- ks.test(X + 0.5, ptruncnorm, -0.5, Inf, tnorm.fd$estimate["mean"], 
        tnorm.fd$estimate["sd"])$statistic

    lnorm.fd <- fitdist(X + 0.5, plnorm, method = "mge", gof = "KS", start = list(meanlog = log(mean(X)), 
        sdlog = log(sd(X))))
    lnorm.fd$ks <- ks.test(X, plnorm, lnorm.fd$estimate["meanlog"], lnorm.fd$estimate["sdlog"])$statistic

    pl.fd <- fitdist(X + 0.5, pplcon, method = "mge", gof = "KS", start = list(alpha = log(mean(X) + 
        1)/1.5), fix.arg = list(xmin = 1))
    pl.fd$ks <- ks.test(X, pplcon, as.numeric(pl.fd$fix.arg["xmin"]), pl.fd$estimate["alpha"])$statistic

    exp.fd <- fitdist(X + 0.5, pexp, method = "mge", gof = "KS", start = list(rate = 1/mean(X - 
        1)))
    exp.fd$ks <- ks.test(X - 1, pexp, exp.fd$estimate["rate"])$statistic

    return(list(norm.fd = norm.fd, tnorm.fd = tnorm.fd, lnorm.fd = lnorm.fd, 
        pl.fd = pl.fd, exp.fd = exp.fd))
}

lead.ch.fits.bic <- fit.dists.bic(ldr.nd$LEAD_CH)
lead.p.fits.bic <- fit.dists.bic(ldr.nd$LEAD_P)
results.fits.bic <- fit.dists.bic(ldr.nd$RESULTS)
business.fits.bic <- fit.dists.bic(ldr.nd$BUSINESS)
building.fits.bic <- fit.dists.bic(ldr.nd$BUILDING)
leadersh.fits.bic <- fit.dists.bic(ldr.nd$LEADERSH)
wrdcount.fits.bic <- fit.dists.bic(ldr.nd$WRDCOUNT)

# lead.ch.fits.ks <- fit.dists.ks(ldr.nd$LEAD_CH) lead.p.fits.ks <-
# fit.dists.ks(ldr.nd$LEAD_P) results.fits.ks <-
# fit.dists.ks(ldr.nd$RESULTS) business.fits.ks <-
# fit.dists.ks(ldr.nd$BUSINESS) building.fits.ks <-
# fit.dists.ks(ldr.nd$BUILDING) leadersh.fits.ks <-
# fit.dists.ks(ldr.nd$LEADERSH) wrdcount.fits.ks <-
# fit.dists.ks(ldr.nd$WRDCOUNT)

fit.bic <- rbind(sapply(lead.ch.fits.bic, function(X) X$bic), sapply(lead.p.fits.bic, 
    function(X) X$bic), sapply(results.fits.bic, function(X) X$bic), sapply(business.fits.bic, 
    function(X) X$bic), sapply(building.fits.bic, function(X) X$bic), sapply(leadersh.fits.bic, 
    function(X) X$bic), sapply(wrdcount.fits.bic, function(X) X$bic))
rownames(fit.bic) <- c("lead.ch", "lead.p", "results", "business", "building", 
    "leadersh", "wrdcount")
fit.bic

##          norm.fd tnorm.fd lnorm.fd pl.fd exp.fd
## lead.ch     9491     8400     8444  8114   8444
## lead.p      8750     7549     7459  6569   7571
## results     9737     8391     8340  7711   8395
## business    8240     6971     6736  5732   6984
## building    8029     6551     6236  4659   6547
## leadersh   14274    13219    13703 14369  13271
## wrdcount   21213    20630    21222 25357  20909

matplot(fit.bic, type = "b", pch = "NTLPE", ylog = TRUE, ylab = "BIC")

plot of chunk unnamed-chunk-18


fit.ks <- rbind(sapply(lead.ch.fits.bic, function(X) X$ks), sapply(lead.p.fits.bic, 
    function(X) X$ks), sapply(results.fits.bic, function(X) X$ks), sapply(business.fits.bic, 
    function(X) X$ks), sapply(building.fits.bic, function(X) X$ks), sapply(leadersh.fits.bic, 
    function(X) X$ks), sapply(wrdcount.fits.bic, function(X) X$ks))
rownames(fit.ks) <- c("lead.ch", "lead.p", "results", "business", "building", 
    "leadersh", "wrdcount")
fit.ks

##          norm.fd.D tnorm.fd.D lnorm.fd.D pl.fd.D exp.fd.D
## lead.ch    0.16345    0.16044     0.2004  0.2813   0.1433
## lead.p     0.18660    0.19776     0.2410  0.3539   0.1779
## results    0.19006    0.17562     0.2125  0.3101   0.1652
## business   0.22586    0.18383     0.2404  0.3601   0.2017
## building   0.22771    0.23042     0.2910  0.4462   0.2233
## leadersh   0.13054    0.13725     0.1616  0.2822   0.1290
## wrdcount   0.08285    0.07192     0.1238  0.4164   0.1087

matplot(fit.ks, type = "b", pch = "NTLPE", ylab = "Kolmogorov-Smirnov D")

plot of chunk unnamed-chunk-18

plot.fits <- function(fits, label) {
    hist(fits$norm.fd$data, freq = FALSE, col = "gray", main = "Normal Dist.", 
        xlab = label)
    curve(dnorm(x, fits$norm.fd$estimate["mean"], fits$norm.fd$estimate["sd"]), 
        add = TRUE, col = "red")

    hist(fits$norm.fd$data, freq = FALSE, col = "gray", main = "Truncated Normal Dist.", 
        xlab = label)
    curve(dtruncnorm(x, 0, Inf, fits$tnorm.fd$estimate["mean"], fits$tnorm.fd$estimate["sd"]), 
        add = TRUE, col = "red")

    hist(fits$norm.fd$data, freq = FALSE, col = "gray", main = "Log Normal Dist.", 
        xlab = label)
    curve(dlnorm(x, fits$lnorm.fd$estimate["meanlog"], fits$lnorm.fd$estimate["sdlog"]), 
        add = TRUE, col = "red")

    hist(fits$norm.fd$data, freq = FALSE, col = "gray", main = "Power Law Dist.", 
        xlab = label)
    curve(dplcon(x, as.numeric(fits$pl.fd$fix.arg["xmin"]), fits$pl.fd$estimate["alpha"]), 
        add = TRUE, col = "red")
}

plot.pdfs <- function(fits, label) {
    denscomp(fits$norm.fd, main = "Normal Dist.", xlab = label)
    denscomp(fits$tnorm.fd, main = "Truncated Normal Dist.", xlab = label)
    denscomp(fits$lnorm.fd, main = "Log Normal Dist.", xlab = label)
    denscomp(fits$pl.fd, main = "Power Law Dist.", xlab = label)
    denscomp(fits$exp.fd, main = "Exponential Dist.", xlab = label)
}

plot.cdfs <- function(fits, label) {
    cdfcompcall <- function(fit, main, ...) cdfcomp(fit, xlogscale = FALSE, 
        ylogscale = FALSE, main = main, xlab = label, addlegend = FALSE, ...)
    cdfcompcall(fits$norm.fd, main = "Normal Dist.")
    cdfcompcall(fits$tnorm.fd, main = "Truncated Normal Dist.")
    cdfcompcall(fits$lnorm.fd, main = "Log Normal Dist.")
    cdfcompcall(fits$pl.fd, main = "Power Law Dist.")
    cdfcompcall(fits$exp.fd, main = "Exponential Dist.")
}

# plot.fits(lead.ch.fits.bic, 'Leading Change') plot.fits(lead.p.fits.bic,
# 'Leading People') plot.fits(results.fits.bic, 'Results Driven')
# plot.fits(business.fits.bic, 'Business Acumen')
# plot.fits(building.fits.bic, 'Team Building') plot.fits(leadersh.fits.bic,
# 'Sum of Leadership') plot.fits(wrdcount.fits.bic, 'Total Words')

plot.cdfs(lead.ch.fits.bic, "Leading Change")