Abstract
Code sometimes contains errors, so it is important to be able to diagnose whether a piece of code has defects. The data set for this project, SWDEFECTS, uses four methods (lines of code, cyclomatic complexity, essential complexity, and design complexity) to assess whether a piece of code has a defect. This project then computes the accuracy, detection rate, false alarm rate, and precision of each method and, finally, plots these values for each method on a bar plot for comparison. The SWDEFECTS data set consists of outputs on whether a section of code is likely to have a defect according to each method. If a method outputs “yes”, that section of code is likely to contain a defect; if it outputs “no”, it likely has no defects. The four methods are lines of code, cyclomatic complexity, essential complexity, and design complexity. Each of these is given a variable that records “yes” or “no” depending on the outcome.
# Layout of the 2x2 confusion table whose cells a, b, c, d are used by the
# metric formulas and by acc(), detect(), falarm() and prec().
# The "no" row comes first so that the letters agree with those formulas
# (d = defect predicted AND present) and with the row order that
# table(prediction, defect) prints for the real data.
# The first row/column of the matrix hold the inner labels; the dimnames hold
# the outer headings, which is why most dimnames are empty strings.
SUM <- matrix(
  c(
    "",    "False", "True",
    "no",  "a",     "b",
    "yes", "c",     "d"
  ),
  ncol = 3,
  byrow = TRUE
)
colnames(SUM) <- c("", "Module has defects", "")
rownames(SUM) <- c("", "Algorithm predicts defects", "")
SUM <- as.table(SUM)
SUM
## Module has defects
## False True
## Algorithm predicts defects no a b
## yes c d
\[P(Algorithm\ is\ correct)=\frac{(a+d)}{(a+b+c+d)}\]
\[P(predict\ defect\ |\ module\ has\ defect)=\frac{d}{(b+d)}\]
\[P(predict\ defect\ |\ module\ has\ no\ defect)=\frac{c}{(a+c)}\]
\[P(module\ has\ defect\ |\ predict\ defect)=\frac{d}{c+d}\]
#' Accuracy: the probability that the algorithm is correct.
#'
#' @param a,d Counts of the two cells in which the prediction agrees with the
#'   module's actual defect status.
#' @param b,c Counts of the two cells in which the prediction disagrees.
#' @return `(a + d) / (a + b + c + d)`.
acc <- function(a, b, c, d) {
  n_correct <- a + d
  n_total <- a + b + c + d
  n_correct / n_total
}
#' Detection rate: P(predict defect | module has defect).
#'
#' @param b Count of modules that have a defect but were not predicted to.
#' @param d Count of modules that have a defect and were predicted to.
#' @return `d / (b + d)`.
detect <- function(b, d) {
  n_with_defect <- b + d
  d / n_with_defect
}
#' False alarm rate: P(predict defect | module has no defect).
#'
#' @param a Count of defect-free modules that were not predicted defective.
#' @param c Count of defect-free modules that were predicted defective.
#' @return `c / (a + c)`.
falarm <- function(a, c) {
  n_without_defect <- a + c
  c / n_without_defect
}
#' Precision: P(module has defect | predict defect).
#'
#' @param c Count of modules predicted defective that have no defect.
#' @param d Count of modules predicted defective that do have a defect.
#' @return `d / (c + d)`.
prec <- function(c, d) {
  n_predicted_defect <- c + d
  d / n_predicted_defect
}
# Show the working directory; the CSV below is read relative to it.
getwd()
## [1] "C:/Users/Caleb/Documents/Civil Engineering degree coursework/Applied Statistical Methods/Projects/Project 1"
# Load the defect data set and preview its first rows.
swd <- read.csv(file = "SWDEFECTS.csv")
head(x = swd)
# Cross-tabulate the predict.loc.50 column against the defect column, then
# append row and column totals and display the result.
tab1 <- table(swd[c("predict.loc.50", "defect")])
tab2 <- addmargins(tab1)
print(tab2)
## defect
## predict.loc.50 FALSE TRUE Sum
## no 400 29 429
## yes 49 20 69
## Sum 449 49 498
# Cross-tabulate the predict.vg.10 column against the defect column, then
# append row and column totals and display the result.
tab1 <- table(swd[c("predict.vg.10", "defect")])
tab2 <- addmargins(tab1)
print(tab2)
## defect
## predict.vg.10 FALSE TRUE Sum
## no 397 35 432
## yes 52 14 66
## Sum 449 49 498
# Cross-tabulate the predict.evg.14.5 column against the defect column, then
# append row and column totals and display the result.
tab1 <- table(swd[c("predict.evg.14.5", "defect")])
tab2 <- addmargins(tab1)
print(tab2)
## defect
## predict.evg.14.5 FALSE TRUE Sum
## no 441 47 488
## yes 8 2 10
## Sum 449 49 498
# Cross-tabulate the predict.ivg.9.2 column against the defect column, then
# append row and column totals and display the result.
tab1 <- table(swd[c("predict.ivg.9.2", "defect")])
tab2 <- addmargins(tab1)
print(tab2)
## defect
## predict.ivg.9.2 FALSE TRUE Sum
## no 422 38 460
## yes 27 11 38
## Sum 449 49 498
# Summary of the four performance measures (columns) for each of the four
# prediction methods (rows).
#
# The counts are copied from the margin tables printed earlier in this
# document and passed in the order the metric functions expect:
#   a = predicted "no",  defect FALSE    b = predicted "no",  defect TRUE
#   c = predicted "yes", defect FALSE    d = predicted "yes", defect TRUE
# Supplying the "yes" row as a/b (as was done previously) turns accuracy into
# the error rate and distorts the other three measures as well.
tab3 <- matrix(
  c(
    acc(400, 29, 49, 20), detect(29, 20), falarm(400, 49), prec(49, 20),
    acc(397, 35, 52, 14), detect(35, 14), falarm(397, 52), prec(52, 14),
    acc(441, 47, 8, 2),   detect(47, 2),  falarm(441, 8),  prec(8, 2),
    acc(422, 38, 27, 11), detect(38, 11), falarm(422, 27), prec(27, 11)
  ),
  ncol = 4,
  byrow = TRUE
)
colnames(tab3) <- c("Accuracy", "Detection rate", "False alarm rate", "Precision")
rownames(tab3) <- c(
  "Lines of code", "Cyclomatic complexity",
  "Essential complexity", "Design complexity"
)
tab3 <- as.table(tab3)
tab3
## Accuracy Detection rate False alarm rate Precision
## Lines of code 0.84337349 0.40816327 0.10913140 0.28985507
## Cyclomatic complexity 0.82530120 0.28571429 0.11581292 0.21212121
## Essential complexity 0.88955823 0.04081633 0.01781737 0.20000000
## Design complexity 0.86947791 0.22448980 0.06013363 0.28947368

#' Draw a grouped bar plot of a metrics table and print the rounded table.
#'
#' @param tab A matrix/table with one row per method and four columns, in the
#'   order accuracy, detection rate, false alarm rate, precision (the group
#'   labels are fixed to these four names).
#' @param dec Number of decimal places used when printing the table.
#' @return The value of `print()`: a one-element list holding `tab` rounded
#'   to `dec` decimal places.
mybar <- function(tab, dec) {
  n <- nrow(tab)
  name <- c("Acc", "Detect", "Falarm", "Prec")
  # `dec` used to be passed as barplot()'s second positional argument, which
  # is the bar `width`, so it never rounded anything. It now rounds the
  # printed table instead.
  # NOTE(review): the values are proportions (<= 1); the upper y limit of 2
  # presumably leaves headroom for the legend -- confirm before tightening.
  barplot(
    tab,
    names.arg = name,
    legend.text = TRUE,
    beside = TRUE,
    col = rainbow(n),
    ylim = c(0, 2),
    args.legend = list(x = "topright", inset = c(0.15, 0))
  )
  print(list(round(tab, dec)))
}
mybar(tab3, 4)
## [[1]]
## Accuracy Detection rate False alarm rate Precision
## Lines of code 0.8434 0.4082 0.1091 0.2899
## Cyclomatic complexity 0.8253 0.2857 0.1158 0.2121
## Essential complexity 0.8896 0.0408 0.0178 0.2000
## Design complexity 0.8695 0.2245 0.0601 0.2895