Select Lines and Sublines
You can also embed plots, for example:
rs = dbSendQuery(mydb, "select * from line")
lindat = fetch(rs, n=-1)
lindat[,"name"]=unlist(lapply(lindat[,"name"],iconv,"latin1","UTF-8"))
rs = dbSendQuery(mydb, "select * from subline")
sublindat = fetch(rs, n=-1)
sublindat[,"name"]=unlist(lapply(sublindat[,"name"],iconv,"latin1","UTF-8"))
rs = dbSendQuery(mydb, "select * from contract_type")
ctypedat = fetch(rs, n=-1)
ctypedat[,"name"]=unlist(lapply(ctypedat[,"name"],iconv,"latin1","UTF-8"))
rs = dbSendQuery(mydb, "select * from contract")
contdat = fetch(rs, n=-1)
contdat[,"title"]=unlist(lapply(contdat[,"title"],iconv,"latin1","UTF-8"))
rs = dbSendQuery(mydb, "select * from line_subline_rel")
lsubrel = fetch(rs, n=-1)
cadena = paste0("SELECT a.lineid as lineid, a.sublineid as sublineid,",
"c.contract_id as contractid ",
"from line_subline_rel a, ",
" contract_subline_rel c ",
"where a.sublineid = c.sublineid " ,
"order by a.lineid, a.sublineid ")
rs = dbSendQuery(mydb, cadena)
sublinrel = fetch(rs, n=-1)
rs = dbSendQuery(mydb,"select * from contract_contract_type_rel")
ctyperel = fetch(rs, n=-1)
#
linesh = sublinrel[,c("lineid","sublineid","contractid")]
linesh[,"LineName"] = lindat[linesh[,"lineid"],"name"]
linesh[,"SubLineName"]= sublindat[linesh[,"sublineid"],"name"]
linesh[,"ContractTitle"]= contdat[linesh[,"contractid"],"title"]
linesh[,"ContractStart"]=as.Date(contdat[linesh[,"contractid"],
"contract_date"])
linesh[,"ContractEnd"]=as.Date(contdat[linesh[,"contractid"],
"date_final_certificate_received"])
linesh[,"ContractType"]= ctypedat[ctyperel[linesh[,"contractid"],
"contract_type_id"],"name"]
linesh[,"ContractEffectiveCost"]= contdat[linesh[,"contractid"],
"effective_cost"]
linesh[,"ContractAwardedAmount"]= contdat[linesh[,"contractid"],
"awarded_amount"]
linesh[,"ContractStatus"]= contdat[linesh[,"contractid"],"status2"]
linesh[,"ContractEstDuration"]= contdat[linesh[,"contractid"],
"estimated_contract_duration"]
linesh[,"ContractCostOverrun"]= contdat[linesh[,"contractid"],
"cost_overrun"]
linesh[,"ContractTimeOverrun"]= contdat[linesh[,"contractid"],
"schedule_overrun"]
#
Incident analysis
We are recovering the Incidents and Certificatios per contract.
rs = dbSendQuery(mydb, "select * from incidents")
incdat = fetch(rs, n=-1)
incdat[,"incident_type"]=unlist(lapply(incdat[,"incident_type"],
iconv,"latin1","UTF-8"))
incdat[,"description"]=unlist(lapply(incdat[,"description"],
iconv,"latin1","UTF-8"))
incdat[,"status"]=unlist(lapply(incdat[,"status"],
iconv,"latin1","UTF-8"))
incdat[,"date_approval"]=as.Date(incdat[,"date_approval"])
#
rs = dbSendQuery(mydb, "select * from certifications")
cerdat = fetch(rs, n=-1)
cerdat[,"certificate_type"]=unlist(lapply(cerdat[,"certificate_type"],
iconv,"latin1","UTF-8"))
cerdat[,"cert_date"]=as.Date(cerdat[,"cert_date"])
#
for (i in 1:nrow(linesh)) {
cid = linesh[i,"contractid"]
linesh[i,"ContractNumCert"]= length(which(cerdat$contract_id==i))
linesh[i,"ContractNumInc"]= length(which(incdat$contract_id==i))
}
dbDisconnect(mydb)
## [1] TRUE
rm(rs)
detach("package:RMySQL", unload=TRUE)
#
Main statistical figures
Statistics at level of subline
cons1 = paste0("select lineid, sublineid, min(ContractStart) as MIN__Date, ",
"max(ContractEnd) as MAX__Date, ",
"sum(ContractEffectiveCost)/1000000. as PlanCost,",
"sum(ContractAwardedAmount)/1000000. as RealCost, ",
"sum(ContractNumCert) as NumCert, ",
"sum(ContractNumInc) as NumInc, ",
"count(ContractStart) as NumContracts ",
"from linesh group by lineid, sublineid ",
"order by lineid, sublineid")
dcons = sqldf(cons1,method = "name__class",stringsAsFactors=FALSE)
Function preparation for data extraction
extractsubline = function(idline,idsubline) {
cons2 = paste0("select contractid,ContractStart as Start__Date, ",
"ContractEnd as End__Date, ",
"ContractType, ContractAwardedAmount,ContractEffectiveCost, ",
"ContractTitle from linesh where lineid=",idline," ",
"and sublineid=",idsubline," ",
"and strftime('%Y',ContractStart) > 1000 ",
"order by ContractStart asc")
dcons = sqldf(cons2,method = "name__class",stringsAsFactors=FALSE)
return(dcons)
}
#
HSL=list()
for (i in 1:nrow(lsubrel)) {
line = lsubrel[i,"lineid"]
naml = lindat[line,"name"]
if (! naml %in% ls(HSL)) {
HSL[[naml]] = list()
}
subl = lsubrel[i,"sublineid"]
snml = sublindat[subl,"name"]
if (! snml %in% ls(HSL[[naml]])) {
dsl = extractsubline(line,subl)
HSL[[naml]][[snml]] = dsl
} else {
cat(paste0("Error: ",snml," was already defined !"))
}
}
#
Date analysis
#
DateRef = data.frame(lineid=NA, sublineid = NA,
Start=as.Date("2018-01-01"),
End=as.Date("2018-01-01"),
FirstConsStart=as.Date("2018-01-01"),
FirstConsEnd=as.Date("2018-01-01"),
NumModConsAntesPrimCons = NA,
NumConCons = NA,
FirstBalasStart = as.Date("2018-01-01"),
NumConMod = NA,
stringsAsFactors=FALSE)
k=0
for (i in ls(HSL)) {
for (j in ls(HSL[[i]])) {
k=k+1
cc = HSL[[i]][[j]]
if ( nrow(cc) > 0) {
DateRef[k,"lineid"] = lindat[which(lindat[,"name"]==i),"id"]
DateRef[k,"sublineid"] = sublindat[which(sublindat[,"name"]==j),"id"]
DateRef[k,"Start"] = as.Date(min(cc[,"Start"],na.rm=TRUE))
DateRef[k,"End"] = as.Date(max(cc[,"End"],na.rm=TRUE))
ss=subset(cc,Type=="CONS")
ss = ss[order(ss$Start),]
DateRef[k,"FirstConsStart"] = as.Date(ss[1,"Start"])
DateRef[k,"FirstConsEnd"] = as.Date(ss[1,"End"])
DateRef[k,"NumConCons"] = nrow(ss)
ss=subset(cc,Type=="BALAS")
ss = ss[order(ss$Start),]
DateRef[k,"FirstBalasStart"] = as.Date(ss[1,"Start"])
ss=subset(cc,Type=="MOD")
ss = ss[order(ss$Start),]
DateRef[k,"NumConMod"] = nrow(ss)
DateRef[k,"NumModConsAntesPrimCons"] = length(which(ss$Start <
DateRef[k,"FirstConsStart"]))
rm(cc,ss)
} else {
DateRef[k,"Start"] = NA
DateRef[k,"End"] = NA
DateRef[k,"FirstConsStart"] = NA
DateRef[k,"FirstConsEnd"] = NA
DateRef[k,"NumConCons"] = NA
DateRef[k,"FirstBalasStart"] = NA
DateRef[k,"NumConMod"] = NA
DateRef[k,"NumModConsAntesPrimCons"] = NA
}
}
}
#
dcons = merge(dcons,DateRef,by=c("lineid","sublineid"))
rm(cadena,cid,cons1,i,j,k,mat,mydb,line,naml,snml,subl,DateRef)
#
Now, pending to check the created features testing FCM relationships, or, maybe, to still create additional features.
#
# Created Objects:
#
# lindat : Stores name of lines
# sublindat : Stores name of sublines
# incdat : Stores Incident events
# lsubrel : Relationshp Line Sublines
# linesh : Information about Contracts per subline
# sublinrel : Relationshp Line Subline ContractID
# cerdat : Certification info related to contractID
# contdat : Contract info
# ctypedat : Typology of Contracts
# ctyperel : Type of used contracts
# refs : Type of keys for text-mining
# dcons : Statistical data at subline level
# HSL : Details of contracts per line - subline
save(lindat,sublindat,incdat,lsubrel,linesh,sublinrel,
cerdat,contdat,ctypedat,ctyperel,refs,dcons,HSL,
file="FeaturesADIF.RData")