Load required libraries for goodness of fit and statistic analysis
library(MASS)
library(survival)
library(fitdistrplus)
library(mc2d)
Load the data from a csv file
setwd("C:/Users/Ali/Documents/R")
importedData <- read.table("Problem_Dataset_06_01.csv",header = TRUE)
data=importedData$Mydata
Plot the empirical density and the histogram to gain insight of the data
plotdist(data, histo = TRUE, demp = TRUE)
The plot shows a right skewed distribution
lets look at the Cullen and Frey graph in addition to general summary statistics of distributions
options(width=300)
descdist(data, boot = 500)
## summary statistics
## ------
## min: 2.06 max: 48.65
## median: 9.015
## mean: 11.92048
## estimated sd: 9.832445
## estimated skewness: 1.790431
## estimated kurtosis: 6.895655
From previous plot, we could determine that there is few distributions that can be tested for goodness of fit
options(width=250)
testFitting<-function(data,mode){
expFit <- fitdist(data, "exp")
logFit <- fitdist(data, "lnorm")
norFit <- fitdist(data, "norm")
gamFit <- fitdist(data, "gamma")
WeiFit <- fitdist(data, "weibull")
uniFit <- fitdist(data, "unif")
triFit <- fitdist(data, "triang", method="mge", start = list(min=summary(data)[[1]],mode = summary(data)[[3]],max=summary(data)[[6]]),gof="KS")
if (mode == "plot") {
plot.legend <-c("exponenetial","lognormal","normal","gamma","Weibull","uniform","triangle" )
cdfcomp(list(expFit,logFit, norFit,gamFit,WeiFit,uniFit,triFit), legendtext = plot.legend )
denscomp(list(expFit,logFit, norFit,gamFit,WeiFit,uniFit,triFit), legendtext = plot.legend )
qqcomp(list(expFit,logFit, norFit,gamFit,WeiFit,uniFit,triFit), legendtext = plot.legend )
ppcomp(list(expFit,logFit, norFit,gamFit,WeiFit,uniFit,triFit), legendtext = plot.legend )
thestat<-gofstat(list(expFit,logFit, norFit,gamFit,WeiFit,uniFit,triFit), fitnames=c("exponenetial","lognormal","normal","gamma","Weibull","uniform","triangle" ))
print(thestat)
}
if(mode=="test")
{
ks_exp <- ks.test(data, "pexp", rate=expFit$estimate[1])
ks_log <- ks.test(data, "plnorm",logFit$estimate[1],logFit$estimate[2])
ks_norm <- ks.test(data, "pnorm",norFit$estimate[1],norFit$estimate[2])
ks_gamma <- ks.test(data, "pgamma", gamFit$estimate[1],gamFit$estimate[2])
ks_weibul <- ks.test(data, "pweibull",WeiFit$estimate[1],WeiFit$estimate[2])
ks_unif <- ks.test(data, "punif",uniFit$estimate[1],uniFit$estimate[2])
ks_tri <- ks.test(data, "ptriang",triFit$estimate[1],triFit$estimate[2],triFit$estimate[3])
names<-c("exponenetial","lognormal","normal","gamma","Weibull","uniform","triangle" )
P_Value<-c(ks_exp[[2]],ks_log[[2]],ks_norm[[2]],ks_gamma[[2]],ks_weibul[[2]],ks_unif[[2]],ks_tri[[2]])
ks<-c(ks_exp[[1]],ks_log[[1]],ks_norm[[1]],ks_gamma[[1]],ks_weibul[[1]],ks_unif[[1]],ks_tri[[1]])
df<-data.frame(names,round(P_Value,3),round(ks,3))
colnames(df)<-c("distribution","P_Value","ks")
print(df)
}
}
displayEq<-function(mode){
expFit <- fitdist(data, "exp")
logFit <- fitdist(data, "lnorm")
norFit <- fitdist(data, "norm")
gamFit <- fitdist(data, "gamma")
WeiFit <- fitdist(data, "weibull")
uniFit <- fitdist(data, "unif")
triFit <- fitdist(data, "triang", method="mge", start = list(min=summary(data)[[1]],mode = summary(data)[[3]],max=summary(data)[[6]]),gof="KS")
if(mode=="log"){
print(paste0("Random.Lognormal(",round(logFit$estimate[[1]],3),",",round(logFit$estimate[[2]],3),")" ))
}else if(mode=="gamma"){
print(paste0("Random.Gamma(",round(gamFit$estimate[[1]],3),",", round(gamFit$estimate[[2]],3),")"))
}else if(mode=="Weibull"){
print(paste0("Random.Weibull(",round(WeiFit$estimate[[1]],3),",",round(WeiFit$estimate[[1]],3),")" ))
}else if(mode=="exp"){
print(paste0("Random.Exponential(",round(summary(data)[[4]],3),")"))
}else if(mode=="unif"){
print(paste0("Random.Uniform(",round(uniFit$estimate[[1]],3),",",round(uniFit$estimate[[2]],3),")"))
}else if(mode=="triang"){
print(paste0("Random.Triangle(",round(triFit$estimate[[1]],3),",",round(triFit$estimate[[2]],3),",",round(triFit$estimate[[3]],3),")"))
}else if(mode=="norm"){
print(paste0("Random.Normal(",round(norFit$estimate[[1]],3),",",round(norFit$estimate[[2]],3),")"))
}
}
testFitting(data,"plot")
## Goodness-of-fit statistics
## exponenetial lognormal normal gamma Weibull uniform triangle
## Kolmogorov-Smirnov statistic 0.1640203 0.05044292 0.1550512 0.07461236 0.07934941 0.474688 0.07609876
## Cramer-von Mises statistic 0.2021652 0.01825121 0.3280986 0.05174037 0.05847634 4.379079 0.06804379
## Anderson-Darling statistic 1.4234709 0.14142579 1.9697634 0.35230811 0.44537170 Inf Inf
##
## Goodness-of-fit criteria
## exponenetial lognormal normal gamma Weibull uniform triangle
## Akaike's Information Criterion 294.1736 284.9146 314.1765 288.2076 290.3611 NA Inf
## Bayesian Information Criterion 295.9113 288.3900 317.6518 291.6830 293.8364 NA Inf
From the q-qplot we could visually determine that the lognormal fit produces the best goodness of fit of all the distributions. The generated data points are very close to the actual sample distribution trend. All other distributions seem to produce reliable results except the uniform distribution. We will test some of these distribution against the Null hypothesis using the k-test to find reliability of these fitted distributions..
testFitting(data,"test")
## distribution P_Value ks
## 1 exponenetial 0.187 0.164
## 2 lognormal 1.000 0.050
## 3 normal 0.239 0.155
## 4 gamma 0.960 0.075
## 5 Weibull 0.935 0.079
## 6 uniform 0.000 0.475
## 7 triangle 0.953 0.076
As expected, the p-value of 1 and ks of 0.05 shows that lognormal is the best fit followed by gamma, triangular and Weibull.
These are some simio functions utilizing the converged fitted parameters. I will choose the lognormal for my intervaltime for the given sample.
displayEq("log")
## [1] "Random.Lognormal(2.185,0.771)"
displayEq("gamma")
## [1] "Random.Gamma(1.855,0.156)"
displayEq("Weibull")
## [1] "Random.Weibull(1.348,1.348)"
Lets print a summary statistic of the data
importedData<- read.table("https://raw.githubusercontent.com/aliharb/Simio/master/Problem_Dataset_06_02.csv", header = TRUE)
data=importedData$Mydata
plotdist(data, histo = TRUE, demp = TRUE)
options(width=300)
descdist(data, boot = 500)
## summary statistics
## ------
## min: 3.8 max: 36.13
## median: 9.14
## mean: 10.89234
## estimated sd: 6.364957
## estimated skewness: 1.97635
## estimated kurtosis: 7.933002
the p-value of .973 and ks of 0.068 shows that lognormal is the best fit followed by triangular and gamma.
options(width=250)
par(mfrow=c(2,2))
testFitting(data,"plot")
## Goodness-of-fit statistics
## exponenetial lognormal normal gamma Weibull uniform triangle
## Kolmogorov-Smirnov statistic 0.3144366 0.06764782 0.1589070 0.09941791 0.1140618 0.4853473 0.07030009
## Cramer-von Mises statistic 1.1473789 0.03973639 0.3826912 0.11444195 0.2008969 4.8461900 0.06495798
## Anderson-Darling statistic 5.9711731 0.26739444 2.2740826 0.71667740 1.3230284 Inf Inf
##
## Goodness-of-fit criteria
## exponenetial lognormal normal gamma Weibull uniform triangle
## Akaike's Information Criterion 320.4776 282.6829 310.3453 288.3562 296.2684 NA Inf
## Bayesian Information Criterion 322.3278 286.3832 314.0456 292.0565 299.9687 NA Inf
testFitting(data,"test")
## distribution P_Value ks
## 1 exponenetial 0.000 0.314
## 2 lognormal 0.973 0.068
## 3 normal 0.167 0.159
## 4 gamma 0.704 0.099
## 5 Weibull 0.536 0.114
## 6 uniform 0.000 0.485
## 7 triangle 0.962 0.070
Based on the results lognormal produce the best fit with the smaales K followed by triangel and gamma.
I will choose lognormal for this distribution
displayEq("log")
## [1] "Random.Lognormal(2.258,0.491)"
displayEq("triang")
## [1] "Random.Triangle(2.248,5.057,23.681)"
displayEq("gamma")
## [1] "Random.Gamma(4.002,0.367)"
Lets print a summary statistic of the data
importedData<- read.table("https://raw.githubusercontent.com/aliharb/Simio/master/Problem_Dataset_06_03.csv", header = TRUE)
data=importedData$Mydata
plotdist(data, histo = TRUE, demp = TRUE)
options(width=300)
descdist(data, boot = 500)
## summary statistics
## ------
## min: 1 max: 8
## median: 3
## mean: 2.955556
## estimated sd: 1.536755
## estimated skewness: 1.02143
## estimated kurtosis: 4.428947
the p-value of .973 and ks of 0.068 shows that lognormal is the best fit followed by triangular and gamma.
options(width=250)
par(mfrow=c(2,2))
testFitting(data,"plot")
## Goodness-of-fit statistics
## exponenetial lognormal normal gamma Weibull uniform triangle
## Kolmogorov-Smirnov statistic 0.3361487 0.1658822 0.1797133 0.1512860 0.1533664 0.4380952 0.1444444
## Cramer-von Mises statistic 1.0714529 0.2198819 0.2403876 0.1892388 0.1880788 2.8294785 0.1895642
## Anderson-Darling statistic 5.6091745 1.3927199 1.4026455 1.1326605 1.0862931 Inf Inf
##
## Goodness-of-fit criteria
## exponenetial lognormal normal gamma Weibull uniform triangle
## Akaike's Information Criterion 189.5318 160.9673 169.3638 160.2218 162.3428 NA Inf
## Bayesian Information Criterion 191.3385 164.5806 172.9771 163.8352 165.9561 NA Inf
testFitting(data,"test")
## distribution P_Value ks
## 1 exponenetial 0.000 0.336
## 2 lognormal 0.168 0.166
## 3 normal 0.109 0.180
## 4 gamma 0.254 0.151
## 5 Weibull 0.240 0.153
## 6 uniform 0.000 0.438
## 7 triangle 0.305 0.144
Based on the results, triangle produce the best fit with the highest p value and the smalest K followed by gamma and weilbull.
I will choose gamma for this distribution due to the distribution non-leanier behavior
displayEq("gamma")
## [1] "Random.Gamma(3.854,1.304)"
displayEq("Weibull ")
displayEq("triang")
## [1] "Random.Triangle(-0.386,2.272,6.753)"
aots <- data.frame(In_LB = c(0.0,0.5,1.0,1.5,2.0,3.0,4.0,5.0,7.5,10),
probDemand = c(0.05,0.07,0.09,0.11,0.15,0.25,0.10,0.09,0.06,0.03))
peas <- data.frame(In_LB = c(0,0.5,1.0,1.5,2.0,3.0),
probDemand = c(0.1,0.2,0.2,0.3,0.1,0.1))
beans <- data.frame(In_LB = c(0,1.0,3.0,4.5),
probDemand = c(0.2,0.4,0.3,0.1))
barley <- data.frame(In_LB = c(0,0.5,1.0,3.5),
probDemand = c(0.2,0.4,0.3,0.1))
getDemand <- function(product,sampleSize) {
cdf <- runif(sampleSize)
sapply(cdf, function(x) product$In_LB[min(which(cumsum(product$probDemand) > x))])
}
testProduct<-function(Product,nboot,nsample){
mytest <- prop.table(table(getDemand(as.data.frame(Product), nsample)))
plotdist(as.numeric(mytest), histo = TRUE, demp = TRUE)
norFit <- fitdist(as.numeric(mytest), "norm")
(ks_norm <- ks.test(mytest, "pnorm",norFit$estimate[1],norFit$estimate[2]))
logFit <- fitdist(as.numeric(mytest), "lnorm")
(ks_log <- ks.test(as.numeric(mytest), "plnorm",logFit$estimate[1],logFit$estimate[2]))
}
testProduct(aots,500,1000)
##
## One-sample Kolmogorov-Smirnov test
##
## data: as.numeric(mytest)
## D = 0.08447, p-value = 1
## alternative hypothesis: two-sided
testProduct(peas,500,1000)
##
## One-sample Kolmogorov-Smirnov test
##
## data: as.numeric(mytest)
## D = 0.30644, p-value = 0.5279
## alternative hypothesis: two-sided
testProduct(barley,10,1000)
##
## One-sample Kolmogorov-Smirnov test
##
## data: as.numeric(mytest)
## D = 0.2383, p-value = 0.9367
## alternative hypothesis: two-sided
mytest <- prop.table(table(getDemand(as.data.frame(beans), 1000)))
plotdist(as.numeric(mytest), histo = TRUE, demp = TRUE)
norFit <- fitdist(as.numeric(mytest), "norm")
(ks_norm <- ks.test(mytest, "pnorm",norFit$estimate[1],norFit$estimate[2]))
##
## One-sample Kolmogorov-Smirnov test
##
## data: mytest
## D = 0.23115, p-value = 0.9512
## alternative hypothesis: two-sided
logFit <- fitdist(as.numeric(mytest), "lnorm")
(ks_log <- ks.test(as.numeric(mytest), "plnorm",logFit$estimate[1],logFit$estimate[2]))
##
## One-sample Kolmogorov-Smirnov test
##
## data: as.numeric(mytest)
## D = 0.21463, p-value = 0.9752
## alternative hypothesis: two-sided
# oats peas beans barly
buy <- c(1.05, 3.17, 1.99, 0.95)
sell <- c(1.29, 3.76, 2.23, 1.65)
Products<- matrix(NA, nrow = 90, ncol = 4)
colnames(Products) <- c("oats", "peas", "beans", "barley")
ProductsCost<- matrix(NA, nrow = 1, ncol = 4)
colnames(ProductsCost) <- c("oatsCost", "peasCost", "beansCost", "barleyCost")
ProductsSell<- matrix(NA, nrow = 1, ncol = 4)
colnames(ProductsSell) <- c("oatsSell", "peasSell", "beansSell", "barleySell")
ProductsProfit<- matrix(NA, nrow = 1, ncol = 4)
colnames(ProductsProfit) <- c("oatsProfit", "peasProfit", "beansProfit", "barleyProfit")
Products[,1] <- getDemand(aots,90)
Products[,2] <- getDemand(peas,90)
Products[,3] <- getDemand(beans,90)
Products[,4] <- getDemand(barley,90)
sums<-colSums(as.data.frame(Products))
for(i in 1:4){
ProductsCost[,i]<-sums[i]*buy[i]
ProductsSell[,i]<-sums[i]*sell[i]
ProductsProfit[,i]<-ProductsSell[,i]-ProductsCost[,i]
}
head(Products)
## oats peas beans barley
## [1,] 2 0.0 1 3.5
## [2,] 3 0.0 1 0.0
## [3,] 3 3.0 3 3.5
## [4,] 3 1.5 0 1.0
## [5,] 4 0.5 0 1.0
## [6,] 1 2.0 1 1.0
tail(Products)
## oats peas beans barley
## [85,] 3.0 3.0 0 0.0
## [86,] 3.0 1.0 1 3.5
## [87,] 1.5 0.5 3 0.0
## [88,] 3.0 1.5 0 1.0
## [89,] 1.0 1.5 1 1.0
## [90,] 3.0 1.0 1 0.0
getSessionPnl<-ProductsCost
colnames(getSessionPnl) <- c("oats", "peas", "beans", "barley")
getSessionPnl<-rbind(getSessionPnl,ProductsSell)
getSessionPnl<-rbind(getSessionPnl,ProductsProfit)
row.names(getSessionPnl)<-c("Cost","Sell","Profit")
ProductsRevenueProp<-(ProductsProfit/ProductsCost)*100
par(mfrow=c(2,2))
barplot(getSessionPnl[1,], main="Products Cost",col="blue")
barplot(getSessionPnl[2,], main="Products Sell",col="red")
barplot(getSessionPnl[3,], main="Products Profit",col="yellow")
barplot(ProductsRevenueProp, main="Products Revenue Propotrtion",col="green")
(totalOf90DaysProfit<-rowSums(ProductsProfit))
## [1] 215.425