data604 Assign8

Libraries

Load required libraries for goodness of fit and statistic analysis

library(MASS)
library(survival)
library(fitdistrplus)
library(mc2d)

Problem 1:

Load Data

Load the data from a csv file

setwd("C:/Users/Ali/Documents/R")

importedData <- read.table("Problem_Dataset_06_01.csv",header = TRUE)
data=importedData$Mydata

Get Insight

Plot the empirical density and the histogram to gain insight of the data

plotdist(data, histo = TRUE, demp = TRUE)

The plot shows a right skewed distribution

Descriptive Statistics

lets look at the Cullen and Frey graph in addition to general summary statistics of distributions

options(width=300)

descdist(data, boot = 500)

## summary statistics
## ------
## min:  2.06   max:  48.65 
## median:  9.015 
## mean:  11.92048 
## estimated sd:  9.832445 
## estimated skewness:  1.790431 
## estimated kurtosis:  6.895655

Fitting Distribution

From previous plot, we could determine that there is few distributions that can be tested for goodness of fit

options(width=250)

testFitting<-function(data,mode){

expFit <- fitdist(data, "exp")
logFit <- fitdist(data, "lnorm")
norFit <- fitdist(data, "norm")
gamFit <- fitdist(data, "gamma")
WeiFit <- fitdist(data, "weibull")
uniFit <- fitdist(data, "unif")

triFit <-  fitdist(data, "triang", method="mge", start = list(min=summary(data)[[1]],mode = summary(data)[[3]],max=summary(data)[[6]]),gof="KS")

if (mode == "plot") {
plot.legend <-c("exponenetial","lognormal","normal","gamma","Weibull","uniform","triangle" )
cdfcomp(list(expFit,logFit, norFit,gamFit,WeiFit,uniFit,triFit), legendtext = plot.legend )
denscomp(list(expFit,logFit, norFit,gamFit,WeiFit,uniFit,triFit), legendtext = plot.legend )
qqcomp(list(expFit,logFit, norFit,gamFit,WeiFit,uniFit,triFit), legendtext = plot.legend )
ppcomp(list(expFit,logFit, norFit,gamFit,WeiFit,uniFit,triFit), legendtext = plot.legend )
thestat<-gofstat(list(expFit,logFit, norFit,gamFit,WeiFit,uniFit,triFit), fitnames=c("exponenetial","lognormal","normal","gamma","Weibull","uniform","triangle" ))
print(thestat)

}
if(mode=="test")
{
ks_exp <- ks.test(data, "pexp", rate=expFit$estimate[1])
ks_log <- ks.test(data, "plnorm",logFit$estimate[1],logFit$estimate[2])
ks_norm <- ks.test(data, "pnorm",norFit$estimate[1],norFit$estimate[2])
ks_gamma <- ks.test(data, "pgamma", gamFit$estimate[1],gamFit$estimate[2])
ks_weibul <- ks.test(data, "pweibull",WeiFit$estimate[1],WeiFit$estimate[2])
ks_unif <- ks.test(data, "punif",uniFit$estimate[1],uniFit$estimate[2])
ks_tri <- ks.test(data, "ptriang",triFit$estimate[1],triFit$estimate[2],triFit$estimate[3])

names<-c("exponenetial","lognormal","normal","gamma","Weibull","uniform","triangle" )
P_Value<-c(ks_exp[[2]],ks_log[[2]],ks_norm[[2]],ks_gamma[[2]],ks_weibul[[2]],ks_unif[[2]],ks_tri[[2]])
ks<-c(ks_exp[[1]],ks_log[[1]],ks_norm[[1]],ks_gamma[[1]],ks_weibul[[1]],ks_unif[[1]],ks_tri[[1]])

df<-data.frame(names,round(P_Value,3),round(ks,3))
colnames(df)<-c("distribution","P_Value","ks")
print(df)
}
}


displayEq<-function(mode){
  
expFit <- fitdist(data, "exp") 
logFit <- fitdist(data, "lnorm")
norFit <- fitdist(data, "norm")
gamFit <- fitdist(data, "gamma")
WeiFit <- fitdist(data, "weibull")
uniFit <- fitdist(data, "unif")
triFit <-  fitdist(data, "triang", method="mge", start = list(min=summary(data)[[1]],mode = summary(data)[[3]],max=summary(data)[[6]]),gof="KS")

if(mode=="log"){
print(paste0("Random.Lognormal(",round(logFit$estimate[[1]],3),",",round(logFit$estimate[[2]],3),")" ))
}else if(mode=="gamma"){
  print(paste0("Random.Gamma(",round(gamFit$estimate[[1]],3),",", round(gamFit$estimate[[2]],3),")"))
}else if(mode=="Weibull"){
  print(paste0("Random.Weibull(",round(WeiFit$estimate[[1]],3),",",round(WeiFit$estimate[[1]],3),")" ))
}else if(mode=="exp"){
  print(paste0("Random.Exponential(",round(summary(data)[[4]],3),")"))
}else if(mode=="unif"){
  print(paste0("Random.Uniform(",round(uniFit$estimate[[1]],3),",",round(uniFit$estimate[[2]],3),")"))
}else if(mode=="triang"){
  print(paste0("Random.Triangle(",round(triFit$estimate[[1]],3),",",round(triFit$estimate[[2]],3),",",round(triFit$estimate[[3]],3),")"))
}else if(mode=="norm"){
  print(paste0("Random.Normal(",round(norFit$estimate[[1]],3),",",round(norFit$estimate[[2]],3),")"))
}
}

testFitting(data,"plot")

## Goodness-of-fit statistics
##                              exponenetial  lognormal    normal      gamma    Weibull  uniform   triangle
## Kolmogorov-Smirnov statistic    0.1640203 0.05044292 0.1550512 0.07461236 0.07934941 0.474688 0.07609876
## Cramer-von Mises statistic      0.2021652 0.01825121 0.3280986 0.05174037 0.05847634 4.379079 0.06804379
## Anderson-Darling statistic      1.4234709 0.14142579 1.9697634 0.35230811 0.44537170      Inf        Inf
## 
## Goodness-of-fit criteria
##                                exponenetial lognormal   normal    gamma  Weibull uniform triangle
## Akaike's Information Criterion     294.1736  284.9146 314.1765 288.2076 290.3611      NA      Inf
## Bayesian Information Criterion     295.9113  288.3900 317.6518 291.6830 293.8364      NA      Inf

From the q-qplot we could visually determine that the lognormal fit produces the best goodness of fit of all the distributions. The generated data points are very close to the actual sample distribution trend. All other distributions seem to produce reliable results except the uniform distribution. We will test some of these distribution against the Null hypothesis using the k-test to find reliability of these fitted distributions.
.

Testing Distributions

testFitting(data,"test")

##   distribution P_Value    ks
## 1 exponenetial   0.187 0.164
## 2    lognormal   1.000 0.050
## 3       normal   0.239 0.155
## 4        gamma   0.960 0.075
## 5      Weibull   0.935 0.079
## 6      uniform   0.000 0.475
## 7     triangle   0.953 0.076

As expected, the p-value of 1 and ks of 0.05 shows that lognormal is the best fit followed by gamma, triangular and Weibull.

Simio Intervaltime

These are some simio functions utilizing the converged fitted parameters. I will choose the lognormal for my intervaltime for the given sample.

displayEq("log")

## [1] "Random.Lognormal(2.185,0.771)"

displayEq("gamma")

## [1] "Random.Gamma(1.855,0.156)"

displayEq("Weibull")

## [1] "Random.Weibull(1.348,1.348)"

Problem 2

Lets print a summary statistic of the data

importedData<- read.table("https://raw.githubusercontent.com/aliharb/Simio/master/Problem_Dataset_06_02.csv", header = TRUE)
data=importedData$Mydata

plotdist(data, histo = TRUE, demp = TRUE)

options(width=300)
descdist(data, boot = 500)

## summary statistics
## ------
## min:  3.8   max:  36.13 
## median:  9.14 
## mean:  10.89234 
## estimated sd:  6.364957 
## estimated skewness:  1.97635 
## estimated kurtosis:  7.933002

the p-value of .973 and ks of 0.068 shows that lognormal is the best fit followed by triangular and gamma.

options(width=250)
par(mfrow=c(2,2))
testFitting(data,"plot")

## Goodness-of-fit statistics
##                              exponenetial  lognormal    normal      gamma   Weibull   uniform   triangle
## Kolmogorov-Smirnov statistic    0.3144366 0.06764782 0.1589070 0.09941791 0.1140618 0.4853473 0.07030009
## Cramer-von Mises statistic      1.1473789 0.03973639 0.3826912 0.11444195 0.2008969 4.8461900 0.06495798
## Anderson-Darling statistic      5.9711731 0.26739444 2.2740826 0.71667740 1.3230284       Inf        Inf
## 
## Goodness-of-fit criteria
##                                exponenetial lognormal   normal    gamma  Weibull uniform triangle
## Akaike's Information Criterion     320.4776  282.6829 310.3453 288.3562 296.2684      NA      Inf
## Bayesian Information Criterion     322.3278  286.3832 314.0456 292.0565 299.9687      NA      Inf

testFitting(data,"test")

##   distribution P_Value    ks
## 1 exponenetial   0.000 0.314
## 2    lognormal   0.973 0.068
## 3       normal   0.167 0.159
## 4        gamma   0.704 0.099
## 5      Weibull   0.536 0.114
## 6      uniform   0.000 0.485
## 7     triangle   0.962 0.070

Based on the results lognormal produce the best fit with the smaales K followed by triangel and gamma.

I will choose lognormal for this distribution

displayEq("log")

## [1] "Random.Lognormal(2.258,0.491)"

displayEq("triang")

## [1] "Random.Triangle(2.248,5.057,23.681)"

displayEq("gamma")

## [1] "Random.Gamma(4.002,0.367)"

Problem 3

Lets print a summary statistic of the data

importedData<- read.table("https://raw.githubusercontent.com/aliharb/Simio/master/Problem_Dataset_06_03.csv", header = TRUE)
data=importedData$Mydata

plotdist(data, histo = TRUE, demp = TRUE)

options(width=300)
descdist(data, boot = 500)

## summary statistics
## ------
## min:  1   max:  8 
## median:  3 
## mean:  2.955556 
## estimated sd:  1.536755 
## estimated skewness:  1.02143 
## estimated kurtosis:  4.428947

the p-value of .973 and ks of 0.068 shows that lognormal is the best fit followed by triangular and gamma.

options(width=250)
par(mfrow=c(2,2))
testFitting(data,"plot")

## Goodness-of-fit statistics
##                              exponenetial lognormal    normal     gamma   Weibull   uniform  triangle
## Kolmogorov-Smirnov statistic    0.3361487 0.1658822 0.1797133 0.1512860 0.1533664 0.4380952 0.1444444
## Cramer-von Mises statistic      1.0714529 0.2198819 0.2403876 0.1892388 0.1880788 2.8294785 0.1895642
## Anderson-Darling statistic      5.6091745 1.3927199 1.4026455 1.1326605 1.0862931       Inf       Inf
## 
## Goodness-of-fit criteria
##                                exponenetial lognormal   normal    gamma  Weibull uniform triangle
## Akaike's Information Criterion     189.5318  160.9673 169.3638 160.2218 162.3428      NA      Inf
## Bayesian Information Criterion     191.3385  164.5806 172.9771 163.8352 165.9561      NA      Inf

testFitting(data,"test")

##   distribution P_Value    ks
## 1 exponenetial   0.000 0.336
## 2    lognormal   0.168 0.166
## 3       normal   0.109 0.180
## 4        gamma   0.254 0.151
## 5      Weibull   0.240 0.153
## 6      uniform   0.000 0.438
## 7     triangle   0.305 0.144

Based on the results, triangle produce the best fit with the highest p value and the smalest K followed by gamma and weilbull.

I will choose gamma for this distribution due to the distribution non-leanier behavior

displayEq("gamma")

## [1] "Random.Gamma(3.854,1.304)"

displayEq("Weibull ")
displayEq("triang")

## [1] "Random.Triangle(-0.386,2.272,6.753)"

Problem 4

Problem 5

Problem 8

aots <- data.frame(In_LB = c(0.0,0.5,1.0,1.5,2.0,3.0,4.0,5.0,7.5,10),
                 probDemand = c(0.05,0.07,0.09,0.11,0.15,0.25,0.10,0.09,0.06,0.03))

peas <- data.frame(In_LB = c(0,0.5,1.0,1.5,2.0,3.0),
                   probDemand = c(0.1,0.2,0.2,0.3,0.1,0.1))


beans <- data.frame(In_LB = c(0,1.0,3.0,4.5),
                    probDemand = c(0.2,0.4,0.3,0.1))


barley <- data.frame(In_LB  = c(0,0.5,1.0,3.5),
                     probDemand = c(0.2,0.4,0.3,0.1))
  
getDemand <- function(product,sampleSize) {
  cdf <- runif(sampleSize)
  sapply(cdf, function(x) product$In_LB[min(which(cumsum(product$probDemand) > x))])
}

testProduct<-function(Product,nboot,nsample){
mytest <- prop.table(table(getDemand(as.data.frame(Product), nsample)))
plotdist(as.numeric(mytest), histo = TRUE, demp = TRUE)
norFit <- fitdist(as.numeric(mytest), "norm")
(ks_norm <- ks.test(mytest, "pnorm",norFit$estimate[1],norFit$estimate[2]))
logFit <- fitdist(as.numeric(mytest), "lnorm")
(ks_log <- ks.test(as.numeric(mytest), "plnorm",logFit$estimate[1],logFit$estimate[2]))
}

testProduct(aots,500,1000)

## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  as.numeric(mytest)
## D = 0.08447, p-value = 1
## alternative hypothesis: two-sided

testProduct(peas,500,1000)

## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  as.numeric(mytest)
## D = 0.30644, p-value = 0.5279
## alternative hypothesis: two-sided

testProduct(barley,10,1000)

## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  as.numeric(mytest)
## D = 0.2383, p-value = 0.9367
## alternative hypothesis: two-sided

mytest <- prop.table(table(getDemand(as.data.frame(beans), 1000)))
plotdist(as.numeric(mytest), histo = TRUE, demp = TRUE)

norFit <- fitdist(as.numeric(mytest), "norm")
(ks_norm <- ks.test(mytest, "pnorm",norFit$estimate[1],norFit$estimate[2]))

## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  mytest
## D = 0.23115, p-value = 0.9512
## alternative hypothesis: two-sided

logFit <- fitdist(as.numeric(mytest), "lnorm")
(ks_log <- ks.test(as.numeric(mytest), "plnorm",logFit$estimate[1],logFit$estimate[2]))

## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  as.numeric(mytest)
## D = 0.21463, p-value = 0.9752
## alternative hypothesis: two-sided

#       oats  peas  beans barly  
buy <- c(1.05, 3.17, 1.99, 0.95)
sell <- c(1.29, 3.76, 2.23, 1.65)

Products<- matrix(NA, nrow = 90, ncol = 4)
colnames(Products) <- c("oats", "peas", "beans", "barley")

ProductsCost<- matrix(NA, nrow = 1, ncol = 4)
colnames(ProductsCost) <- c("oatsCost", "peasCost", "beansCost", "barleyCost")

ProductsSell<- matrix(NA, nrow = 1, ncol = 4)
colnames(ProductsSell) <- c("oatsSell", "peasSell", "beansSell", "barleySell")

ProductsProfit<- matrix(NA, nrow = 1, ncol = 4)
colnames(ProductsProfit) <- c("oatsProfit", "peasProfit", "beansProfit", "barleyProfit")

Products[,1] <- getDemand(aots,90)
Products[,2] <- getDemand(peas,90)
Products[,3] <- getDemand(beans,90)
Products[,4] <- getDemand(barley,90)

sums<-colSums(as.data.frame(Products)) 

for(i in 1:4){
    ProductsCost[,i]<-sums[i]*buy[i]
    ProductsSell[,i]<-sums[i]*sell[i]
    ProductsProfit[,i]<-ProductsSell[,i]-ProductsCost[,i]
}

head(Products)

##      oats peas beans barley
## [1,]    2  0.0     1    3.5
## [2,]    3  0.0     1    0.0
## [3,]    3  3.0     3    3.5
## [4,]    3  1.5     0    1.0
## [5,]    4  0.5     0    1.0
## [6,]    1  2.0     1    1.0

tail(Products)

##       oats peas beans barley
## [85,]  3.0  3.0     0    0.0
## [86,]  3.0  1.0     1    3.5
## [87,]  1.5  0.5     3    0.0
## [88,]  3.0  1.5     0    1.0
## [89,]  1.0  1.5     1    1.0
## [90,]  3.0  1.0     1    0.0

getSessionPnl<-ProductsCost
colnames(getSessionPnl) <- c("oats", "peas", "beans", "barley")
getSessionPnl<-rbind(getSessionPnl,ProductsSell)
getSessionPnl<-rbind(getSessionPnl,ProductsProfit)
row.names(getSessionPnl)<-c("Cost","Sell","Profit")
ProductsRevenueProp<-(ProductsProfit/ProductsCost)*100

par(mfrow=c(2,2))
barplot(getSessionPnl[1,], main="Products Cost",col="blue")
barplot(getSessionPnl[2,], main="Products Sell",col="red")
barplot(getSessionPnl[3,], main="Products Profit",col="yellow")
barplot(ProductsRevenueProp, main="Products Revenue Propotrtion",col="green")

(totalOf90DaysProfit<-rowSums(ProductsProfit))

## [1] 215.425