require(ggthemes)
library(tidyverse)
library(magrittr)
library(TTR)
library(tidyr)
library(dplyr)
library(ggplot2)
library(plotly)
library(fpp2)
library(caTools)
library(reshape2)
library(psych)
require(graphics)
library(fBasics)
library(caret)
library(gridExtra)
library(DAAG)
library(rpart)
library(randomForest)
library(data.table)
library(mice)
library(MASS)
library(kknn)
Attribute Information[1]:
Reference 1: http://archive.ics.uci.edu/ml/datasets/Breast+Cancer+Wisconsin+%28Original%29
# Read the comma-separated data file. It has no header row (columns arrive as
# V1..V11) and encodes missing values as "?". Argument names are spelled out in
# full and FALSE is used instead of the reassignable shorthand F.
df <- read.table(
  "breast-cancer-wisconsin.data.txt",
  stringsAsFactors = FALSE,
  header = FALSE,
  sep = ",",
  na.strings = "?"
)
head(df, 2)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11
## 1 1000025 5 1 1 1 2 1 3 1 1 2
## 2 1002945 5 4 4 5 7 10 3 2 1 2
# Give the eleven columns descriptive names: sample ID, nine predictor scores,
# and the class label.
names(df) <- c(
  "ID", "Clump_Thickness", "Uniform_Cell_Size", "Uniform_Cell_Shape",
  "Marg_Adhesion", "Single_Epith_Cell_Size", "Bare_Nuclei", "Bland_Chromatin",
  "Normal_Nucleoli", "Mitoses", "Class"
)
# Turn the class label into a factor and relabel its two levels as 0 and 1
# (levels are relabelled in the sorted order of the original codes).
df$Class <- factor(df$Class)
levels(df$Class) <- c(0, 1)
summary(df)
## ID Clump_Thickness Uniform_Cell_Size Uniform_Cell_Shape
## Min. : 61634 Min. : 1.000 Min. : 1.000 Min. : 1.000
## 1st Qu.: 870688 1st Qu.: 2.000 1st Qu.: 1.000 1st Qu.: 1.000
## Median : 1171710 Median : 4.000 Median : 1.000 Median : 1.000
## Mean : 1071704 Mean : 4.418 Mean : 3.134 Mean : 3.207
## 3rd Qu.: 1238298 3rd Qu.: 6.000 3rd Qu.: 5.000 3rd Qu.: 5.000
## Max. :13454352 Max. :10.000 Max. :10.000 Max. :10.000
##
## Marg_Adhesion Single_Epith_Cell_Size Bare_Nuclei Bland_Chromatin
## Min. : 1.000 Min. : 1.000 Min. : 1.000 Min. : 1.000
## 1st Qu.: 1.000 1st Qu.: 2.000 1st Qu.: 1.000 1st Qu.: 2.000
## Median : 1.000 Median : 2.000 Median : 1.000 Median : 3.000
## Mean : 2.807 Mean : 3.216 Mean : 3.545 Mean : 3.438
## 3rd Qu.: 4.000 3rd Qu.: 4.000 3rd Qu.: 6.000 3rd Qu.: 5.000
## Max. :10.000 Max. :10.000 Max. :10.000 Max. :10.000
## NA's :16
## Normal_Nucleoli Mitoses Class
## Min. : 1.000 Min. : 1.000 0:458
## 1st Qu.: 1.000 1st Qu.: 1.000 1:241
## Median : 1.000 Median : 1.000
## Mean : 2.867 Mean : 1.589
## 3rd Qu.: 4.000 3rd Qu.: 1.000
## Max. :10.000 Max. :10.000
##
# Show the rows whose Bare_Nuclei value is missing
subset(df, is.na(Bare_Nuclei))
## ID Clump_Thickness Uniform_Cell_Size Uniform_Cell_Shape Marg_Adhesion
## 24 1057013 8 4 5 1
## 41 1096800 6 6 6 9
## 140 1183246 1 1 1 1
## 146 1184840 1 1 3 1
## 159 1193683 1 1 2 1
## 165 1197510 5 1 1 1
## 236 1241232 3 1 4 1
## 250 169356 3 1 1 1
## 276 432809 3 1 3 1
## 293 563649 8 8 8 1
## 295 606140 1 1 1 1
## 298 61634 5 4 3 1
## 316 704168 4 6 5 6
## 322 733639 3 1 1 1
## 412 1238464 1 1 1 1
## 618 1057067 1 1 1 1
## Single_Epith_Cell_Size Bare_Nuclei Bland_Chromatin Normal_Nucleoli Mitoses
## 24 2 NA 7 3 1
## 41 6 NA 7 8 1
## 140 1 NA 2 1 1
## 146 2 NA 2 1 1
## 159 3 NA 1 1 1
## 165 2 NA 3 1 1
## 236 2 NA 3 1 1
## 250 2 NA 3 1 1
## 276 2 NA 2 1 1
## 293 2 NA 6 10 1
## 295 2 NA 2 1 1
## 298 2 NA 2 3 1
## 316 7 NA 4 9 1
## 322 2 NA 3 1 1
## 412 1 NA 2 1 1
## 618 1 NA 1 1 1
## Class
## 24 1
## 41 0
## 140 0
## 146 0
## 159 0
## 165 0
## 236 0
## 250 0
## 276 0
## 293 1
## 295 0
## 298 0
## 316 0
## 322 0
## 412 0
## 618 0
# Percentage of observations with at least one missing value (rule of thumb:
# imputation is acceptable below 5%). The count is derived from the data
# instead of being hard-coded, so it stays correct if the input changes.
print(sprintf("Percent of missing observation = %0.3f",
              sum(!complete.cases(df)) / nrow(df) * 100))
## [1] "Percent of missing observation = 2.289"
Observation1:
# Mean imputation: work on a copy of the data and replace every missing
# Bare_Nuclei with the mean of the observed Bare_Nuclei values.
df.mean <- df
df.mean$Bare_Nuclei <- ifelse(
  is.na(df.mean$Bare_Nuclei),
  mean(df.mean$Bare_Nuclei, na.rm = TRUE),
  df.mean$Bare_Nuclei
)
# Inspect the first 24 rows to confirm the fill
head(df.mean, 24)
## ID Clump_Thickness Uniform_Cell_Size Uniform_Cell_Shape Marg_Adhesion
## 1 1000025 5 1 1 1
## 2 1002945 5 4 4 5
## 3 1015425 3 1 1 1
## 4 1016277 6 8 8 1
## 5 1017023 4 1 1 3
## 6 1017122 8 10 10 8
## 7 1018099 1 1 1 1
## 8 1018561 2 1 2 1
## 9 1033078 2 1 1 1
## 10 1033078 4 2 1 1
## 11 1035283 1 1 1 1
## 12 1036172 2 1 1 1
## 13 1041801 5 3 3 3
## 14 1043999 1 1 1 1
## 15 1044572 8 7 5 10
## 16 1047630 7 4 6 4
## 17 1048672 4 1 1 1
## 18 1049815 4 1 1 1
## 19 1050670 10 7 7 6
## 20 1050718 6 1 1 1
## 21 1054590 7 3 2 10
## 22 1054593 10 5 5 3
## 23 1056784 3 1 1 1
## 24 1057013 8 4 5 1
## Single_Epith_Cell_Size Bare_Nuclei Bland_Chromatin Normal_Nucleoli Mitoses
## 1 2 1.000000 3 1 1
## 2 7 10.000000 3 2 1
## 3 2 2.000000 3 1 1
## 4 3 4.000000 3 7 1
## 5 2 1.000000 3 1 1
## 6 7 10.000000 9 7 1
## 7 2 10.000000 3 1 1
## 8 2 1.000000 3 1 1
## 9 2 1.000000 1 1 5
## 10 2 1.000000 2 1 1
## 11 1 1.000000 3 1 1
## 12 2 1.000000 2 1 1
## 13 2 3.000000 4 4 1
## 14 2 3.000000 3 1 1
## 15 7 9.000000 5 5 4
## 16 6 1.000000 4 3 1
## 17 2 1.000000 2 1 1
## 18 2 1.000000 3 1 1
## 19 4 10.000000 4 1 2
## 20 2 1.000000 3 1 1
## 21 5 10.000000 5 4 4
## 22 6 7.000000 7 10 1
## 23 2 1.000000 2 1 1
## 24 2 3.544656 7 3 1
## Class
## 1 0
## 2 0
## 3 0
## 4 0
## 5 0
## 6 1
## 7 0
## 8 0
## 9 0
## 10 0
## 11 0
## 12 0
## 13 1
## 14 0
## 15 1
## 16 1
## 17 0
## 18 0
## 19 1
## 20 0
## 21 1
## 22 1
## 23 0
## 24 1
# Sanity check: filling the gaps with the mean leaves the column mean unchanged
mean(df.mean[["Bare_Nuclei"]])
## [1] 3.544656
# Return the most frequent value (the statistical mode) of a vector.
#
# Arguments:
#   v      Atomic vector (numeric, character, logical, factor, ...).
#   na.rm  If TRUE (the default), missing values are dropped before counting,
#          so NA is never reported as the mode of a vector that has observed
#          values. Pass FALSE to count NA as a candidate value.
#
# Returns a length-one vector of the same type as `v`. Ties are resolved in
# favour of the value that occurs first in `v`. If no values remain to be
# counted, the result is NA.
getmode <- function(v, na.rm = TRUE) {
  if (na.rm) {
    v <- v[!is.na(v)]
  }
  uniqv <- unique(v)
  # match() maps each element to its position in uniqv; tabulate() counts them
  uniqv[which.max(tabulate(match(v, uniqv)))]
}
# Mode imputation: work on a copy and find the most frequent Bare_Nuclei value
df.mode <- df
mode.result <- getmode(df.mode$Bare_Nuclei)
print(mode.result)
## [1] 1
# Replace every missing Bare_Nuclei with that mode
df.mode$Bare_Nuclei <- replace(
  df.mode$Bare_Nuclei,
  is.na(df.mode$Bare_Nuclei),
  mode.result
)
# Inspect the first 24 rows to confirm the fill
head(df.mode, 24)
## ID Clump_Thickness Uniform_Cell_Size Uniform_Cell_Shape Marg_Adhesion
## 1 1000025 5 1 1 1
## 2 1002945 5 4 4 5
## 3 1015425 3 1 1 1
## 4 1016277 6 8 8 1
## 5 1017023 4 1 1 3
## 6 1017122 8 10 10 8
## 7 1018099 1 1 1 1
## 8 1018561 2 1 2 1
## 9 1033078 2 1 1 1
## 10 1033078 4 2 1 1
## 11 1035283 1 1 1 1
## 12 1036172 2 1 1 1
## 13 1041801 5 3 3 3
## 14 1043999 1 1 1 1
## 15 1044572 8 7 5 10
## 16 1047630 7 4 6 4
## 17 1048672 4 1 1 1
## 18 1049815 4 1 1 1
## 19 1050670 10 7 7 6
## 20 1050718 6 1 1 1
## 21 1054590 7 3 2 10
## 22 1054593 10 5 5 3
## 23 1056784 3 1 1 1
## 24 1057013 8 4 5 1
## Single_Epith_Cell_Size Bare_Nuclei Bland_Chromatin Normal_Nucleoli Mitoses
## 1 2 1 3 1 1
## 2 7 10 3 2 1
## 3 2 2 3 1 1
## 4 3 4 3 7 1
## 5 2 1 3 1 1
## 6 7 10 9 7 1
## 7 2 10 3 1 1
## 8 2 1 3 1 1
## 9 2 1 1 1 5
## 10 2 1 2 1 1
## 11 1 1 3 1 1
## 12 2 1 2 1 1
## 13 2 3 4 4 1
## 14 2 3 3 1 1
## 15 7 9 5 5 4
## 16 6 1 4 3 1
## 17 2 1 2 1 1
## 18 2 1 3 1 1
## 19 4 10 4 1 2
## 20 2 1 3 1 1
## 21 5 10 5 4 4
## 22 6 7 7 10 1
## 23 2 1 2 1 1
## 24 2 1 7 3 1
## Class
## 1 0
## 2 0
## 3 0
## 4 0
## 5 0
## 6 1
## 7 0
## 8 0
## 9 0
## 10 0
## 11 0
## 12 0
## 13 1
## 14 0
## 15 1
## 16 1
## 17 0
## 18 0
## 19 1
## 20 0
## 21 1
## 22 1
## 23 0
## 24 1
set.seed(123)
newdata <- df
# Row positions of the observations whose Bare_Nuclei is missing
missing.index <- which(is.na(newdata$Bare_Nuclei))
# Training data for the imputation model: rows with an observed Bare_Nuclei,
# columns 2:10 only (ID and Class are left out). A logical mask is used instead
# of `-missing.index` because a negative subscript built from an empty index
# selects zero rows rather than all rows.
newdata.1 <- newdata[!is.na(newdata$Bare_Nuclei), 2:10]
# Linear model: regress Bare_Nuclei on the other eight predictors
model <- lm(
  Bare_Nuclei ~ Clump_Thickness + Uniform_Cell_Size + Uniform_Cell_Shape +
    Marg_Adhesion + Single_Epith_Cell_Size + Bland_Chromatin +
    Normal_Nucleoli + Mitoses,
  data = newdata.1
)
summary(model)
##
## Call:
## lm(formula = Bare_Nuclei ~ Clump_Thickness + Uniform_Cell_Size +
## Uniform_Cell_Shape + Marg_Adhesion + Single_Epith_Cell_Size +
## Bland_Chromatin + Normal_Nucleoli + Mitoses, data = newdata.1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.7316 -0.9426 -0.3002 0.6725 8.6998
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.616652 0.194975 -3.163 0.00163 **
## Clump_Thickness 0.230156 0.041691 5.521 4.83e-08 ***
## Uniform_Cell_Size -0.067980 0.076170 -0.892 0.37246
## Uniform_Cell_Shape 0.340442 0.073420 4.637 4.25e-06 ***
## Marg_Adhesion 0.339705 0.045919 7.398 4.13e-13 ***
## Single_Epith_Cell_Size 0.090392 0.062541 1.445 0.14883
## Bland_Chromatin 0.320577 0.059047 5.429 7.91e-08 ***
## Normal_Nucleoli 0.007293 0.044486 0.164 0.86983
## Mitoses -0.075230 0.059331 -1.268 0.20524
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.274 on 674 degrees of freedom
## Multiple R-squared: 0.615, Adjusted R-squared: 0.6104
## F-statistic: 134.6 on 8 and 674 DF, p-value: < 2.2e-16
# Start stepwise selection from the full eight-predictor linear model
full.model <- model
# AIC-based stepwise selection; direction = "both" lets terms be dropped and
# re-added, and trace = FALSE silences the per-step log
step.model <- stepAIC(
  full.model,
  trace = FALSE,
  direction = "both"
)
summary(step.model)
##
## Call:
## lm(formula = Bare_Nuclei ~ Clump_Thickness + Uniform_Cell_Shape +
## Marg_Adhesion + Bland_Chromatin, data = newdata.1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.8115 -0.9531 -0.3111 0.6678 8.6889
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.53601 0.17514 -3.060 0.0023 **
## Clump_Thickness 0.22617 0.04121 5.488 5.75e-08 ***
## Uniform_Cell_Shape 0.31729 0.05086 6.239 7.76e-10 ***
## Marg_Adhesion 0.33227 0.04431 7.499 2.03e-13 ***
## Bland_Chromatin 0.32378 0.05606 5.775 1.17e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.274 on 678 degrees of freedom
## Multiple R-squared: 0.6129, Adjusted R-squared: 0.6107
## F-statistic: 268.4 on 4 and 678 DF, p-value: < 2.2e-16
Observation2:
# Fix the RNG seed so the cross-validation folds are reproducible
set.seed(123)
# Plain 10-fold cross-validation (method = "cv", i.e. without repeats)
train.control <- trainControl(method = "cv", number = 10)
# Backward subset selection through caret, tuning the subset size (nvmax) over
# 1 to 4 predictors; the result is stored in step.model.
step.model <- train(
  Bare_Nuclei ~ .,
  data = newdata.1,
  method = "leapBackward",
  trControl = train.control,
  tuneGrid = data.frame(nvmax = 1:4)
)
step.model$results
## nvmax RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 1 2.548924 0.5228085 1.765516 0.2208983 0.08742394 0.1577356
## 2 2 2.432898 0.5624336 1.642851 0.2839852 0.10115619 0.1857652
## 3 3 2.362419 0.5900746 1.565549 0.2804584 0.09463731 0.1827845
## 4 4 2.278984 0.6185545 1.534310 0.2734543 0.08969728 0.1880005
# Show the nvmax value that train() selected
step.model$bestTune
## nvmax
## 4 4
Observation3:
Cross-validation indicated that the smaller model with the following predictors had the lowest RMSE (2.279) and the highest R-squared (0.619).
The best model was a four-predictor model: Clump_Thickness + Uniform_Cell_Shape + Marg_Adhesion + Bland_Chromatin
# Predict Bare_Nuclei for the rows in which it is missing
predicted.missing <- predict(step.model, newdata = df[missing.index, ])
# Regression-imputed copy of the data. as.integer() truncates each prediction
# toward zero to obtain a whole-number score.
df.final.regression <- df
df.final.regression$Bare_Nuclei[missing.index] <- as.integer(predicted.missing)
# Final data with the regression-imputed values
head(df.final.regression, 24)
## ID Clump_Thickness Uniform_Cell_Size Uniform_Cell_Shape Marg_Adhesion
## 1 1000025 5 1 1 1
## 2 1002945 5 4 4 5
## 3 1015425 3 1 1 1
## 4 1016277 6 8 8 1
## 5 1017023 4 1 1 3
## 6 1017122 8 10 10 8
## 7 1018099 1 1 1 1
## 8 1018561 2 1 2 1
## 9 1033078 2 1 1 1
## 10 1033078 4 2 1 1
## 11 1035283 1 1 1 1
## 12 1036172 2 1 1 1
## 13 1041801 5 3 3 3
## 14 1043999 1 1 1 1
## 15 1044572 8 7 5 10
## 16 1047630 7 4 6 4
## 17 1048672 4 1 1 1
## 18 1049815 4 1 1 1
## 19 1050670 10 7 7 6
## 20 1050718 6 1 1 1
## 21 1054590 7 3 2 10
## 22 1054593 10 5 5 3
## 23 1056784 3 1 1 1
## 24 1057013 8 4 5 1
## Single_Epith_Cell_Size Bare_Nuclei Bland_Chromatin Normal_Nucleoli Mitoses
## 1 2 1 3 1 1
## 2 7 10 3 2 1
## 3 2 2 3 1 1
## 4 3 4 3 7 1
## 5 2 1 3 1 1
## 6 7 10 9 7 1
## 7 2 10 3 1 1
## 8 2 1 3 1 1
## 9 2 1 1 1 5
## 10 2 1 2 1 1
## 11 1 1 3 1 1
## 12 2 1 2 1 1
## 13 2 3 4 4 1
## 14 2 3 3 1 1
## 15 7 9 5 5 4
## 16 6 1 4 3 1
## 17 2 1 2 1 1
## 18 2 1 3 1 1
## 19 4 10 4 1 2
## 20 2 1 3 1 1
## 21 5 10 5 4 4
## 22 6 7 7 10 1
## 23 2 1 2 1 1
## 24 2 5 7 3 1
## Class
## 1 0
## 2 0
## 3 0
## 4 0
## 5 0
## 6 1
## 7 0
## 8 0
## 9 0
## 10 0
## 11 0
## 12 0
## 13 1
## 14 0
## 15 1
## 16 1
## 17 0
## 18 0
## 19 1
## 20 0
## 21 1
## 22 1
## 23 0
## 24 1
set.seed(123)
# Perturb the regression predictions with Gaussian noise: one draw per missing
# value, centred on that value's prediction, with spread equal to the standard
# deviation of the predictions. The number of draws follows the length of
# predicted.missing instead of a literal 16, so it cannot drift out of step
# with the data.
n <- rnorm(
  length(predicted.missing),
  mean = predicted.missing,
  sd = sd(predicted.missing)
)
n
## [1] 4.2231494 7.4742594 4.4229474 1.7772688 1.2657577 5.9960455
## [7] 3.7312046 -1.0250090 0.5602511 5.1042934 3.6853723 3.3196246
## [13] 6.1272047 2.0073698 -0.2378873 4.6021659
# abs() reflects the negative draws onto positive values; the result is only
# printed here and n itself is left unchanged
abs(n)
## [1] 4.2231494 7.4742594 4.4229474 1.7772688 1.2657577 5.9960455 3.7312046
## [8] 1.0250090 0.5602511 5.1042934 3.6853723 3.3196246 6.1272047 2.0073698
## [15] 0.2378873 4.6021659
df.final.regression.pertubed <- df
# Convert the perturbed predictions to whole-number scores: take the magnitude,
# truncate toward zero, then clamp to 1..10. Without the clamp, draws with a
# magnitude below 1 become 0, which lies outside the 1-10 range that every
# score column takes in this data set.
df.final.regression.pertubed$Bare_Nuclei[missing.index] <-
  pmin(pmax(as.integer(abs(n)), 1L), 10L)
# Final data with the perturbed regression-imputed values
head(df.final.regression.pertubed, 24)
## ID Clump_Thickness Uniform_Cell_Size Uniform_Cell_Shape Marg_Adhesion
## 1 1000025 5 1 1 1
## 2 1002945 5 4 4 5
## 3 1015425 3 1 1 1
## 4 1016277 6 8 8 1
## 5 1017023 4 1 1 3
## 6 1017122 8 10 10 8
## 7 1018099 1 1 1 1
## 8 1018561 2 1 2 1
## 9 1033078 2 1 1 1
## 10 1033078 4 2 1 1
## 11 1035283 1 1 1 1
## 12 1036172 2 1 1 1
## 13 1041801 5 3 3 3
## 14 1043999 1 1 1 1
## 15 1044572 8 7 5 10
## 16 1047630 7 4 6 4
## 17 1048672 4 1 1 1
## 18 1049815 4 1 1 1
## 19 1050670 10 7 7 6
## 20 1050718 6 1 1 1
## 21 1054590 7 3 2 10
## 22 1054593 10 5 5 3
## 23 1056784 3 1 1 1
## 24 1057013 8 4 5 1
## Single_Epith_Cell_Size Bare_Nuclei Bland_Chromatin Normal_Nucleoli Mitoses
## 1 2 1 3 1 1
## 2 7 10 3 2 1
## 3 2 2 3 1 1
## 4 3 4 3 7 1
## 5 2 1 3 1 1
## 6 7 10 9 7 1
## 7 2 10 3 1 1
## 8 2 1 3 1 1
## 9 2 1 1 1 5
## 10 2 1 2 1 1
## 11 1 1 3 1 1
## 12 2 1 2 1 1
## 13 2 3 4 4 1
## 14 2 3 3 1 1
## 15 7 9 5 5 4
## 16 6 1 4 3 1
## 17 2 1 2 1 1
## 18 2 1 3 1 1
## 19 4 10 4 1 2
## 20 2 1 3 1 1
## 21 5 10 5 4 4
## 22 6 7 7 10 1
## 23 2 1 2 1 1
## 24 2 4 7 3 1
## Class
## 1 0
## 2 0
## 3 0
## 4 0
## 5 0
## 6 1
## 7 0
## 8 0
## 9 0
## 10 0
## 11 0
## 12 0
## 13 1
## 14 0
## 15 1
## 16 1
## 17 0
## 18 0
## 19 1
## 20 0
## 21 1
## 22 1
## 23 0
## 24 1
# Re-read the raw file (no header row, "?" marks missing values) so the
# row-removal comparison starts from untouched data. Argument names are spelled
# out in full and FALSE is used instead of the reassignable shorthand F.
df1 <- read.table(
  "breast-cancer-wisconsin.data.txt",
  stringsAsFactors = FALSE,
  header = FALSE,
  sep = ",",
  na.strings = "?"
)
head(df1, 2)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11
## 1 1000025 5 1 1 1 2 1 3 1 1 2
## 2 1002945 5 4 4 5 7 10 3 2 1 2
# Resampling scheme for the kNN fits: 10-fold cross-validation, repeated 3 times
ctrl <- trainControl(method = "repeatedcv", number = 10, repeats = 3)
# kNN classifier on the mean-imputed data. Predictors are columns 2:10 only:
# column 1 is the sample ID, an identifier rather than a measurement, and must
# not contribute to the neighbour distances. The seed makes the resampling
# folds reproducible.
set.seed(123)
knn.mean <- train(
  x = df.mean[, 2:10],
  y = as.factor(df.mean[, 11]),
  method = "knn",
  trControl = ctrl,
  preProcess = c("center", "scale"),
  tuneLength = 10
)
plot(knn.mean, col = "dark red", lwd = 5, lty = 2, cex.lab = 1.25, cex.main = 1.5,
     main = "Mean Imputed Dataset: Accuracy of kNN with repeated 10-fold CV",
     xlab = "Number of neighbors",
     ylab = "Accuracy of classification")
# kNN classifier on the regression-imputed data. Predictors are columns 2:10
# only: column 1 is the sample ID, an identifier rather than a measurement, and
# must not contribute to the neighbour distances. The seed makes the resampling
# folds reproducible.
set.seed(123)
knn.regressed <- train(
  x = df.final.regression[, 2:10],
  y = as.factor(df.final.regression[, 11]),
  method = "knn",
  trControl = ctrl,
  preProcess = c("center", "scale"),
  tuneLength = 10
)
plot(knn.regressed, col = "dark red", lwd = 5, lty = 2, cex.lab = 1.25, cex.main = 1.5,
     main = "Regression Imputed Dataset: Accuracy of kNN with repeated 10-fold CV",
     xlab = "Number of neighbors",
     ylab = "Accuracy of classification")
# kNN classifier on the data imputed by regression with perturbation.
# Predictors are columns 2:10 only: column 1 is the sample ID, an identifier
# rather than a measurement, and must not contribute to the neighbour
# distances. The seed makes the resampling folds reproducible.
set.seed(123)
knn.perturbreg <- train(
  x = df.final.regression.pertubed[, 2:10],
  y = as.factor(df.final.regression.pertubed[, 11]),
  method = "knn",
  trControl = ctrl,
  preProcess = c("center", "scale"),
  tuneLength = 10
)
plot(x = knn.perturbreg, col = "dark red", lwd = 5, lty = 2, cex.lab = 1.25, cex.main = 1.5,
     main = "Regression with Perturbation: Accuracy of kNN with repeated 10-fold CV",
     xlab = "Number of neighbors",
     ylab = "Accuracy of classification")
# Baseline without imputation: drop every row that contains a missing value
df1.removed <- na.omit(df1)
# kNN classifier on the complete rows. Predictors are columns 2:10 only:
# column 1 (V1) is the sample ID, an identifier rather than a measurement, and
# must not contribute to the neighbour distances. The seed makes the resampling
# folds reproducible.
set.seed(123)
knn.removed <- train(
  x = df1.removed[, 2:10],
  y = as.factor(df1.removed[, 11]),
  method = "knn",
  trControl = ctrl,
  preProcess = c("center", "scale"),
  tuneLength = 10
)
plot(knn.removed, col = "dark red", lwd = 5, lty = 2, cex.lab = 1.25, cex.main = 1.5,
     main = "Missing values removed: Accuracy of kNN with repeated 10-fold CV",
     xlab = "Number of neighbors",
     ylab = "Accuracy of classification")
Observation4:
On average, the kNN accuracies were quite high for all approaches: a little over 90% overall.
However, the optimal number of neighbors (k) differed between approaches:
Mean imputation resulted in k = 5 neighbors
Regular regression imputation resulted in k = 20 neighbors
Perturbed regression imputation resulted in k = 19 neighbors
Removing the rows with missing values resulted in k = 19 neighbors
- The column that had the 16 missing values was in the Bare_Nuclei column
- The missing value is well below the threshold of 5% so imputation is OK (missing only 2.3% of entire data set)
- The mean of Bare Nuclei was 3.54
- A first pass linear regression using all the other predictors resulted in multiple insignificant predictors
- Using stepwise selection in both directions reduced the number of predictors to four; the cross-validated four-predictor model had an RMSE of 2.279 and an R-squared of 0.619
- Mean imputation, removal of the rows with missing values, and regression imputation with and without perturbation were then compared using kNN. The results showed little difference in accuracy, which was in the high-90% range for every approach; both regular regression and perturbed regression selected around 20 neighbors. The only exception was mean imputation, whose optimal result came at a lower k of 5 neighbors.
- Based on the above observations (although the sample size is small), mean imputation seems to be the better approach: it reaches its optimum with only 5 neighbors and, above all, it is simple to implement. The key takeaway is that, since the missing values were a very small share of the entire data set, an easy and simple approach suffices. Simple wins out!
Question 15.1
Describe a situation or problem from your job, everyday life, current events, etc., for which optimization would be appropriate. What data would you need?
Capital budgeting in a company: how much, and where, to allocate scarce resources to capital projects. For example, maximize the net income of all the projects that can be undertaken given the amount of capital dollars available (the constraint).
Decision variables: Each project has a maximum and a minimum for how many capital dollars it can absorb. These bounds are set at the local department level by the project manager, who knows how much he or she needs to accomplish the tasks. They take into consideration not only the capital dollars needed but also the time and human resources available to them to complete their projects.
Constraints: There are two sets of constraints: local constraints on the decision variables and global constraints. Local constraints are the ranges that each decision variable can take, while global constraints define and limit the feasible search space of the objective function, for example the total amount of capital dollars or the minimum amount of time.
Objective function:
Maximization of revenues or Net Income or Minimization of expenses