The following plots present the two-factor interactions for each combination of the factors studied (in this case discarding the lowest level of the alcohol and drug factors).





APPENDIX
Complete code is provided below:
# Loading data from the TSV file ----
crimedata <- read.delim("CrimeData.tsv")
str(crimedata)

# Creating a new data frame with the variables selected for this study ----

# Map YEYATTAK codes 1..5 to the numeric values below; any other code
# (including NA) becomes NA.
# NOTE(review): the name suggests these are probabilities -- confirm their
# origin against the survey codebook.
probcrime <- c(0.368, 0.92, 0.996, 0.999, 1)[match(crimedata$YEYATTAK, 1:5)]

# Keep ANYCRIME only where it is exactly 0 or 1; anything else becomes NA.
crime1 <- c(0, 1)[match(crimedata$ANYCRIME, c(0, 1))]

# CATAG2 used to be listed twice in this call, which made data.frame()
# silently add a redundant "CATAG2.1" column; it is now included once.
newdata <- data.frame(
  id = crimedata$CASEID,
  IRSEX = crimedata$IRSEX,
  CATAG2 = crimedata$CATAG2,
  AGEYOUNG = crimedata$AGE2,
  INCOME_R = crimedata$INCOME_R,
  EDU_DUMMY = crimedata$EDU_DUMMY,
  EDUC2 = crimedata$IREDUC2,
  BINGEHVY = crimedata$BINGEHVY,
  CDCGMO = crimedata$CDCGMO,
  CDNOCGMO = crimedata$CDNOCGMO,
  CDUFLAG = crimedata$CDUFLAG,
  SUMFLAG = crimedata$SUMFLAG,
  MJOFLAG = crimedata$MJOFLAG,
  IEMFLAG = crimedata$IEMFLAG,
  ANYCRIME = crime1,
  CRIME2 = probcrime,
  ARREST = crimedata$NUMARREST,
  DEPRESS = crimedata$YODEPRESSIONINDEX
)

# Persist the selection and read it back, then look at both ends of the data.
save(newdata, file = "data2.RData")
load("data2.RData")
head(newdata)
tail(newdata)
# Univariate summaries of the selected variables.
summary(newdata$IRSEX)
summary(newdata$CATAG2)
summary(newdata$INCOME_R)
summary(newdata$EDU_DUMMY)
# The education and depression columns are stored in newdata as EDUC2 and
# DEPRESS; the previous names (IREDUC2, DEPRESSIONINDEX2) do not exist there,
# so those calls summarised NULL.
summary(newdata$EDUC2)
summary(newdata$DEPRESS)

# Summaries of the crime and depression variables split by sex and by
# the education dummy.
tapply(newdata$CRIME2, newdata$IRSEX, summary)
tapply(newdata$ANYCRIME, newdata$IRSEX, summary)
tapply(newdata$CRIME2, newdata$EDU_DUMMY, summary)
tapply(newdata$ANYCRIME, newdata$EDU_DUMMY, summary)
tapply(newdata$DEPRESS, newdata$EDU_DUMMY, summary)
tapply(newdata$DEPRESS, newdata$IRSEX, summary)
# Code -> label tables used to build the study factors.
gen_levels  <- c(male = 1, female = 2)
age_levels  <- c(child = 1, young = 2)
edu_levels  <- c(elem = 1, hs = 2)
alc_levels  <- c(none = 1, low = 2, high = 3)
drug_levels <- c(none = 1, low = 2, high = 3)
cig_levels  <- c(no = 1, yes = 2)

# Collapse the raw codes onto the level codes defined above.
# AGEYOUNG codes 1, 2 and 3 form level 1 (child); every other code is level 2.
age_rel <- ifelse(
  newdata$AGEYOUNG == 1 | newdata$AGEYOUNG == 2 | newdata$AGEYOUNG == 3,
  1,
  2
)
# EDUC2 code 1 is level 1 (elem); every other code is level 2 (hs).
edu_rel <- ifelse(newdata$EDUC2 == 1, 1, 2)
# Earlier definition, kept for reference:
# cigarrel <- ifelse(newdata$CDCGMO == 1, 1, ifelse(newdata$CDNOCGMO == 1, 2, NA))
# CDUFLAG 0 is level 1 (no); anything else is level 2 (yes).
cigarrel <- ifelse(newdata$CDUFLAG == 0, 1, 2)
# BINGEHVY 4 -> none, 1 -> high, any other code -> low.
alc_rel <- ifelse(
  newdata$BINGEHVY == 4,
  1,
  ifelse(newdata$BINGEHVY == 1, 3, 2)
)
# SUMFLAG 0 -> none; otherwise MJOFLAG 1 -> low and anything else -> high.
drugrel <- ifelse(
  newdata$SUMFLAG == 0,
  1,
  ifelse(newdata$MJOFLAG == 1, 2, 3)
)

# Labelled factors. They are created as standalone variables first (they
# remain available in the workspace) and then gathered into studydata.
gender  <- factor(newdata$IRSEX, levels = gen_levels, labels = names(gen_levels))
age     <- factor(age_rel, levels = age_levels, labels = names(age_levels))
educ    <- factor(edu_rel, levels = edu_levels, labels = names(edu_levels))
cigar   <- factor(cigarrel, levels = cig_levels, labels = names(cig_levels))
alcohol <- factor(alc_rel, levels = alc_levels, labels = names(alc_levels))
druguse <- factor(drugrel, levels = drug_levels, labels = names(drug_levels))

studydata <- data.frame(
  gender = gender,
  age = age,
  educ = educ,
  cigar = cigar,
  alcohol = alcohol,
  druguse = druguse,
  crime = newdata$ANYCRIME,
  prcrime = newdata$CRIME2,
  arrests = newdata$ARREST,
  depression = newdata$DEPRESS
)
#studydata <- data.frame(gender = (gender = factor(newdata$IRSEX, levels = gen_levels, labels = names(gen_levels))),
# age = (age = factor(newdata$CATAG2, levels = age_levels, labels = names(age_levels))),
# alcohol = (alcohol = factor(alc_rel, levels = alc_levels, labels = names(alc_levels))),
# druguse= (druguse = factor(drugrel, levels = drug_levels, labels = names(drug_levels))),
# crime = newdata$ANYCRIME, prcrime=newdata$CRIME2)
# Keep only the rows with a non-negative depression index (rows where the
# index is NA are dropped by subset()), then persist and reload the result.
data2proj3 <- subset(studydata, depression >= 0)
save(data2proj3, file = "data3.RData")
load("data3.RData")

# Data Analysis ----
# Distribution of the response.
hist(
  x = data2proj3$depression,
  main = "Depression Index of the Youth Population",
  xlab = "Level of Depression (low 0 - high 9)"
)

# Response split by each candidate factor, one boxplot per factor.
boxplot(
  depression ~ alcohol,
  data = data2proj3,
  main = "Depression Index based on Alcohol",
  xlab = "Level of Alcohol Use",
  ylab = "depression index"
)
boxplot(
  depression ~ druguse,
  data = data2proj3,
  main = "Depression Index based on Drug Use",
  xlab = "Level of Drug Use",
  ylab = "depression index"
)
boxplot(
  depression ~ gender,
  data = data2proj3,
  main = "Depression Index based on Gender",
  xlab = "Gender",
  ylab = "depression index"
)
boxplot(
  depression ~ age,
  data = data2proj3,
  main = "Depression Index based on Age",
  xlab = "Age of Youth",
  ylab = "depression index"
)
boxplot(
  depression ~ educ,
  data = data2proj3,
  main = "Depression Index based on Education",
  xlab = "Education",
  ylab = "depression index"
)
boxplot(
  depression ~ cigar,
  data = data2proj3,
  main = "Depression Index based on Cigar Use",
  xlab = "Cigar Daily Use?",
  ylab = "depression index"
)
# Generating the data for the 2^k design ----
# Response plus the four study factors; the indicator columns start out as
# NA and are filled in below.
proj3 <- data.frame(
  depression = data2proj3$depression,
  gender = data2proj3$gender,
  cigardaily = data2proj3$cigar,
  alcohol = data2proj3$alcohol,
  drugs = data2proj3$druguse,
  low_alc = NA,
  high_alc = NA,
  low_drug = NA,
  high_drug = NA
)
head(proj3)

# Split each three-level factor into two 0/1 indicators with "none" as the
# reference level: low_* is 1 only for "low", high_* is 1 only for "high".
proj3$low_alc   <- as.numeric(proj3$alcohol == "low")
proj3$high_alc  <- as.numeric(proj3$alcohol == "high")
proj3$low_drug  <- as.numeric(proj3$drugs == "low")
proj3$high_drug <- as.numeric(proj3$drugs == "high")
head(proj3)
# Names of the six two-level factors used in both designs.
nam2 <- c("gender", "cigardaily", "low_alc", "high_alc", "low_drug", "high_drug")

# Full Factorial Design ----
# Every 0/1 combination of the six factors (printed, not stored).
do.call(expand.grid, setNames(rep(list(c(0, 1)), length(nam2)), nam2))

# Fractional Factorial Design ----
library(FrF2)
# 2^(6-3) = 8 runs.
runs <- 2^(length(nam2) - 3)
frac_design <- FrF2(
  nruns = runs,
  factor.names = nam2,
  default.levels = c("0", "1")
)
summary(frac_design)
frac1 <- data.frame(frac_design)

# Aliasing Structure ----
aliasprint(frac_design)

# Same design size with generic factor names A..F and -1/1 levels, plus a
# constant column I.
frac2_design <- FrF2(
  nruns = runs,
  factor.names = LETTERS[1:6],
  default.levels = c("-1", "1")
)
frac2 <- data.frame(frac2_design)
frac2$I <- 1
# Assigning the response variable ----

# Draw exactly one element of `x` at random.
# Indexing through sample.int() avoids the documented sample() pitfall: when
# `x` is a single number n >= 1, sample(x, 1) draws from 1:n instead of
# returning n, so a run with only one observation would get a wrong response.
draw_one <- function(x) {
  if (length(x) == 0L) {
    stop("no observations available for this run", call. = FALSE)
  }
  x[sample.int(length(x), 1L)]
}

# Observations matching each of the eight runs. Only gender, cigardaily,
# low_alc and low_drug are conditioned on; high_alc and high_drug are not.
run1 <- subset(proj3, gender == "female" & cigardaily == "yes" & low_alc == 0 & low_drug == 0)
run2 <- subset(proj3, gender == "male" & cigardaily == "no" & low_alc == 0 & low_drug == 1)
run3 <- subset(proj3, gender == "male" & cigardaily == "no" & low_alc == 1 & low_drug == 0)
run4 <- subset(proj3, gender == "male" & cigardaily == "yes" & low_alc == 0 & low_drug == 1)
run5 <- subset(proj3, gender == "male" & cigardaily == "yes" & low_alc == 1 & low_drug == 0)
run6 <- subset(proj3, gender == "female" & cigardaily == "no" & low_alc == 1 & low_drug == 1)
run7 <- subset(proj3, gender == "female" & cigardaily == "no" & low_alc == 0 & low_drug == 0)
run8 <- subset(proj3, gender == "female" & cigardaily == "yes" & low_alc == 1 & low_drug == 1)

# One randomly chosen depression value per run, in run order. No seed is set,
# so the values differ between executions.
rv <- vapply(
  list(run1, run2, run3, run4, run5, run6, run7, run8),
  function(run) draw_one(run$depression),
  numeric(1)
)

# Attach the responses to the design by row position.
# NOTE(review): confirm that the row order of frac2 corresponds to run1..run8;
# FrF2() appears to randomize run order by default, in which case the rows
# would not line up with these hand-written run definitions.
frac2$RV <- NA
frac2$RV[seq_along(rv)] <- rv

# Randomized run order of the design with its responses.
rand_fd <- frac2[sample(nrow(frac2)), ]
rand_fd
# Main effects for the design factors A..F, entered as fixed constants and
# printed (the outer parentheses print each value on assignment).
# NOTE(review): presumably these were worked out from one particular draw of
# rand_fd; they are not recomputed here, so they will not track a new sample.
(me_A <- 1.875)
(me_B <- 0.875)
(me_C <- -1.125)
(me_D <- -0.875)
(me_E <- 0.625)
(me_F <- 0.375)
# Linear model of depression on all six indicators with every two-way
# interaction. The variables are supplied through `data = proj3` rather than
# as `proj3$...` terms, so the term labels are the plain column names and the
# fitted object can be used with predict(newdata = ...).
fit <- lm(
  depression ~ (gender + cigardaily + low_alc + high_alc + low_drug + high_drug)^2,
  data = proj3
)
summary(fit)
anova(fit)

# Normal Q-Q plot of the residuals of the full model.
qqnorm(residuals(fit))
qqline(residuals(fit))

# Reduced model without the low_alc and low_drug indicators.
fit2 <- lm(
  depression ~ (gender + cigardaily + high_alc + high_drug)^2,
  data = proj3
)
summary(fit2)
anova(fit2)
# Main effects ----
# Each effect is the mean depression of the rows at the upper level minus the
# mean of the rows at the lower level. which() drops rows where the
# comparison is NA, as subset() did.
me_gender <- with(
  proj3,
  mean(depression[which(gender == "female")]) -
    mean(depression[which(gender == "male")])
)
print(me_gender)

me_cigar <- with(
  proj3,
  mean(depression[which(cigardaily == "yes")]) -
    mean(depression[which(cigardaily == "no")])
)
print(me_cigar)

me_lowalc <- with(
  proj3,
  mean(depression[which(low_alc == 1)]) -
    mean(depression[which(low_alc == 0)])
)
print(me_lowalc)

me_highalc <- with(
  proj3,
  mean(depression[which(high_alc == 1)]) -
    mean(depression[which(high_alc == 0)])
)
print(me_highalc)

me_lowdrug <- with(
  proj3,
  mean(depression[which(low_drug == 1)]) -
    mean(depression[which(low_drug == 0)])
)
print(me_lowdrug)

me_highdrug <- with(
  proj3,
  mean(depression[which(high_drug == 1)]) -
    mean(depression[which(high_drug == 0)])
)
print(me_highdrug)
# Interaction Plots ----
# Mean depression for each pair of the four retained factors (gender,
# cigardaily, high_alc, high_drug): first factor on the x axis, one trace per
# level of the second.
interaction.plot(
  x.factor = proj3$gender,
  trace.factor = proj3$cigardaily,
  response = proj3$depression
)
interaction.plot(
  x.factor = proj3$gender,
  trace.factor = proj3$high_alc,
  response = proj3$depression
)
interaction.plot(
  x.factor = proj3$gender,
  trace.factor = proj3$high_drug,
  response = proj3$depression
)
interaction.plot(
  x.factor = proj3$cigardaily,
  trace.factor = proj3$high_alc,
  response = proj3$depression
)
interaction.plot(
  x.factor = proj3$cigardaily,
  trace.factor = proj3$high_drug,
  response = proj3$depression
)
interaction.plot(
  x.factor = proj3$high_alc,
  trace.factor = proj3$high_drug,
  response = proj3$depression
)