# Question 1: per-gene linear models on the filtered expression data.

# Load the saved objects into the session instead of attaching the file to the
# search path. The code below expects the file to provide `ALLsfilt` and
# `mol.biol`.
load("ALLsfilt.RData")

n <- ncol(ALLsfilt)
nr <- nrow(ALLsfilt)

# Close the current graphics device, if any. A bare `dev.off` (no parentheses)
# only prints the function definition. dev.off() is an error when only the
# null device (device 1) is open, hence the guard.
if (dev.cur() > 1L) {
  invisible(dev.off())
}

# Fit one linear model per row of ALLsfilt with mol.biol as the predictor.
# colnames(ALLsfilt) is deliberately not a model term: assuming the column
# names are unique sample labels (TODO confirm), it would give every
# observation its own factor level, leaving zero residual degrees of freedom
# and aliasing mol.biol, so no test could be computed.
# NOTE(review): despite its name, pvals4 holds the fitted lm objects, not
# p-values; a per-gene p-value can be taken from e.g.
# anova(fit)[["Pr(>F)"]][1]. This assumes mol.biol has one entry per column
# of ALLsfilt -- confirm against the contents of the .RData file.
pvals4 <- apply(ALLsfilt, 1, function(x) lm(x ~ mol.biol))

Is this an experimental or observational study?

- It is an observational study.

Comment on the features of this distribution

Give a list of differentially expressed genes for which you estimate that less than 20% of the gene list are false positives.

What multiple testing correction would you do to ensure this?

- I would adjust the p-values to control the false discovery rate (FDR) at 20%, e.g. with the Benjamini-Hochberg procedure.

Would a Bonferroni correction be appropriate? Why or why not?

# It would increase the number of false negatives and ultimately not help to reject the null hypothesis.
# It is better to rely more on ANOVA.

# QUESTION 2
# Read the tab-delimited staph data set and preview its first rows.
staph <- read.delim("~/Desktop/Victor/Victor/R-2017/staph.txt")
head(staph)
##   Incubator A_level B_level  DENSITY
## 1         1       1       1 2.47e+07
## 2         1       1       2 3.81e+07
## 3         1       1       3 3.05e+08
## 4         1       2       1 1.22e+07
## 5         1       2       2 8.93e+07
## 6         1       2       3 1.54e+09

# Recode the three design columns as factors in a single step; DENSITY is left
# numeric.
staph <- transform(
  staph,
  Incubator = as.factor(Incubator),
  A_level = as.factor(A_level),
  B_level = as.factor(B_level)
)

# Put the columns on the search path (later statements refer to them by bare
# name) and show the resulting structure.
attach(staph)
str(staph)
## 'data.frame':    48 obs. of  4 variables:
##  $ Incubator: Factor w/ 4 levels "1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
##  $ A_level  : Factor w/ 4 levels "1","2","3","4": 1 1 1 2 2 2 3 3 3 4 ...
##  $ B_level  : Factor w/ 3 levels "1","2","3": 1 2 3 1 2 3 1 2 3 1 ...
##  $ DENSITY  : num  2.47e+07 3.81e+07 3.05e+08 1.22e+07 8.93e+07 1.54e+09 2.34e+07 9.58e+08 6.36e+09 5.00e+08 ...
# Fit the three nested additive models once and reuse the fits for both the
# ANOVA tables and the diagnostic plots. `data = staph` makes the variable
# lookup explicit instead of relying on the attach()ed search path.
# The object names a, b and c are kept for compatibility; calls such as
# c(2, 2) still reach base::c because R skips non-function objects when
# resolving a function call.
# NOTE(review): the DENSITY values in the data preview span several orders of
# magnitude; consider modelling log10(DENSITY) -- confirm against the
# diagnostic plots below.
a <- lm(DENSITY ~ Incubator, data = staph)
b <- lm(DENSITY ~ Incubator + A_level, data = staph)
c <- lm(DENSITY ~ Incubator + A_level + B_level, data = staph)

# Sequential (type I) ANOVA tables for each model.
anova(a)
## Analysis of Variance Table
## 
## Response: DENSITY
##           Df     Sum Sq    Mean Sq F value Pr(>F)
## Incubator  3 1.1546e+19 3.8487e+18  0.3041 0.8223
## Residuals 44 5.5688e+20 1.2656e+19
anova(b)
## Analysis of Variance Table
## 
## Response: DENSITY
##           Df     Sum Sq    Mean Sq F value  Pr(>F)  
## Incubator  3 1.1546e+19 3.8487e+18  0.3601 0.78208  
## A_level    3 1.1872e+20 3.9574e+19  3.7031 0.01902 *
## Residuals 41 4.3816e+20 1.0687e+19                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(c)
## Analysis of Variance Table
## 
## Response: DENSITY
##           Df     Sum Sq    Mean Sq F value    Pr(>F)    
## Incubator  3 1.1546e+19 3.8487e+18  0.5161 0.6736184    
## A_level    3 1.1872e+20 3.9574e+19  5.3070 0.0036447 ** 
## B_level    2 1.4733e+20 7.3666e+19  9.8788 0.0003381 ***
## Residuals 39 2.9082e+20 7.4570e+18                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Diagnostic plots: plot() on an lm fit draws four panels, so each model fills
# one 2 x 2 page. The previous graphics settings are restored afterwards.
old_par <- par(mfrow = c(2, 2))
plot(a)
plot(b)
plot(c)
par(old_par)

# What kind of experimental design is this?
# - This is an experimental study.
# Why was the incubator number recorded when the temperature was set to the same value for each?
# - To check whether there was a change due to incubator temperature fluctuations.
# What is the name given to this kind of variable?

#Analyze this experiment, including checking model assumptions: show and comment on the diagnostic plots. 
# - Overall, the Normal Q-Q plots in the three scenarios show an
#   approximately normal distribution.
# - The diagnostic plots for scenario a (DPS-a) show heteroscedasticity and no apparent correlation.
# - DPS-b: (the start of this remark is missing in the original) ... but then the points become sparse.
# - The DPS-c plots show a nice correlation and a clear grouping.
#Show your final statistical model and give your conclusions from the analysis.
# - The p-values for A_level and B_level show strong evidence of a relationship
#   between cell density and nutrient types.