#Final Exam

Question 1

Q1.a.

#1.a.
# Plot the Normal density with mean 10 and standard deviation 1 over x = 5..15.
curve(dnorm(x,10,1),5,15)

Q1.b.

#1.b.
# Lower-tail probability P(X < 11) for X ~ Normal(mean = 10, sd = 1).
pnorm(11,10,1)
## [1] 0.8413447
# Answer: 0.8413447

Question 2

Q2.a.

#2.a.
# Read the diameter measurements directly from the course repository.
dataq2<-read.csv("https://raw.githubusercontent.com/tmatis12/datafiles/main/diameter.csv")
# Histogram of the Diameter column.
hist(dataq2$Diameter,main="Diameter histogram", xlab = "Measurements")

# Boxplot of the same column.
boxplot(dataq2$Diameter,main= "Diameter boxplot")

Q2.b.

Ho: M = 10

Ha: M != 10

#2.b.
# Ho: M = 10   (mean diameter equals 10)
# Ha: M != 10  (mean diameter differs from 10)
# One-sample, two-sided t-test of the Diameter column against mu = 10.
t.test(dataq2$Diameter, mu=10, alternative = "two.sided")
## 
##  One Sample t-test
## 
## data:  dataq2$Diameter
## t = 7.6839, df = 99, p-value = 1.134e-11
## alternative hypothesis: true mean is not equal to 10
## 95 percent confidence interval:
##  10.12638 10.21438
## sample estimates:
## mean of x 
##  10.17038

At a 5% significance level, we reject the null hypothesis that the mean diameter is equal to 10 mm (p-value = 1.134e-11 < 0.05). We therefore have evidence that the mean diameter is not equal to 10 mm.

Question 3

Q3.a.

#3.a.
# Read the fabric breaking-strength measurements from the course repository.
dataq3<-read.csv("https://raw.githubusercontent.com/tmatis12/datafiles/main/Fabric.csv")
# Side-by-side boxplots, one box per fabric type.
# NOTE(review): the first column is addressed as `ï..Abraided`; that prefix looks
# like a UTF-8 byte-order mark absorbed into the header, so the name may differ
# on other platforms or R versions - confirm, or read the file with
# fileEncoding = "UTF-8-BOM" and use `Abraided`.
boxplot(dataq3$ï..Abraided,dataq3$Unabraided,main= "Abraided/Unabraided fabric breaking strenght", names=c("Abraided","Unabraided"))

Q3.b.

Ho: MA = MU

Ha: MA != MU

#3.b.
# Ho: MA = MU   (mean breaking strength of abraided equals unabraided)
# Ha: MA != MU
# Two-sample t-test with pooled variance (var.equal = TRUE); the 90% confidence
# level corresponds to a two-sided test at alpha = 0.10.
# Conclusion from the recorded output below: p-value = 0.1914 > 0.10, so Ho is
# not rejected; there is insufficient evidence that the two means differ.
t.test(dataq3$ï..Abraided,dataq3$Unabraided,var.equal = TRUE, conf.level = 0.90)
## 
##  Two Sample t-test
## 
## data:  dataq3$ï..Abraided and dataq3$Unabraided
## t = -1.3729, df = 14, p-value = 0.1914
## alternative hypothesis: true difference in means is not equal to 0
## 90 percent confidence interval:
##  -16.437078   2.037078
## sample estimates:
## mean of x mean of y 
##    36.375    43.575

Question 4

R Code

# GAD provides as.fixed() and gad(), both used further down.
library(GAD)
# Read the crop-yield experiment data from the course repository.
dataq4<-read.csv("https://raw.githubusercontent.com/tmatis12/datafiles/main/cropdata2.csv")
# Show the column types before the factors are converted.
str(dataq4)
## 'data.frame':    96 obs. of  3 variables:
##  $ density   : int  1 2 1 2 1 2 1 2 1 2 ...
##  $ fertilizer: int  1 1 1 1 1 1 1 1 1 1 ...
##  $ yield     : num  177 178 176 178 177 ...
# Declare both design factors as fixed effects for the gad() analysis.
dataq4$density<-as.fixed(dataq4$density)
dataq4$fertilizer<-as.fixed(dataq4$fertilizer)
# Yield against fertilizer level, with one trace per density level.
interaction.plot(dataq4$fertilizer,dataq4$density,dataq4$yield)

# Full model: both main effects plus the density:fertilizer interaction.
mod<-lm(yield~density+fertilizer+density*fertilizer,dataq4)
gad(mod)
## Analysis of Variance Table
## 
## Response: yield
##                    Df  Sum Sq Mean Sq F value    Pr(>F)    
## density             1  5.1217  5.1217 15.1945 0.0001864 ***
## fertilizer          2  6.0680  3.0340  9.0011 0.0002732 ***
## density:fertilizer  2  0.4278  0.2139  0.6346 0.5325001    
## Residual           90 30.3367  0.3371                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Reduced model: interaction term dropped, main effects only.
mod2<-lm(yield~density+fertilizer,dataq4)
gad(mod2)
## Analysis of Variance Table
## 
## Response: yield
##            Df  Sum Sq Mean Sq F value    Pr(>F)    
## density     1  5.1217  5.1217 15.3162 0.0001741 ***
## fertilizer  2  6.0680  3.0340  9.0731 0.0002533 ***
## Residual   92 30.7645  0.3344                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Q4.a.

The interaction between density:fertilizer is not significant (p-value=0.5325001 > alpha=0.05).

Q4.b.

The main effect Fertilizer is significant (p-value=0.0002533 < alpha=0.05).

The main effect Density is significant (p-value=0.0001741 < alpha=0.05).

Q4.c.

# Split column 3 (yield) into three consecutive runs of 32 rows each.
# NOTE(review): this presumes the rows are sorted by fertilizer with exactly 32
# observations per type - confirm against the data, or select rows by the
# fertilizer column instead.
yieldf1 <- dataq4[1:32,3]
yieldf2 <- dataq4[33:64,3]
yieldf3 <- dataq4[65:96,3]

# One box per row range, drawn in the order yieldf1, yieldf2, yieldf3.
boxplot(yieldf1,yieldf2,yieldf3)

#### Considering the boxplot, I think that fertilizer Type C would give the greatest yield (highest median, smallest spread).

Q4.d.

# NOTE(review): %>% and filter() are not base R and no library() call for them
# appears before this point in the report; presumably they come from dplyr,
# which is loaded at the top of the "Complete R Code" section - confirm.
# Rows for each density level (the question defines 1 = dense, 2 = sparse).
yielddens1 <- dataq4 %>% filter(density==1)
yielddens2 <- dataq4 %>% filter(density==2)

# Within each density level keep only fertilizer level 1 (typeA in the question).
yielddens3 <- yielddens1 %>% filter(fertilizer==1)
yielddens4 <- yielddens2 %>% filter(fertilizer==1)

# First box: density 1 with fertilizer 1; second box: density 2 with fertilizer 1.
boxplot(yielddens3$yield,yielddens4$yield)

#### Considering the boxplot, I think that the sparse density would give the greatest yield with fertilizer Type A (higher median, smaller spread).

Question 5

First part.

# Solve for the per-group sample size (n = NULL) of a balanced one-way ANOVA
# with k = 4 groups, effect size f = 0.5, alpha = 0.05 and target power 0.85.
pwr.anova.test(k=4,n=NULL, f=.5,sig.level = 0.05,power=0.85)
## 
##      Balanced one-way analysis of variance power calculation 
## 
##               k = 4
##               n = 13.32146
##               f = 0.5
##       sig.level = 0.05
##           power = 0.85
## 
## NOTE: n is number in each group

Second part.

# Labels for the four factor levels.
trt1<-c("lvl1","lvl2","lvl3","lvl4")
# Completely randomized design with r replicates per level and a fixed seed.
# NOTE(review): the power calculation above reports n = 13.32146 per group;
# r = 13 rounds that down, so 14 replicates per level would be needed to reach
# the 0.85 power target - confirm and regenerate the table if so.
design<-design.crd(trt=trt1,r=13,seed=123654)
# The run-order table (plot number, replicate, assigned level).
design$book
##    plots  r trt1
## 1    101  1 lvl3
## 2    102  1 lvl4
## 3    103  2 lvl4
## 4    104  2 lvl3
## 5    105  1 lvl1
## 6    106  3 lvl3
## 7    107  2 lvl1
## 8    108  1 lvl2
## 9    109  3 lvl4
## 10   110  3 lvl1
## 11   111  2 lvl2
## 12   112  4 lvl1
## 13   113  4 lvl4
## 14   114  5 lvl1
## 15   115  5 lvl4
## 16   116  4 lvl3
## 17   117  6 lvl1
## 18   118  3 lvl2
## 19   119  7 lvl1
## 20   120  5 lvl3
## 21   121  4 lvl2
## 22   122  6 lvl3
## 23   123  7 lvl3
## 24   124  8 lvl1
## 25   125  5 lvl2
## 26   126  8 lvl3
## 27   127  9 lvl3
## 28   128  6 lvl4
## 29   129  9 lvl1
## 30   130 10 lvl3
## 31   131 10 lvl1
## 32   132  6 lvl2
## 33   133  7 lvl4
## 34   134  7 lvl2
## 35   135  8 lvl4
## 36   136  9 lvl4
## 37   137  8 lvl2
## 38   138 11 lvl3
## 39   139  9 lvl2
## 40   140 11 lvl1
## 41   141 12 lvl1
## 42   142 10 lvl2
## 43   143 10 lvl4
## 44   144 11 lvl4
## 45   145 11 lvl2
## 46   146 12 lvl3
## 47   147 12 lvl4
## 48   148 13 lvl1
## 49   149 13 lvl3
## 50   150 12 lvl2
## 51   151 13 lvl2
## 52   152 13 lvl4

Complete R Code

library(dplyr)
library(tidyr)
library(GAD)
library(car)
library(carData)
library(pwr)
library(agricolae)

#1 The amount of time that it takes to complete a certain job is known to be Normally distributed 
#with a mean of 10 minutes and a standard deviation of 1 minute.  
#a. Plot the probability density function corresponding to job completion times
#b. What is the probability a randomly selected job will be completed in less than 11 minutes?

# 1.a. Density of the job completion time, Normal(mean = 10, sd = 1),
# drawn from 5 to 15 minutes. The dnorm() call is left positional because
# curve() builds its default y-axis label from the expression text.
curve(
  dnorm(x, 10, 1),
  from = 5,
  to = 15
)

# 1.b. P(completion time < 11 minutes): lower-tail probability at q = 11.
pnorm(q = 11, mean = 10, sd = 1)
# Result: 0.8413447

#2. A critical measurement on the diameter of a part that is used in a subassembly is assumed to have a mean of 10mm.  
#The management would like to test this hypothesis against the alternative that it is not equal to 10mm at an alpha= 0.05 level of significance.  
#Towards this end, they have collected a random sample of n=100 parts and measured their diameter, which data is contained in the 
#file https://raw.githubusercontent.com/tmatis12/datafiles/main/diameter.csv
#a. Generate a histogram and boxplot of the collected measurements
#b. State the null and alternative hypothesis, perform the test, and state conclusions.

# 2.a. Load the n = 100 diameter measurements and look at their distribution.
dataq2 <- read.csv(
  file = "https://raw.githubusercontent.com/tmatis12/datafiles/main/diameter.csv"
)
hist(
  dataq2$Diameter,
  main = "Diameter histogram",
  xlab = "Measurements"
)
boxplot(
  dataq2$Diameter,
  main = "Diameter boxplot"
)

# 2.b. Two-sided one-sample t-test of the mean diameter.
#   Ho: M = 10
#   Ha: M != 10
t.test(
  dataq2$Diameter,
  alternative = "two.sided",
  mu = 10
)
# Decision at the 5% significance level: reject Ho (mean equal to 10mm).
# The sample therefore gives evidence that the mean diameter is not 10mm.

#3. Researchers at a textile production facility would like to test the hypothesis that the mean breaking strength of abraided fabric 
#is different than that of unabraided fabric at an alpha=0.10 level of significance.  
#Towards this end, they conducted an experiment in which they measured the breaking force of 8 samples of each type of fabric, 
#which collected data may be found in the file https://raw.githubusercontent.com/tmatis12/datafiles/main/Fabric.csv
# Assume the populations are approximately Normally distributed and use a two-sample t-test with a pooled variance.  
#a. Generate a side-by-side boxplot of the collected measurements on the breaking strength of abraided and unabraided fabric 
#b. State the null and alternative hypothesis, perform the test, and state conclusions

#3.a.
# Fabric.csv begins with a UTF-8 byte-order mark: the "ï.." prefix in the
# previously used column name `ï..Abraided` is its signature. How those bytes
# are mangled into the header depends on platform and R version, so the BOM is
# stripped at read time and the column is addressed by its real name.
dataq3 <- read.csv(
  "https://raw.githubusercontent.com/tmatis12/datafiles/main/Fabric.csv",
  fileEncoding = "UTF-8-BOM"
)
# Fail loudly if the expected columns are absent instead of plotting NULL.
stopifnot(all(c("Abraided", "Unabraided") %in% names(dataq3)))

# Side-by-side boxplots of breaking strength, one box per fabric type.
boxplot(
  dataq3$Abraided, dataq3$Unabraided,
  main = "Abraided/Unabraided fabric breaking strength",
  names = c("Abraided", "Unabraided")
)

#3.b.
#Ho: MA = MU   (mean breaking strength of abraided equals unabraided)
#Ha: MA != MU
# Pooled-variance two-sample t-test (var.equal = TRUE); the 90% confidence
# level corresponds to the two-sided test at alpha = 0.10.
t.test(dataq3$Abraided, dataq3$Unabraided, var.equal = TRUE, conf.level = 0.90)
#At a 10% significance level, we fail to reject the null hypothesis that the
#mean breaking strength of abraided fabric is equal to that of unabraided fabric.
#Failing to reject is not evidence of equality: the data do not provide
#sufficient evidence that the two mean breaking strengths differ.

#4. Consider a designed experiment in which the crop yield was measured at 2 levels of crop density/spacing (1=dense, 2=sparse) 
#and 3 levels of fertilizer (1=typeA, 2=typeB, 3=typeC). A total of 96 observations were collected.  
#A colleague of yours did some preliminary analysis of the data in R using the following code (you may copy and paste this code).
#a. Is the interaction significant (alpha=0.05)?
#b. Are the main effects significant (alpha=0.05)? 
#c. Regardless of how dense the crops are planted, which fertilizer would give you the greatest yield? (justify your answer) 
#d. Suppose that you had to use fertilizer typeA, would you have a greater yield planting the crop dense or sparse? (justify your answer)

# Preliminary two-factor analysis of crop yield (code supplied with the question).
library(GAD)
dataq4 <- read.csv(
  file = "https://raw.githubusercontent.com/tmatis12/datafiles/main/cropdata2.csv"
)
str(object = dataq4)

# Mark both design factors as fixed effects before fitting.
dataq4[["density"]] <- as.fixed(dataq4[["density"]])
dataq4[["fertilizer"]] <- as.fixed(dataq4[["fertilizer"]])

# Fertilizer on the x axis, one trace per density level. The arguments keep
# the `dataq4$...` form because the plot labels are built from them.
interaction.plot(
  x.factor = dataq4$fertilizer,
  trace.factor = dataq4$density,
  response = dataq4$yield
)

# Full model (main effects plus interaction), then the main-effects-only model.
mod <- lm(
  formula = yield ~ density + fertilizer + density * fertilizer,
  data = dataq4
)
gad(mod)
mod2 <- lm(
  formula = yield ~ density + fertilizer,
  data = dataq4
)
gad(mod2)

# 4.a.
# The density:fertilizer interaction is not significant
# (p-value = 0.5325001 > alpha = 0.05).

# 4.b.
# Both main effects are significant at alpha = 0.05:
#   Fertilizer: p-value = 0.0002533
#   Density:    p-value = 0.0001741

#4.c.
# Compare yield across fertilizer types, pooling over planting density.
# Rows are selected by the fertilizer column itself rather than by fixed row
# ranges (1:32, 33:64, 65:96): row slicing is only right while the file stays
# sorted by fertilizer with exactly 32 observations per type.
yieldf1 <- dataq4$yield[dataq4$fertilizer == 1]
yieldf2 <- dataq4$yield[dataq4$fertilizer == 2]
yieldf3 <- dataq4$yield[dataq4$fertilizer == 3]

# Label each box with the fertilizer type defined in the question
# (1 = typeA, 2 = typeB, 3 = typeC) so the plot can be read on its own.
boxplot(
  yieldf1, yieldf2, yieldf3,
  names = c("Type A", "Type B", "Type C"),
  xlab = "Fertilizer",
  ylab = "Yield"
)

# A boxplot shows medians, not means; print the group means so the
# comparison is backed by numbers as well as by the picture.
tapply(dataq4$yield, dataq4$fertilizer, mean)

#Considering the boxplot and the group means, I think that Type C would give
#the greatest yield (highest center, smallest spread).

# 4.d. Fertilizer typeA only: does dense or sparse planting yield more?

# First split the observations by planting density (1 = dense, 2 = sparse).
yielddens1 <- filter(dataq4, density == 1)
yielddens2 <- filter(dataq4, density == 2)

# Then keep only the fertilizer level 1 (typeA) rows within each density.
yielddens3 <- filter(yielddens1, fertilizer == 1)
yielddens4 <- filter(yielddens2, fertilizer == 1)

# First box: dense with typeA; second box: sparse with typeA.
boxplot(
  yielddens3$yield,
  yielddens4$yield
)

# Considering the boxplot, I think that the sparse density would give the
# greatest yield (higher center, smaller spread).

#5. Consider designing an experiment in which we wish to test whether there is a difference in the mean between 
#4 levels of a single factor (i.e. between 4 populations).  Specifically, this is to be a Completely Randomized 
#Design that will be analyzed using ANOVA.  We would like to collect a sufficient number of samples such that the 
#test with an alpha=0.05 level of significance would be able to detect with a power of 85% a mean difference that 
#is 50% of the standard deviation.  Determine the number of samples to be collected and propose a randomized data 
#collection table for this experiment. 

# Per-group sample size for a balanced one-way ANOVA with k = 4 groups,
# effect size f = 0.5, alpha = 0.05 and target power 0.85 (n = NULL asks
# pwr.anova.test to solve for n).
power_calc <- pwr.anova.test(k = 4, n = NULL, f = 0.5, sig.level = 0.05, power = 0.85)
power_calc

# The solved n is fractional (about 13.32 per group). Rounding down to 13
# would leave the test short of the 85% power target, so round UP to the next
# whole observation: 14 per population, 56 runs in total.
n_per_group <- ceiling(power_calc$n)

# Completely randomized design: the four levels, each replicated n_per_group
# times, with a fixed seed so the run-order table is reproducible.
trt1 <- c("lvl1", "lvl2", "lvl3", "lvl4")
design <- design.crd(trt = trt1, r = n_per_group, seed = 123654)
design$book