#Final Exam
#1.a.
# Density of the job completion time, Normal(mean = 10, sd = 1), drawn on [5, 15].
curve(dnorm(x, 10, 1), from = 5, to = 15)
#1.b.
# P(completion time < 11 minutes) under the same distribution.
pnorm(q = 11, mean = 10, sd = 1)
## [1] 0.8413447
#0.8413447
#2.a.
# Read the diameter measurements and look at their distribution.
dataq2 <- read.csv(
  file = "https://raw.githubusercontent.com/tmatis12/datafiles/main/diameter.csv"
)
hist(x = dataq2$Diameter, xlab = "Measurements", main = "Diameter histogram")
boxplot(x = dataq2$Diameter, main = "Diameter boxplot")
#2.b.
#Ho: M = 10
#Ha: M != 10
# Two-sided one-sample t-test of the mean diameter against 10.
t.test(x = dataq2$Diameter, alternative = "two.sided", mu = 10)
##
## One Sample t-test
##
## data: dataq2$Diameter
## t = 7.6839, df = 99, p-value = 1.134e-11
## alternative hypothesis: true mean is not equal to 10
## 95 percent confidence interval:
## 10.12638 10.21438
## sample estimates:
## mean of x
## 10.17038
#3.a.
# Read the fabric breaking-strength data.
# Without "UTF-8-BOM" the first header comes back under a locale-dependent
# mangled name (e.g. `ï..Abraided`), so `dataq3$ï..Abraided` only resolves on
# some machines. Stripping the byte-order mark gives the plain name `Abraided`.
dataq3 <- read.csv(
  "https://raw.githubusercontent.com/tmatis12/datafiles/main/Fabric.csv",
  fileEncoding = "UTF-8-BOM"
)
# Fail loudly if the expected columns are missing instead of plotting NULL.
stopifnot(all(c("Abraided", "Unabraided") %in% names(dataq3)))
boxplot(dataq3$Abraided, dataq3$Unabraided, main = "Abraided/Unabraided fabric breaking strenght", names = c("Abraided", "Unabraided"))
#3.b.
#Ho: MA = MU
#Ha: MA != MU
# Pooled-variance two-sample t-test; conf.level = 0.90 matches alpha = 0.10.
t.test(dataq3$Abraided, dataq3$Unabraided, var.equal = TRUE, conf.level = 0.90)
##
## Two Sample t-test
##
## data: dataq3$Abraided and dataq3$Unabraided
## t = -1.3729, df = 14, p-value = 0.1914
## alternative hypothesis: true difference in means is not equal to 0
## 90 percent confidence interval:
## -16.437078 2.037078
## sample estimates:
## mean of x mean of y
## 36.375 43.575
# Two-factor analysis of crop yield (density x fertilizer) using GAD.
library(GAD)
dataq4 <- read.csv(
  file = "https://raw.githubusercontent.com/tmatis12/datafiles/main/cropdata2.csv"
)
str(object = dataq4)
## 'data.frame': 96 obs. of 3 variables:
## $ density : int 1 2 1 2 1 2 1 2 1 2 ...
## $ fertilizer: int 1 1 1 1 1 1 1 1 1 1 ...
## $ yield : num 177 178 176 178 177 ...
# Declare both factors as fixed (GAD::as.fixed) before fitting.
dataq4[["density"]] <- as.fixed(dataq4[["density"]])
dataq4[["fertilizer"]] <- as.fixed(dataq4[["fertilizer"]])
interaction.plot(
  x.factor = dataq4$fertilizer,
  trace.factor = dataq4$density,
  response = dataq4$yield
)
# Full model: both main effects plus their interaction.
mod <- lm(formula = yield ~ density * fertilizer, data = dataq4)
gad(mod)
## Analysis of Variance Table
##
## Response: yield
## Df Sum Sq Mean Sq F value Pr(>F)
## density 1 5.1217 5.1217 15.1945 0.0001864 ***
## fertilizer 2 6.0680 3.0340 9.0011 0.0002732 ***
## density:fertilizer 2 0.4278 0.2139 0.6346 0.5325001
## Residual 90 30.3367 0.3371
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Reduced model: main effects only.
mod2 <- lm(formula = yield ~ density + fertilizer, data = dataq4)
gad(mod2)
## Analysis of Variance Table
##
## Response: yield
## Df Sum Sq Mean Sq F value Pr(>F)
## density 1 5.1217 5.1217 15.3162 0.0001741 ***
## fertilizer 2 6.0680 3.0340 9.0731 0.0002533 ***
## Residual 92 30.7645 0.3344
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 4.c. Yield per fertilizer type, pooled over density.
# Rows are selected by fertilizer label rather than by fixed row ranges, so the
# groups stay correct even if the file is not sorted by fertilizer.
yieldf1 <- dataq4$yield[dataq4$fertilizer == 1]
yieldf2 <- dataq4$yield[dataq4$fertilizer == 2]
yieldf3 <- dataq4$yield[dataq4$fertilizer == 3]
boxplot(yieldf1, yieldf2, yieldf3, names = c("typeA", "typeB", "typeC"))
#### Considering the boxplot, I think that Type C would give the greatest yield (higher median, smaller spread).
# 4.d. Fertilizer typeA only: dense (1) versus sparse (2) planting.
# Base subset() is used because dplyr is not attached at this point in the
# script, so `%>%` and dplyr's filter() are unavailable here.
yielddens1 <- subset(dataq4, density == 1)
yielddens2 <- subset(dataq4, density == 2)
yielddens3 <- subset(yielddens1, fertilizer == 1)
yielddens4 <- subset(yielddens2, fertilizer == 1)
boxplot(yielddens3$yield, yielddens4$yield, names = c("dense", "sparse"))
#### Considering the boxplot, I think that sparse density would give the greatest yield (higher median, smaller spread).
# Sample size per group for a one-way ANOVA with 4 groups, effect size f = 0.5,
# alpha = 0.05 and power = 0.85.
# pwr and agricolae are not attached at this point in the script, so both calls
# are namespace-qualified to keep this section runnable from the top.
pwr::pwr.anova.test(k = 4, n = NULL, f = .5, sig.level = 0.05, power = 0.85)
##
## Balanced one-way analysis of variance power calculation
##
## k = 4
## n = 13.32146
## f = 0.5
## sig.level = 0.05
## power = 0.85
##
## NOTE: n is number in each group
# NOTE(review): n = 13.32 per group has to be rounded up to 14 to actually reach
# the 0.85 power target; r = 13 is kept here only so the recorded table below
# stays valid.
trt1 <- c("lvl1", "lvl2", "lvl3", "lvl4")
# Completely randomized design; the fixed seed makes the run order reproducible.
design <- agricolae::design.crd(trt = trt1, r = 13, seed = 123654)
design$book
## plots r trt1
## 1 101 1 lvl3
## 2 102 1 lvl4
## 3 103 2 lvl4
## 4 104 2 lvl3
## 5 105 1 lvl1
## 6 106 3 lvl3
## 7 107 2 lvl1
## 8 108 1 lvl2
## 9 109 3 lvl4
## 10 110 3 lvl1
## 11 111 2 lvl2
## 12 112 4 lvl1
## 13 113 4 lvl4
## 14 114 5 lvl1
## 15 115 5 lvl4
## 16 116 4 lvl3
## 17 117 6 lvl1
## 18 118 3 lvl2
## 19 119 7 lvl1
## 20 120 5 lvl3
## 21 121 4 lvl2
## 22 122 6 lvl3
## 23 123 7 lvl3
## 24 124 8 lvl1
## 25 125 5 lvl2
## 26 126 8 lvl3
## 27 127 9 lvl3
## 28 128 6 lvl4
## 29 129 9 lvl1
## 30 130 10 lvl3
## 31 131 10 lvl1
## 32 132 6 lvl2
## 33 133 7 lvl4
## 34 134 7 lvl2
## 35 135 8 lvl4
## 36 136 9 lvl4
## 37 137 8 lvl2
## 38 138 11 lvl3
## 39 139 9 lvl2
## 40 140 11 lvl1
## 41 141 12 lvl1
## 42 142 10 lvl2
## 43 143 10 lvl4
## 44 144 11 lvl4
## 45 145 11 lvl2
## 46 146 12 lvl3
## 47 147 12 lvl4
## 48 148 13 lvl1
## 49 149 13 lvl3
## 50 150 12 lvl2
## 51 151 13 lvl2
## 52 152 13 lvl4
library(dplyr)
library(tidyr)
library(GAD)
library(car)
library(carData)
library(pwr)
library(agricolae)
#1 The amount of time that it takes to complete a certain job is known to be Normally distributed
#with a mean of 10 minutes and a standard deviation of 1 minute.
#a. Plot the probability density function corresponding to job completion times
#b. What is the probability a randomly selected job will be completed in less than 11 minutes?
#1.a.
# Density of the job completion time, Normal(mean = 10, sd = 1), drawn on [5, 15].
curve(dnorm(x, 10, 1), from = 5, to = 15)
#1.b.
# P(completion time < 11 minutes) under the same distribution.
pnorm(q = 11, mean = 10, sd = 1)
#0.8413447
#2. A critical measurement on the diameter of a part that is used in a subassembly is assumed to have a mean of 10mm.
#The management would like to test this hypothesis against the alternative that it is not equal to 10mm at an alpha= 0.05 level of significance.
#Towards this end, they have collected a random sample of n=100 parts and measured their diameter, which data is contained in the
#file https://raw.githubusercontent.com/tmatis12/datafiles/main/diameter.csv
#a. Generate a histogram and boxplot of the collected measurements
#b. State the null and alternative hypothesis, perform the test, and state conclusions.
#2.a.
# Read the diameter measurements and look at their distribution.
dataq2 <- read.csv(
  file = "https://raw.githubusercontent.com/tmatis12/datafiles/main/diameter.csv"
)
hist(x = dataq2$Diameter, xlab = "Measurements", main = "Diameter histogram")
boxplot(x = dataq2$Diameter, main = "Diameter boxplot")
#2.b.
#Ho: M = 10
#Ha: M != 10
# Two-sided one-sample t-test of the mean diameter against 10.
t.test(x = dataq2$Diameter, alternative = "two.sided", mu = 10)
#At a 5% significance level, we reject the null hypothesis that the mean is equal to 10mm.
#So we have evidence that the mean diameter is not equal to 10mm.
#3. Researchers at a textile production facility would like to test the hypothesis that the mean breaking strength of abraided fabric
#is different than that of unabraided fabric at an alpha=0.10 level of significance.
#Towards this end, they conducted an experiment in which they measured the breaking force of 8 samples of each type of fabric,
#which collected data may be found in the file https://raw.githubusercontent.com/tmatis12/datafiles/main/Fabric.csv
# Assume the populations are approximately Normally distributed and use a two-sample t-test with a pooled variance.
#a. Generate a side-by-side boxplot of the collected measurements on the breaking strength of abraided and unabraided fabric
#b. State the null and alternative hypothesis, perform the test, and state conclusions
#3.a.
# Read the fabric breaking-strength data.
# Without "UTF-8-BOM" the first header comes back under a locale-dependent
# mangled name (e.g. `ï..Abraided`), so `dataq3$ï..Abraided` only resolves on
# some machines. Stripping the byte-order mark gives the plain name `Abraided`.
dataq3 <- read.csv(
  "https://raw.githubusercontent.com/tmatis12/datafiles/main/Fabric.csv",
  fileEncoding = "UTF-8-BOM"
)
# Fail loudly if the expected columns are missing instead of plotting NULL.
stopifnot(all(c("Abraided", "Unabraided") %in% names(dataq3)))
boxplot(dataq3$Abraided, dataq3$Unabraided, main = "Abraided/Unabraided fabric breaking strenght", names = c("Abraided", "Unabraided"))
#3.b.
#Ho: MA = MU
#Ha: MA != MU
# Pooled-variance two-sample t-test; conf.level = 0.90 matches alpha = 0.10.
t.test(dataq3$Abraided, dataq3$Unabraided, var.equal = TRUE, conf.level = 0.90)
#At a 10% significance level, we fail to reject the null hypothesis that the mean breaking strength of abraided fabric is equal to that of unabraided fabric.
#So we do not have sufficient evidence that the mean breaking strengths of the two fabrics differ.
#4. Consider a designed experiment in which the crop yield was measured at 2 levels of crop density/spacing (1=dense, 2=sparse)
#and 3 levels of fertilizer (1=typeA, 2=typeB, 3=typeC). A total of 96 observations were collected.
#A colleague of yours did some preliminary analysis of the data in R using the following code (you may copy and paste this code).
#a. Is the interaction significant (alpha=0.05)?
#b. Are the main effects significant (alpha=0.05)?
#c. Regardless of how dense the crops are planted, which fertilizer would give you the greatest yield? (justify your answer)
#d. Suppose that you had to use fertilizer typeA, would you have a greater yield planting the crop dense or sparse? (justify your answer)
# Two-factor analysis of crop yield (density x fertilizer) using GAD.
library(GAD)
dataq4 <- read.csv(
  file = "https://raw.githubusercontent.com/tmatis12/datafiles/main/cropdata2.csv"
)
str(object = dataq4)
# Declare both factors as fixed (GAD::as.fixed) before fitting.
dataq4[["density"]] <- as.fixed(dataq4[["density"]])
dataq4[["fertilizer"]] <- as.fixed(dataq4[["fertilizer"]])
interaction.plot(
  x.factor = dataq4$fertilizer,
  trace.factor = dataq4$density,
  response = dataq4$yield
)
# Full model: both main effects plus their interaction.
mod <- lm(formula = yield ~ density * fertilizer, data = dataq4)
gad(mod)
# Reduced model: main effects only.
mod2 <- lm(formula = yield ~ density + fertilizer, data = dataq4)
gad(mod2)
#4.a.
#The interaction between density:fertilizer is not significant (p-value=0.5325001 > alpha=0.05)
#4.b.
#The main effect Fertilizer is significant (p-value=0.0002533 < alpha=0.05)
#The main effect Density is significant (p-value=0.0001741 < alpha=0.05)
#4.c.
# Yield per fertilizer type, pooled over density.
# Rows are selected by fertilizer label rather than by fixed row ranges, so the
# groups stay correct even if the file is not sorted by fertilizer.
yieldf1 <- dataq4$yield[dataq4$fertilizer == 1]
yieldf2 <- dataq4$yield[dataq4$fertilizer == 2]
yieldf3 <- dataq4$yield[dataq4$fertilizer == 3]
boxplot(yieldf1, yieldf2, yieldf3, names = c("typeA", "typeB", "typeC"))
#Considering the boxplot, I think that Type C would give the greatest yield (higher median, smaller spread).
#4.d.
# Fertilizer typeA only: dense (1) versus sparse (2) planting.
yielddens1 <- dataq4 %>% filter(density == 1)
yielddens2 <- dataq4 %>% filter(density == 2)
yielddens3 <- yielddens1 %>% filter(fertilizer == 1)
yielddens4 <- yielddens2 %>% filter(fertilizer == 1)
boxplot(yielddens3$yield, yielddens4$yield, names = c("dense", "sparse"))
#Considering the boxplot, I think that sparse density would give the greatest yield (higher median, smaller spread).
#5. Consider designing an experiment in which we wish to test whether there is a difference in the mean between
#4 levels of a single factor (i.e. between 4 populations). Specifically, this is to be a Completely Randomized
#Design that will be analyzed using ANOVA. We would like to collect a sufficient number of samples such that the
#test with an alpha=0.05 level of significance would be able to detect with a power of 85% a mean difference that
#is 50% of the standard deviation. Determine the number of samples to be collected and propose a randomized data
#collection table for this experiment.
# Sample size per group for a one-way ANOVA with 4 groups, effect size f = 0.5,
# alpha = 0.05 and power = 0.85.
# NOTE(review): f = 0.5 uses the "50% of the standard deviation" figure directly
# as Cohen's f; confirm this matches the intended effect-size definition.
power_result <- pwr.anova.test(k = 4, n = NULL, f = .5, sig.level = 0.05, power = 0.85)
power_result
# The solved n is fractional (about 13.3); round UP, because truncating to 13
# leaves the test just short of the 0.85 power target.
reps_per_level <- ceiling(power_result$n)
#After the power test we need to collect 14 samples on each population (56 runs in total).
trt1 <- c("lvl1", "lvl2", "lvl3", "lvl4")
# Completely randomized design; the fixed seed makes the run order reproducible.
design <- design.crd(trt = trt1, r = reps_per_level, seed = 123654)
design$book