Assignment 9

# Data-frame basics: vectors, data.frame() versus cbind(), factor conversion.

# c() concatenates its arguments into one vector.
FirstVector <- c(2, 3, 4, 5)
SecondVector <- c("A", "B", "C", "D")

# A data frame lets each column keep its own type.
df1 <- data.frame(FirstVector = FirstVector, SecondVector = SecondVector)
df1[["FirstVector"]] # pull one column out by name (same as df1$FirstVector)

## [1] 2 3 4 5

# cbind() on plain vectors builds a matrix, so every value is coerced to a
# single common type (character here, as the printout shows).
df2 <- cbind(FirstVector = FirstVector, SecondVector = SecondVector)
df2

##      FirstVector SecondVector
## [1,] "2"         "A"         
## [2,] "3"         "B"         
## [3,] "4"         "C"         
## [4,] "5"         "D"

# A continuous response paired with a grouping variable.
response <- c(3.4, 5.7, 9.5, 7.5)
factor1 <- SecondVector
df <- data.frame(response = response, factor1 = factor1)
str(df) # always check the column types

## 'data.frame':    4 obs. of  2 variables:
##  $ response: num  3.4 5.7 9.5 7.5
##  $ factor1 : chr  "A" "B" "C" "D"

str(df) # factor1 is still character and must be converted to a factor

## 'data.frame':    4 obs. of  2 variables:
##  $ response: num  3.4 5.7 9.5 7.5
##  $ factor1 : chr  "A" "B" "C" "D"

# Convert the grouping column to a factor in place.
df[["factor1"]] <- as.factor(df[["factor1"]])

str(df) # aov()/anova() need grouping variables stored as factors

## 'data.frame':    4 obs. of  2 variables:
##  $ response: num  3.4 5.7 9.5 7.5
##  $ factor1 : Factor w/ 4 levels "A","B","C","D": 1 2 3 4

str(df) # every column now has the intended type

## 'data.frame':    4 obs. of  2 variables:
##  $ response: num  3.4 5.7 9.5 7.5
##  $ factor1 : Factor w/ 4 levels "A","B","C","D": 1 2 3 4

?expand.grid # look up the help page for expand.grid()

# Treatment label for each observation: fluid types 1-4, six values apiece,
# stored as a factor.
fluid.type <- as.factor(rep(c(1, 2, 3, 4), each = 6))

# Six response values for each fluid type.
fluid1 <- c(17.6, 18.9, 16.3, 17.4, 20.1, 21.6)
fluid2 <- c(16.9, 15.3, 18.6, 17.1, 19.5, 20.3)
fluid3 <- c(21.4, 23.6, 19.4, 18.5, 20.5, 22.3)
fluid4 <- c(19.3, 21.1, 16.9, 17.5, 18.3, 19.8)

# Vectors can be concatenated into one longer vector; the order matches the
# labels in fluid.type.
response <- c(fluid1, fluid2, fluid3, fluid4)

fluid.type

##  [1] 1 1 1 1 1 1 2 2 2 2 2 2 3 3 3 3 3 3 4 4 4 4 4 4
## Levels: 1 2 3 4

response

##  [1] 17.6 18.9 16.3 17.4 20.1 21.6 16.9 15.3 18.6 17.1 19.5 20.3 21.4 23.6 19.4
## [16] 18.5 20.5 22.3 19.3 21.1 16.9 17.5 18.3 19.8

# One row per observation: the response value next to its fluid label.
df <- data.frame(response = response, fluid.type = fluid.type)
df

##    response fluid.type
## 1      17.6          1
## 2      18.9          1
## 3      16.3          1
## 4      17.4          1
## 5      20.1          1
## 6      21.6          1
## 7      16.9          2
## 8      15.3          2
## 9      18.6          2
## 10     17.1          2
## 11     19.5          2
## 12     20.3          2
## 13     21.4          3
## 14     23.6          3
## 15     19.4          3
## 16     18.5          3
## 17     20.5          3
## 18     22.3          3
## 19     19.3          4
## 20     21.1          4
## 21     16.9          4
## 22     17.5          4
## 23     18.3          4
## 24     19.8          4

# Side-by-side boxplots: continuous variable ~ factor variable.
boxplot(response ~ fluid.type)

# aov() does the model fitting; printing the fit shows the sums of squares,
# degrees of freedom and residual standard error.
aov(response ~ fluid.type, data = df)

## Call:
##    aov(formula = response ~ fluid.type, data = df)
## 
## Terms:
##                 fluid.type Residuals
## Sum of Squares    30.16500  65.99333
## Deg. of Freedom          3        20
## 
## Residual standard error: 1.816498
## Estimated effects may be unbalanced

# The same fit, naming the columns through df$ rather than the data argument.
aov(df$response ~ df$fluid.type)

## Call:
##    aov(formula = df$response ~ df$fluid.type)
## 
## Terms:
##                 df$fluid.type Residuals
## Sum of Squares       30.16500  65.99333
## Deg. of Freedom             3        20
## 
## Residual standard error: 1.816498
## Estimated effects may be unbalanced

# anova() applied to the aov fit prints the ANOVA table (F value and p-value).
anova(aov(response ~ fluid.type, data = df))

## Analysis of Variance Table
## 
## Response: response
##            Df Sum Sq Mean Sq F value  Pr(>F)  
## fluid.type  3 30.165 10.0550  3.0473 0.05246 .
## Residuals  20 65.993  3.2997                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Keep the fit in an object so later calls can reuse it.
model <- aov(response ~ fluid.type, data = df)
summary(model) # summary() of an aov object also reports the ANOVA table

##             Df Sum Sq Mean Sq F value Pr(>F)  
## fluid.type   3  30.16   10.05   3.047 0.0525 .
## Residuals   20  65.99    3.30                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Default diagnostic display for the stored aov fit.
plot(model)

# Then each diagnostic on its own, selected by number:
#   1 = residuals versus fitted
#   2 = normal probability plot (NPP)
#   3 = fitted versus sqrt of standardized residuals
#   4 = Cook's distance (not applicable to DOE)
#   5 = factor levels versus standardized residuals
#   6 = Cook's distance versus leverage (not applicable to DOE)
invisible(lapply(1:6, function(panel) plot(model, panel)))

mean(2:5) # arithmetic mean of 2, 3, 4, 5

## [1] 3.5

# cat() writes directly to the console: handy interactively, less so in
# R Markdown.
cat("this is the output:", mean(2:5), sep = " ")

## this is the output: 3.5

# Question 1 ----
# Per-group sample size for a one-way ANOVA, computed for three assumed
# within-group standard deviations (min, intermediate, max).

sigma <- c(min = 2.5, intermediate = 3.5, max = 5.0)
within_var <- sigma^2
f <- 0.5 / sigma
# Because f = 0.5 / sigma, f^2 * sigma^2 works out to 0.25 in every scenario:
# only the within-group variance differs between the three cases.
between_var <- (f^2) * within_var

# Solve stats::power.anova.test() for the per-group sample size.
#
# bv        between-group variance (single positive number)
# wv        within-group variance (single positive number)
# groups    number of groups; defaults to the 4 used in this question
# sig.level significance level; defaults to 0.05
# power     target power; defaults to 0.80
#
# Returns the (generally fractional) n per group; round up with ceiling().
get_n <- function(bv, wv, groups = 4, sig.level = 0.05, power = 0.80) {
  stopifnot(
    is.numeric(bv), length(bv) == 1L, bv > 0,
    is.numeric(wv), length(wv) == 1L, wv > 0
  )
  stats::power.anova.test(
    groups = groups,
    between.var = bv,
    within.var = wv,
    sig.level = sig.level,
    power = power
  )$n
}

# mapply() walks the two variance vectors in parallel and keeps the scenario
# names from between_var.
n_raw <- mapply(get_n, between_var, within_var)
n <- ceiling(n_raw) # a fractional unit cannot be sampled, so round up
n # prints vector: min, intermediate, max

##          min intermediate          max 
##           92          180          365

#Question 2 

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.5.1
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(car)

## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some

library(emmeans)

## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'

library(multcomp)

## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## 
## Attaching package: 'TH.data'
## 
## The following object is masked from 'package:MASS':
## 
##     geyser

library(multcompView)

# Tidy (long) layout: one row per observation, with the fluid label stored
# beside its Life value. Fluids 1-4 contribute six values each, in order.
df <- tibble(
  Fluid = factor(rep(seq_len(4), each = 6)),
  Life = c(
    17.6, 18.9, 16.3, 17.4, 20.1, 21.6,
    16.9, 15.3, 18.6, 17.1, 19.5, 20.3,
    21.4, 23.6, 19.4, 18.5, 20.5, 22.3,
    19.3, 21.1, 16.9, 17.5, 18.3, 19.8
  )
)

# Per-fluid count, mean and standard deviation; the grouping is dropped from
# the result.
summarise(
  group_by(df, Fluid),
  n = n(),
  mean = mean(Life),
  sd = sd(Life),
  .groups = "drop"
)

## # A tibble: 4 × 4
##   Fluid     n  mean    sd
##   <fct> <int> <dbl> <dbl>
## 1 1         6  18.6  1.95
## 2 2         6  18.0  1.85
## 3 3         6  21.0  1.88
## 4 4         6  18.8  1.55

# One-way ANOVA through lm(); the F test is judged at alpha = 0.10.
fit <- lm(Life ~ Fluid, data = df)
anova(fit)

## Analysis of Variance Table
## 
## Response: Life
##           Df Sum Sq Mean Sq F value  Pr(>F)  
## Fluid      3 30.165 10.0550  3.0473 0.05246 .
## Residuals 20 65.993  3.2997                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Model checks: diagnostic plots 1 and 2 drawn side by side.
par(mfrow = c(1, 2))
plot(fit, 1)
plot(fit, 2)

par(mfrow = c(1, 1)) # back to a single-panel layout

# Levene's test, centered on the group medians.
leveneTest(Life ~ Fluid, data = df, center = "median")

## Levene's Test for Homogeneity of Variance (center = "median")
##       Df F value Pr(>F)
## group  3   0.137 0.9368
##       20

# Shapiro-Wilk normality test on the model residuals.
shapiro.test(residuals(fit))

## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(fit)
## W = 0.95671, p-value = 0.376

# Estimated marginal means for each fluid, taken from `fit`.
emm <- emmeans(fit, ~ Fluid)

# Tukey-adjusted pairwise tests; judge the p-values against alpha = 0.10.
# NOTE: `level` sets a confidence level for intervals, but this tests-only
# table shows no interval columns, so it does not affect what is printed here.
pairs(emm, adjust = "tukey", level = 0.90)

##  contrast        estimate   SE df t.ratio p.value
##  Fluid1 - Fluid2    0.700 1.05 20   0.667  0.9081
##  Fluid1 - Fluid3   -2.300 1.05 20  -2.193  0.1593
##  Fluid1 - Fluid4   -0.167 1.05 20  -0.159  0.9985
##  Fluid2 - Fluid3   -3.000 1.05 20  -2.861  0.0441
##  Fluid2 - Fluid4   -0.867 1.05 20  -0.826  0.8413
##  Fluid3 - Fluid4    2.133 1.05 20   2.034  0.2091
## 
## P value adjustment: tukey method for comparing a family of 4 estimates

# Compact letter display at the study's alpha = 0.10. cld() would otherwise
# use its default of 0.05. The letter groups are the same at either level,
# because the only adjusted p-value below 0.10 (Fluid2 - Fluid3, 0.0441) is
# also below 0.05.
multcomp::cld(emm, Letters = letters, adjust = "tukey", alpha = 0.10)

## Note: adjust = "tukey" was changed to "sidak"
## because "tukey" is only appropriate for one set of pairwise comparisons

##  Fluid emmean    SE df lower.CL upper.CL .group
##  2       17.9 0.742 20     15.9     20.0  a    
##  1       18.6 0.742 20     16.6     20.7  ab   
##  4       18.8 0.742 20     16.8     20.8  ab   
##  3       20.9 0.742 20     18.9     23.0   b   
## 
## Confidence level used: 0.95 
## Conf-level adjustment: sidak method for 4 estimates 
## P value adjustment: tukey method for comparing a family of 4 estimates 
## significance level used: alpha = 0.1 
## NOTE: If two or more means share the same grouping symbol,
##       then we cannot show them to be different.
##       But we also did not show them to be the same.

# Comparison arrows at the same alpha = 0.10 (the plot method defaults to
# 0.05).
plot(emm, comparisons = TRUE, adjust = "tukey", alpha = 0.10)

## Warning: `aes_()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`
## ℹ The deprecated feature was likely used in the emmeans package.
##   Please report the issue at <https://github.com/rvlenth/emmeans/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

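Question 1 Answer: The number of samples needed per fluid would be 5 for the minimum-variability case, 8 for the intermediate case, and 14 for the maximum-variability case. (Note: the Question 1 code output reports 92, 180, and 365 for these three cases, so the two sets of figures need to be reconciled.)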

Question 2 Answer: a.) Since the ANOVA p-value (p = 0.052) is less than α = 0.10, we reject the null hypothesis that all four fluids have the same mean life. b.) Yes, based on the residual plots and tests produced (Levene's test p = 0.94, Shapiro-Wilk p = 0.38), the model is adequate. c.) The only fluids that differ significantly are Fluids 2 and 3 (Tukey-adjusted p = 0.044). Fluids 1 and 4 are the closest pair (p = 0.9985), and no other pair differs at α = 0.10.

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Template example chunk: column summaries of the built-in cars data set.
summary(datasets::cars)

##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

Assignment 9

Samuel Martinez

2025-10-05

R Markdown

Including Plots