# Two parallel vectors; c() combines (concatenates) its arguments
FirstVector <- c(2, 3, 4, 5)
SecondVector <- c("A", "B", "C", "D")

# data.frame() keeps each column's own type (numeric beside character)
df1 <- data.frame(FirstVector, SecondVector)
df1$FirstVector # <data frame name>$<column name> extracts one column
## [1] 2 3 4 5

# cbind() builds a matrix, so all values are forced to one common type;
# the numbers come back as character strings (see the quotes below)
df2 <- cbind(FirstVector, SecondVector)
df2
## FirstVector SecondVector
## [1,] "2" "A"
## [2,] "3" "B"
## [3,] "4" "C"
## [4,] "5" "D"
response <- c(3.4, 5.7, 9.5, 7.5) # response is a continuous variable
factor1 <- SecondVector # reuse the letters as treatment labels
df <- data.frame(response, factor1)
str(df) # always check the variable types!
## 'data.frame': 4 obs. of 2 variables:
## $ response: num 3.4 5.7 9.5 7.5
## $ factor1 : chr "A" "B" "C" "D"
str(df) # factor1 is still character (chr); it has to become a factor
## 'data.frame': 4 obs. of 2 variables:
## $ response: num 3.4 5.7 9.5 7.5
## $ factor1 : chr "A" "B" "C" "D"
df$factor1 <- as.factor(df$factor1) # convert factor1 from character to factor
str(df) # make sure grouping variables are factors before aov()/anova()
## 'data.frame': 4 obs. of 2 variables:
## $ response: num 3.4 5.7 9.5 7.5
## $ factor1 : Factor w/ 4 levels "A","B","C","D": 1 2 3 4
str(df) # all variable types are now set correctly
## 'data.frame': 4 obs. of 2 variables:
## $ response: num 3.4 5.7 9.5 7.5
## $ factor1 : Factor w/ 4 levels "A","B","C","D": 1 2 3 4
?expand.grid #opens the help page for expand.grid()
# Treatment labels: four fluid types, six observations each, stored as a factor
fluid.type <- as.factor(rep(1:4, each = 6))

# Observed responses, one vector per fluid type
fluid1 <- c(17.6, 18.9, 16.3, 17.4, 20.1, 21.6)
fluid2 <- c(16.9, 15.3, 18.6, 17.1, 19.5, 20.3)
fluid3 <- c(21.4, 23.6, 19.4, 18.5, 20.5, 22.3)
fluid4 <- c(19.3, 21.1, 16.9, 17.5, 18.3, 19.8)
response <- c(fluid1, fluid2, fluid3, fluid4) # vectors can be built from vectors
fluid.type
## [1] 1 1 1 1 1 1 2 2 2 2 2 2 3 3 3 3 3 3 4 4 4 4 4 4
## Levels: 1 2 3 4
response
## [1] 17.6 18.9 16.3 17.4 20.1 21.6 16.9 15.3 18.6 17.1 19.5 20.3 21.4 23.6 19.4
## [16] 18.5 20.5 22.3 19.3 21.1 16.9 17.5 18.3 19.8

# One row per observation: the response next to its fluid type
df <- data.frame(response, fluid.type)
df
## response fluid.type
## 1 17.6 1
## 2 18.9 1
## 3 16.3 1
## 4 17.4 1
## 5 20.1 1
## 6 21.6 1
## 7 16.9 2
## 8 15.3 2
## 9 18.6 2
## 10 17.1 2
## 11 19.5 2
## 12 20.3 2
## 13 21.4 3
## 14 23.6 3
## 15 19.4 3
## 16 18.5 3
## 17 20.5 3
## 18 22.3 3
## 19 19.3 4
## 20 21.1 4
## 21 16.9 4
## 22 17.5 4
## 23 18.3 4
## 24 19.8 4
boxplot(response ~ fluid.type) # continuous variable ~ factor variable
aov(response ~ fluid.type, data = df) # aov() fits the one-way ANOVA model
## Call:
## aov(formula = response ~ fluid.type, data = df)
##
## Terms:
## fluid.type Residuals
## Sum of Squares 30.16500 65.99333
## Deg. of Freedom 3 20
##
## Residual standard error: 1.816498
## Estimated effects may be unbalanced
aov(df$response ~ df$fluid.type) # same fit, columns referenced with $
## Call:
## aov(formula = df$response ~ df$fluid.type)
##
## Terms:
## df$fluid.type Residuals
## Sum of Squares 30.16500 65.99333
## Deg. of Freedom 3 20
##
## Residual standard error: 1.816498
## Estimated effects may be unbalanced
anova(aov(response ~ fluid.type, data = df)) # anova() tabulates the fitted aov model
## Analysis of Variance Table
##
## Response: response
## Df Sum Sq Mean Sq F value Pr(>F)
## fluid.type 3 30.165 10.0550 3.0473 0.05246 .
## Residuals 20 65.993 3.2997
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model <- aov(response ~ fluid.type, data = df) # store the fit so it can be reused
summary(model) # summary() of an aov object prints the ANOVA table
## Df Sum Sq Mean Sq F value Pr(>F)
## fluid.type 3 30.16 10.05 3.047 0.0525 .
## Residuals 20 65.99 3.30
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(model) # default diagnostic plots for the fitted model
plot(model, which = 1) # residuals versus fitted values
plot(model, which = 2) # normal probability plot (NPP)
plot(model, which = 3) # sqrt of standardized residuals versus fitted values
plot(model, which = 4) # Cook's distance (not applicable to DOE)
plot(model, which = 5) # standardized residuals versus factor levels
plot(model, which = 6) # Cook's distance versus leverage (not applicable to DOE)
mean(c(2, 3, 4, 5)) # arithmetic mean of the four values
## [1] 3.5
cat("this is the output:", mean(c(2, 3, 4, 5))) # cat() suits the R console more than R Markdown
## this is the output: 3.5
# Question 1
# Three variability scenarios, given as standard deviations; the names
# (min / intermediate / max) carry through every vector derived below
sigma <- c(min = 2.5, intermediate = 3.5, max = 5.0)
f <- 0.5 / sigma # 0.5 expressed in units of sigma
within_var <- sigma^2 # within-group variance for each scenario
# (0.5 / sigma)^2 * sigma^2 reduces to 0.25, so this is the same in all scenarios
between_var <- f^2 * within_var
# Per-group sample size for a balanced one-way ANOVA.
#
# bv        between-group variance, passed to power.anova.test() as between.var
# wv        within-group variance, passed as within.var
# groups    number of groups being compared (default 4)
# sig.level significance level of the F test (default 0.05)
# power     target power of the test (default 0.80)
#
# The defaults are the values that used to be hard-coded, so existing
# two-argument calls such as mapply(get_n, between_var, within_var) behave
# exactly as before.
#
# Returns the n component of the power.anova.test() result. It is generally
# fractional; round it up with ceiling() to get a usable sample size.
get_n <- function(bv, wv, groups = 4, sig.level = 0.05, power = 0.80) {
  stats::power.anova.test(
    groups = groups,
    between.var = bv,
    within.var = wv,
    sig.level = sig.level,
    power = power
  )$n
}
# Solve for n once per variability scenario, pairing the variances element-wise
n_raw <- mapply(get_n, between_var, within_var)
n <- ceiling(n_raw) # round up to whole observations
# NOTE(review): the written Question 1 answer quotes 5, 8 and 14, which does
# not match the values printed here -- confirm which set is intended
n # prints the vector in the order min, intermediate, max
## min intermediate max
## 92 180 365
#Question 2
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.5.1
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(emmeans)
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
library(multcomp)
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
## Loading required package: MASS
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
##
##
## Attaching package: 'TH.data'
##
## The following object is masked from 'package:MASS':
##
## geyser
library(multcompView)
# Put the data in tidy form: one row per observation
df <- tibble(
  Fluid = factor(rep(1:4, each = 6)),
  Life = c(
    17.6, 18.9, 16.3, 17.4, 20.1, 21.6, # fluid 1
    16.9, 15.3, 18.6, 17.1, 19.5, 20.3, # fluid 2
    21.4, 23.6, 19.4, 18.5, 20.5, 22.3, # fluid 3
    19.3, 21.1, 16.9, 17.5, 18.3, 19.8  # fluid 4
  )
)

# Sample size, mean and standard deviation of Life for each fluid
df %>%
  group_by(Fluid) %>%
  summarise(n = n(), mean = mean(Life), sd = sd(Life), .groups = "drop")
## # A tibble: 4 × 4
## Fluid n mean sd
## <fct> <int> <dbl> <dbl>
## 1 1 6 18.6 1.95
## 2 2 6 18.0 1.85
## 3 3 6 21.0 1.88
## 4 4 6 18.8 1.55
#test at alpha = 0.10: equal fluid means are rejected when Pr(>F) is below 0.10
fit <- lm(Life ~ Fluid, data = df) #one-way model: Life explained by Fluid
anova(fit) #ANOVA table for the fitted model
## Analysis of Variance Table
##
## Response: Life
## Df Sum Sq Mean Sq F value Pr(>F)
## Fluid 3 30.165 10.0550 3.0473 0.05246 .
## Residuals 20 65.993 3.2997
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Model checks
# Draw the two diagnostic plots side by side. par() returns the settings it
# replaces, so they are restored afterwards instead of assuming the layout
# was 1 x 1 to begin with.
old_par <- par(mfrow = c(1, 2))
plot(fit, which = 1) # residuals versus fitted values
plot(fit, which = 2) # normal Q-Q plot of the residuals
par(old_par)
leveneTest(Life ~ Fluid, data = df, center = "median") # equal-variance check
## Levene's Test for Homogeneity of Variance (center = "median")
## Df F value Pr(>F)
## group 3 0.137 0.9368
## 20
shapiro.test(residuals(fit)) # normality check on the residuals
##
## Shapiro-Wilk normality test
##
## data: residuals(fit)
## W = 0.95671, p-value = 0.376
# Estimated marginal mean of Life for each fluid
emm <- emmeans(fit, ~ Fluid)

# Tukey-adjusted pairwise tests: compare each p.value with alpha = 0.10.
# No `level` argument is passed because this table reports tests only, not
# confidence intervals. For 90% intervals use
# confint(pairs(emm, adjust = "tukey"), level = 0.90).
pairs(emm, adjust = "tukey")
## contrast estimate SE df t.ratio p.value
## Fluid1 - Fluid2 0.700 1.05 20 0.667 0.9081
## Fluid1 - Fluid3 -2.300 1.05 20 -2.193 0.1593
## Fluid1 - Fluid4 -0.167 1.05 20 -0.159 0.9985
## Fluid2 - Fluid3 -3.000 1.05 20 -2.861 0.0441
## Fluid2 - Fluid4 -0.867 1.05 20 -0.826 0.8413
## Fluid3 - Fluid4 2.133 1.05 20 2.034 0.2091
##
## P value adjustment: tukey method for comparing a family of 4 estimates

# Compact letter display. alpha is set explicitly so the grouping uses the
# same 0.10 significance level as the F test (the default is 0.05).
multcomp::cld(emm, Letters = letters, adjust = "tukey", alpha = 0.10)
## Note: adjust = "tukey" was changed to "sidak"
## because "tukey" is only appropriate for one set of pairwise comparisons
## Fluid emmean SE df lower.CL upper.CL .group
## 2 17.9 0.742 20 15.9 20.0 a
## 1 18.6 0.742 20 16.6 20.7 ab
## 4 18.8 0.742 20 16.8 20.8 ab
## 3 20.9 0.742 20 18.9 23.0 b
##
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 4 estimates
## P value adjustment: tukey method for comparing a family of 4 estimates
## significance level used: alpha = 0.1
## NOTE: If two or more means share the same grouping symbol,
## then we cannot show them to be different.
## But we also did not show them to be the same.

# Comparison arrows are likewise drawn at alpha = 0.10 rather than the default
plot(emm, comparisons = TRUE, adjust = "tukey", alpha = 0.10)
## Warning: `aes_()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`
## ℹ The deprecated feature was likely used in the emmeans package.
## Please report the issue at <https://github.com/rvlenth/emmeans/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Question 1 Answer: The number of samples needed for each fluid is 5 at the minimum variability, 8 at the intermediate variability, and 14 at the maximum variability.
Question 2 Answer: a.) Since the ANOVA p-value (p = 0.052) is below the significance level α = 0.10, we reject the null hypothesis that all four fluids have the same mean life. b.) Yes, based on the graphs and tests produced (Levene p = 0.937, Shapiro-Wilk p = 0.376), the model is adequate. c.) The only pair of fluids that differs significantly is Fluids 2 and 3 (Tukey-adjusted p = 0.044). Fluids 1 and 4 are the closest to each other (p = 0.999), and no other pair differs significantly.
\
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
summary(cars) # per-column summary statistics (min, quartiles, mean, max) of the cars data
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.