Statapult

Our goal with this project is to test 4 different factors regarding the distance a ball was thrown via catapult.(Shown above)

During the course of this project, all of the various labeled setting you can see (Bungee Position, Pin Elevation, and Angle of Throw) were tested in a series of ANOVA tests to find which factor affected the distance a ball flew when launched. In addition to the factors shown in the picture of the statpult, we had 3 different types of balls, as shown below:

Ball Types

From left to right that is Yellow Foam, Red Tennis, and Grey Rubber.

Part 1 (Ball Type)

For this experiment, we pulled the catapult all the way over to an angle of 90 degrees (parallel with the floor) for each throw. Our pin position and bungee position were fixed at 3 and 4(the position where the pin currently is in the Statapult picture), respectively. The only thing tested in this first part was the type of ball.

Number of Replications

In order to find out how many throws were needed to get statistically appropriate results, we used the pwr.anova function in R. Here we want to know how many throws we need to have of each ball type in order to be 95% sure we have detected a difference between the means, and 75% sure we will correctly reject our null hypothesis.

library(pwr)
k <- 3
d <- .5
(pwr.anova.test(k=3,n=NULL,f=(d*sqrt((k^2)-1)/(2*k)),sig.level=0.05,power=0.75))#max
## 
##      Balanced one-way analysis of variance power calculation 
## 
##               k = 3
##               n = 52.55574
##               f = 0.2357023
##       sig.level = 0.05
##           power = 0.75
## 
## NOTE: n is number in each group

The result of this tells us we needed to have 53 samples of each type of ball in order to meet our given design criteria. Our significance level was .05, meaning our probability of correctly identifying a difference in means is 95%. Our power level is chosen to be 75%, and the effect size .2.

Randomized Design

In order to remove any bias from our experiment, we performed a completely randomized design test, generated by R. Shown below is the order that our balls were thrown in, with each ball having a total of 53 throws (for a grand total of 159 entries). The distance was recorded in inches by a member of the team, who measured for where the ball first landed when thrown, regardless of any further bounce or roll.

types <- c('Foam','Tennis','Rubber')
library(agricolae)
design <- design.crd(trt=types,r=53,seed=8238350)
design$book
##     plots  r  types
## 1     101  1 Rubber
## 2     102  2 Rubber
## 3     103  1 Tennis
## 4     104  3 Rubber
## 5     105  2 Tennis
## 6     106  4 Rubber
## 7     107  3 Tennis
## 8     108  4 Tennis
## 9     109  5 Rubber
## 10    110  6 Rubber
## 11    111  7 Rubber
## 12    112  1   Foam
## 13    113  8 Rubber
## 14    114  5 Tennis
## 15    115  6 Tennis
## 16    116  9 Rubber
## 17    117  7 Tennis
## 18    118 10 Rubber
## 19    119  2   Foam
## 20    120  3   Foam
## 21    121 11 Rubber
## 22    122  8 Tennis
## 23    123  4   Foam
## 24    124  9 Tennis
## 25    125 12 Rubber
## 26    126 10 Tennis
## 27    127 11 Tennis
## 28    128 13 Rubber
## 29    129  5   Foam
## 30    130 12 Tennis
## 31    131  6   Foam
## 32    132 14 Rubber
## 33    133  7   Foam
## 34    134 13 Tennis
## 35    135 15 Rubber
## 36    136  8   Foam
## 37    137  9   Foam
## 38    138 14 Tennis
## 39    139 10   Foam
## 40    140 11   Foam
## 41    141 15 Tennis
## 42    142 16 Rubber
## 43    143 17 Rubber
## 44    144 12   Foam
## 45    145 16 Tennis
## 46    146 13   Foam
## 47    147 14   Foam
## 48    148 17 Tennis
## 49    149 18 Tennis
## 50    150 19 Tennis
## 51    151 18 Rubber
## 52    152 19 Rubber
## 53    153 20 Rubber
## 54    154 21 Rubber
## 55    155 20 Tennis
## 56    156 15   Foam
## 57    157 16   Foam
## 58    158 22 Rubber
## 59    159 23 Rubber
## 60    160 24 Rubber
## 61    161 17   Foam
## 62    162 21 Tennis
## 63    163 25 Rubber
## 64    164 18   Foam
## 65    165 22 Tennis
## 66    166 19   Foam
## 67    167 23 Tennis
## 68    168 20   Foam
## 69    169 24 Tennis
## 70    170 21   Foam
## 71    171 22   Foam
## 72    172 25 Tennis
## 73    173 26 Tennis
## 74    174 26 Rubber
## 75    175 23   Foam
## 76    176 27 Tennis
## 77    177 24   Foam
## 78    178 27 Rubber
## 79    179 28 Tennis
## 80    180 28 Rubber
## 81    181 29 Tennis
## 82    182 29 Rubber
## 83    183 25   Foam
## 84    184 26   Foam
## 85    185 30 Rubber
## 86    186 31 Rubber
## 87    187 27   Foam
## 88    188 32 Rubber
## 89    189 30 Tennis
## 90    190 31 Tennis
## 91    191 33 Rubber
## 92    192 28   Foam
## 93    193 32 Tennis
## 94    194 29   Foam
## 95    195 33 Tennis
## 96    196 30   Foam
## 97    197 31   Foam
## 98    198 34 Tennis
## 99    199 32   Foam
## 100   200 35 Tennis
## 101   201 33   Foam
## 102   202 36 Tennis
## 103   203 34 Rubber
## 104   204 37 Tennis
## 105   205 34   Foam
## 106   206 35   Foam
## 107   207 35 Rubber
## 108   208 36   Foam
## 109   209 38 Tennis
## 110   210 36 Rubber
## 111   211 37 Rubber
## 112   212 37   Foam
## 113   213 38   Foam
## 114   214 38 Rubber
## 115   215 39   Foam
## 116   216 39 Rubber
## 117   217 39 Tennis
## 118   218 40 Tennis
## 119   219 41 Tennis
## 120   220 40 Rubber
## 121   221 41 Rubber
## 122   222 42 Rubber
## 123   223 43 Rubber
## 124   224 44 Rubber
## 125   225 42 Tennis
## 126   226 40   Foam
## 127   227 45 Rubber
## 128   228 43 Tennis
## 129   229 41   Foam
## 130   230 42   Foam
## 131   231 46 Rubber
## 132   232 43   Foam
## 133   233 47 Rubber
## 134   234 48 Rubber
## 135   235 49 Rubber
## 136   236 44   Foam
## 137   237 44 Tennis
## 138   238 45 Tennis
## 139   239 45   Foam
## 140   240 46   Foam
## 141   241 47   Foam
## 142   242 48   Foam
## 143   243 49   Foam
## 144   244 50 Rubber
## 145   245 46 Tennis
## 146   246 47 Tennis
## 147   247 51 Rubber
## 148   248 48 Tennis
## 149   249 49 Tennis
## 150   250 50   Foam
## 151   251 52 Rubber
## 152   252 51   Foam
## 153   253 50 Tennis
## 154   254 51 Tennis
## 155   255 53 Rubber
## 156   256 52 Tennis
## 157   257 53 Tennis
## 158   258 52   Foam
## 159   259 53   Foam

An example of our experiment data is shown below. The full file of data can be seen at https://raw.githubusercontent.com/vernonkat/Coursework/main/Data.csv

##   Distance Type
## 1       60 Foam
## 2       63 Foam
## 3       58 Foam
## 4       61 Foam
## 5       60 Foam
## 6       57 Foam

ANOVA Results

We then performed an ANOVA test to see if our data showed a significant difference between ball type. Here, our Null hypothesis is that the means of these 3 samples are statistically equal, and the alternative hypothesis is that they are not.

Ho: \(\mu1=\mu2=\mu3\)

Ha: \(\mu1\neq\mu2\neq\mu3\)

anovatest <- aov(Distance~Type,data=dat)
summary(anovatest)
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## Type          2   7636    3818   15.15 9.71e-07 ***
## Residuals   156  39317     252                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

We can see here that our P value is well below our alpha of .05, meaning our data does show a significant difference in mean distance given ball type, and we reject the null hypothesis. We can state that type of ball does influence how far the ball will go.

ANOVA Assumptions

Given that ANOVA result, we then checked to see if the model was adequate by looking at our variances and residuals for any patterns or indications of assumption violations.

We can see from our plots of residuals that our variance does seem to be roughly equal between ball types, and our data is normally distributed based on the Q-Q plot.

Our data also passes the ‘fat pencil test’ where it says that of all the data points in the normal probability plot are covered by a hypothetical fat pencil then it is normally distributed.

Tukey’s Test

We perform the Tukey’s test to find which types in particular showed a significant difference in the mean distance the ball went. Here, our null hypothesis is that each separate pairs of ball type means are not different from one another.

Ho: \(\mu1-\mu2=0\)

Ha: \(\mu1-\mu2\neq0\)

(run for every combination of the 3 types)

TukeyHSD(anovatest)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Distance ~ Type, data = dat)
## 
## $Type
##                     diff       lwr        upr     p adj
## Rubber-Foam    -7.905660 -15.20317 -0.6081502 0.0302362
## Tennis-Foam   -16.962264 -24.25977 -9.6647540 0.0000005
## Tennis-Rubber  -9.056604 -16.35411 -1.7590936 0.0106117
plot(TukeyHSD(anovatest))

Because each of these pairs does not cross zero, this tells us that each ball had an affect on the distance a ball went.

We can see this difference between the means in a more intuitive sense by plotting the distances for each type:

library(ggplot2)
ggplot(dat,aes(Distance,Type,color=Type))+geom_point(size=3)

As we can see, it’s clear that Foam and Tennis seem to be sortof centered around two different areas, while Rubber varies greatly. This corroborates with what we saw in the residual graphs as well, with one sample showing a wider spread then the other two.


Part 2 (Pin Elevation and Angle)

In part 2, we will perform a designed experiment to determine the effect of Pin Elevation and Release Angle on distance that the Red Tennis ball is thrown. The design will be replicated three times. We will be throwing our red tennis ball using the Statapult, and changing our settings from position 1 and 3 multiple times, and we’ll adjust angles at 110, 140, and 170.

Model Equation

We will be using a traditional value of .05 as our significance level.

Our model equation is as follows: \(y_{ijk}=\mu+\alpha_{i}+\beta_j+\alpha\beta_{ij}+\epsilon_{ijk}\)

Where \(\alpha\) represents the deviation from a mean given by the factor “pin setting”, and \(\beta\) represents the deviation from the mean given by the factor “throwing angle”. \(\alpha\beta_{ij}\) represents the possible deviation due to the interaction between the two factors.

Experiment Layout

Propose a layout with a randomized run order.

We did so through the design.ab function found in the agricolae package in R. As shown below:

library(agricolae)
layout <- design.ab(trt=c(2,3),r=3,design="crd")
layout$book
##    plots r A B
## 1    101 1 2 3
## 2    102 1 2 2
## 3    103 1 2 1
## 4    104 2 2 2
## 5    105 2 2 3
## 6    106 1 1 2
## 7    107 1 1 1
## 8    108 2 1 2
## 9    109 1 1 3
## 10   110 2 1 1
## 11   111 2 1 3
## 12   112 3 2 2
## 13   113 3 1 2
## 14   114 2 2 1
## 15   115 3 2 3
## 16   116 3 1 3
## 17   117 3 1 1
## 18   118 3 2 1

This chart shows us in what order to collect our data in, with the column ‘A’ indicating one of our two pin positions, and column ‘B’ indicating an angle to which the throwing arm was pulled to.

The ‘r’ column counts how many times the particular combination of pin position and throwing angle has occurred. Because we were requested to have 3 replications, each combination happens 3 times.

Collected Data

We have two factors to keep track of (Pin position and Angle of throwing arm). Our experiment was performed in the given order found in part a, and the results are shown below:

angles <- rep(c(110,140,170),6)
pinsetting <- c(rep(1,9),rep(3,9))
dat <- c(22,35,48,22,37,48,21,39,44,27,47,59,25,44,50,24,42,58)
dafr <- data.frame(angles,pinsetting)
dafr$result <- dat
head(dafr)
##   angles pinsetting result
## 1    110          1     22
## 2    140          1     35
## 3    170          1     48
## 4    110          1     22
## 5    140          1     37
## 6    170          1     48

ANOVA Results

Test the hypotheses and state conclusions, determining those effects that are significant. Show any plots that might be useful/necessary to show your findings. You may also show residual plots and make appropriate comments, but do not transform the data (i.e. use the raw data regardless of normality and variance constancy)

We will be using the GAD library in R in order to set our angle as a random factor, and our pin as a fixed. We will be testing (with our .05 significance level) whether either of the two factors have an impact on the distance our ball is thrown. For each factor, our null hypothesis will be if the error represented by that factor is equal to 0 or not. Mathematically:

Null:\(\alpha_{i}=0\) vs Alternative:\(\alpha_{i}\neq0\)

Null:\(\beta_{j}=0\) vs Alternative:\(\beta_{j}\neq0\)

Null:\(\alpha\beta_{ij}=0\) vs Alternative:\(\alpha\beta_{ij}\neq0\)

We will be using a significance level of .05

library(GAD)
dafr$angles <- as.random(dafr$angles)
dafr$pinsetting <- as.fixed(dafr$pinsetting)
model <- aov(result~angles+pinsetting+pinsetting*angles,dafr)
GAD::gad(model)
## Analysis of Variance Table
## 
## Response: result
##                   Df  Sum Sq Mean Sq  F value    Pr(>F)    
## angles             2 2340.78 1170.39 164.5859 1.893e-09 ***
## pinsetting         1  200.00  200.00  17.9104   0.05155 .  
## angles:pinsetting  2   22.33   11.17   1.5703   0.24787    
## Residual          12   85.33    7.11                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The interaction effect was definitely not present, at a p-value of .2. We can see that angle of the throwing arm is definitely significant with a p-value well less than .05. In this case, we would reject our null hypothesis and state that it does have an effect on the distance the ball was thrown. Pin Setting was very nearly significant in our model, only having 0.00155 difference between the significance level of .05 and its p-value.

par(mfrow=c(2,2))
plot(model)

From our residuals, we can see that our data may benefit from a transformation, as the residuals do show a difference in variance. However, we were told to not transform our data at this point in the project. We additionally can see that it does appear to be normally distributed though.


Part 3 (All Factors)

Finally, for part 3, we wished to test all 4 factors simultaneously

We used the design function in R for creating a \(2^4\) factorial design. Each factor (A,B,C,D) has two settings:

A: Pin Elevation (Position 1 or Position 3)

B: Bungee Position (Position 2 or Position 3)

C: Release Angle (\(140^\circ\) or \(170^\circ\))

D: Ball Type (Yellow Foam or Red Tennis)

Data Collection Layout

Each combination of these four factors with their two levels will be tested once in the order given below:

library(agricolae)
design <- design.ab(trt=c(2,2,2,2),r=1,design="crd")
design$book
##    plots r A B C D
## 1    101 1 2 1 2 1
## 2    102 1 2 1 2 2
## 3    103 1 2 2 1 2
## 4    104 1 1 1 2 2
## 5    105 1 1 1 1 1
## 6    106 1 1 2 1 2
## 7    107 1 2 1 1 1
## 8    108 1 1 2 2 1
## 9    109 1 1 2 1 1
## 10   110 1 1 1 1 2
## 11   111 1 2 2 1 1
## 12   112 1 1 1 2 1
## 13   113 1 2 2 2 2
## 14   114 1 2 2 2 1
## 15   115 1 1 2 2 2
## 16   116 1 2 1 1 2

Statapult

The picture above shows the locations of the pin elevation, angle of throw, and bungee position we are testing.

Collected Data

Our data was collected in the order given and is shown below:

A <- c(1,-1,1,-1,-1,1,-1,-1,1,-1,1,1,1,-1,-1,1)
B   <- c(-1,1,1,-1,-1,1,1,-1,1,1,-1,-1,1,1,-1,-1)
C <- c(1,1,-1,-1,1,1,-1,-1,-1,1,-1,-1,1,-1,1,1)
D   <- c(-1,1,1,-1,-1,-1,1,1,-1,-1,1,-1,1,-1,1,1)
RESULTS <- c(58,43,37,19,36,63,33,18,37,44,35,37,50,33,38,41)
dafr <- data.frame(RESULTS,A,B,C,D)
head(dafr)
##   RESULTS  A  B  C  D
## 1      58  1 -1  1 -1
## 2      43 -1  1  1  1
## 3      37  1  1 -1  1
## 4      19 -1 -1 -1 -1
## 5      36 -1 -1  1 -1
## 6      63  1  1  1 -1

The negative ones (-1) in our matrix represent the low level option (i.e. Position 2 of Bungee Position), and the positive ones (1) represent the high level option (i.e. Ball Type Red Tennis).

Model Equation Development

Our full model equation is:

\(Y_{i,j,k,l}\) = \(\mu\) + \(\alpha_i\) + \(\beta_j\) + \(\gamma_k\) + \(\delta_l\) + \(\alpha\beta_{i,j}\) + \(\alpha\gamma_{i,k}\) + \(\alpha\delta_{i,l}\) + \(\beta\gamma_{j,k}\) + \(\beta\delta_{j,l}\) + \(\gamma\delta_{k,l}\) + \(\alpha\beta\gamma_{i,j,k}\) + \(\alpha\gamma\delta_{i,k,l}\) + \(\beta\gamma\delta_{j,k,l}\) + \(\alpha\beta\gamma\delta_{i,j,k,l}\) + \(\epsilon_{i,j,k,l}\)

The following halfnormal plot shows which of the above factors from our full equation significantly deviate from the normal distribution.

library(DoE.base)
halfnormal(aov(RESULTS~A*B*C*D))

Using the halfnormal plot, we can see that only the main effects A (Pin Elevation) and C (Release Angle) are significant on the distance that the ball is launched. No interactions are significant in this experiment. This is interesting, as ball type was significant in Part 1 on our experiment. It seems when these other factors are taking into account, ball type becomes insignificant.

ANOVA

summary(aov(RESULTS~A+C))
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## A            1  552.3   552.3   13.28 0.002968 ** 
## C            1  961.0   961.0   23.11 0.000342 ***
## Residuals   13  540.5    41.6                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Our factors A (Pin Elevation) and C (Release Angle) are indeed significant with p-values below our significance level of 0.05. We will find their coefficients to make a final model equation to predict the ball’s launching distance.

coef(aov(RESULTS~A+C))
## (Intercept)           A           C 
##      38.875       5.875       7.750

Our final model equation for predicting the distance the ball will go is:

\(y=38.875+5.875A+7.750C\)

All Code Used:

library(pwr)
k <- 3
d <- .5
(pwr.anova.test(k=3,n=NULL,f=(d*sqrt((k^2)-1)/(2*k)),sig.level=0.05,power=0.75))#max

types <- c('Foam','Tennis','Rubber')
library(agricolae)
design <- design.crd(trt=types,r=53,seed=8238350)
design$book

dat <- read.csv('https://raw.githubusercontent.com/vernonkat/Coursework/main/Data.csv')
colnames(dat) <- c('Distance','Type')
dat <- dat[,1:2]
dat$Type <- factor(dat$Type)
head(dat)

anovatest <- aov(Distance~Type,data=dat)
summary(anovatest)

par(mfcol=c(1,2))
resonfit <- plot(anovatest,which=1)
stdresonfit <- plot(anovatest,which=3)
qq <- plot(anovatest,which=2)
stdresonfacotr <- plot(anovatest,which=5)

TukeyHSD(anovatest)
plot(TukeyHSD(anovatest))

library(ggplot2)
ggplot(dat,aes(Distance,Type,color=Type))+geom_point(size=3)



library(agricolae)
layout <- design.ab(trt=c(2,3),r=3,design="crd")
layout$book

angles <- rep(c(110,140,170),6)
pinsetting <- c(rep(1,9),rep(3,9))
dat <- c(22,    35, 48, 22  ,37,    48, 21  ,39,    44, 27, 47  ,59 ,25,    44, 50, 24, 42, 58)
dafr <- data.frame(angles,pinsetting)
dafr$result <- dat
head(dafr)

library(GAD)
dafr$angles <- as.random(dafr$angles)
dafr$pinsetting <- as.fixed(dafr$pinsetting)
model <- aov(result~angles+pinsetting+pinsetting*angles,dafr)
GAD::gad(model)

par(mfrow=c(2,2))
plot(model)


library(agricolae)
design <- design.ab(trt=c(2,2,2,2),r=1,design="crd")
design$book

A <- c(1,-1,1,-1,-1,1,-1,-1,1,-1,1,1,1,-1,-1,1)
B   <- c(-1,1,1,-1,-1,1,1,-1,1,1,-1,-1,1,1,-1,-1)
C <- c(1,1,-1,-1,1,1,-1,-1,-1,1,-1,-1,1,-1,1,1)
D   <- c(-1,1,1,-1,-1,-1,1,1,-1,-1,1,-1,1,-1,1,1)
RESULTS <- c(58,43,37,19,36,63,33,18,37,44,35,37,50,33,38,41)
dafr <- data.frame(RESULTS,A,B,C,D)
head(dafr)

library(DoE.base)
halfnormal(aov(RESULTS~A*B*C*D))

summary(aov(RESULTS~A+C))

coef(aov(RESULTS~A+C))