Module 09 Exercise

library(dplyr)

## Warning: package 'dplyr' was built under R version 4.0.5

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(magrittr)
# Load the deer data set from the CSV file in the working directory.
deer <- read.csv(file = "Deer.csv")

t-tests for all 3 actors (Aragorn vs. Legolas and Gimli vs. Legolas)

# Simulate 50 normally distributed values per actor. The draws are made in
# the order Legolas, aragorn, gimli.
Legolas <- rnorm(n = 50, mean = 195, sd = 15)
aragorn <- rnorm(n = 50, mean = 180, sd = 10)
gimli <- rnorm(n = 50, mean = 132, sd = 15)
# One-sided two-sample t-test; the alternative is mean(aragorn) > mean(Legolas).
# NOTE(review): aragorn is simulated with the smaller mean, so this direction
# is expected to give a p-value near 1 -- confirm this is the intended test.
t.test(x = aragorn, y = Legolas, alternative = "greater")

## 
##  Welch Two Sample t-test
## 
## data:  aragorn and Legolas
## t = -5.7528, df = 79.373, p-value = 1
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  -18.31623       Inf
## sample estimates:
## mean of x mean of y 
##  179.9778  194.1842

# One-sided two-sample t-test; the alternative is mean(gimli) > mean(Legolas).
t.test(x = gimli, y = Legolas, alternative = "greater")

## 
##  Welch Two Sample t-test
## 
## data:  gimli and Legolas
## t = -17.734, df = 94.487, p-value = 1
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  -64.95049       Inf
## sample estimates:
## mean of x mean of y 
##  134.7966  194.1842

F-test for Gimli and Legolas

# F test comparing the variance of gimli with the variance of Legolas.
var.test(x = gimli, y = Legolas)

## 
##  F test to compare two variances
## 
## data:  gimli and Legolas
## F = 1.4777, num df = 49, denom df = 49, p-value = 0.1752
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.8385758 2.6040365
## sample estimates:
## ratio of variances 
##           1.477729

Correlation for the 3 individual species

# Build one data frame per iris species.
setosa <- dplyr::filter(iris, Species == "setosa")
versicolor <- dplyr::filter(iris, Species == "versicolor")
virginica <- dplyr::filter(iris, Species == "virginica")
# Correlation test between sepal length and sepal width within setosa.
cor.test(x = setosa$Sepal.Length, y = setosa$Sepal.Width)

## 
##  Pearson's product-moment correlation
## 
## data:  setosa$Sepal.Length and setosa$Sepal.Width
## t = 7.6807, df = 48, p-value = 6.71e-10
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.5851391 0.8460314
## sample estimates:
##       cor 
## 0.7425467

# Correlation test between sepal length and sepal width within versicolor.
cor.test(x = versicolor$Sepal.Length, y = versicolor$Sepal.Width)

## 
##  Pearson's product-moment correlation
## 
## data:  versicolor$Sepal.Length and versicolor$Sepal.Width
## t = 4.2839, df = 48, p-value = 8.772e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2900175 0.7015599
## sample estimates:
##       cor 
## 0.5259107

# Correlation test between sepal length and sepal width within virginica.
cor.test(x = virginica$Sepal.Length, y = virginica$Sepal.Width)

## 
##  Pearson's product-moment correlation
## 
## data:  virginica$Sepal.Length and virginica$Sepal.Width
## t = 3.5619, df = 48, p-value = 0.0008435
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2049657 0.6525292
## sample estimates:
##       cor 
## 0.4572278

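There appears to be a statistically significant positive correlation between sepal length and sepal width within each individual species, given the low p-values. The correlation is strongest for setosa (r ≈ 0.74) and moderate for versicolor (r ≈ 0.53) and virginica (r ≈ 0.46).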

Deer caught per month

# Count the number of rows in deer for each value of Month.
table(deer$Month)

## 
##   1   2   3   4   5   6   7   8   9  10  11  12 
## 256 165  27   3   2  35  11  19  58 168 189 188

# Chi-squared test on the monthly counts. No probabilities are supplied, so
# chisq.test() uses its default expected distribution across the months.
chisq.test(x = table(deer$Month))

## 
##  Chi-squared test for given probabilities
## 
## data:  table(deer$Month)
## X-squared = 997.07, df = 11, p-value < 2.2e-16

The p-value (< 2.2e-16) suggests that the number of deer caught differs significantly between months.

Tb across farms

# Cross-tabulate Tb (rows) against Farm (columns).
table(deer$Tb, deer$Farm)

##    
##      AL  AU  BA  BE  CB CRC  HB LCV  LN MAN  MB  MO  NC  NV  PA  PN  QM  RF  RN
##   0  10  23  67   7  88   4  22   0  28  27  16 186  24  18  11  39  67  23  21
##   1   3   0   5   0   3   0   1   1   6  24   5  31   4   1   0   0   7   1   0
##    
##      RO SAL SAU  SE  TI  TN VISO  VY
##   0  31   0   3  16   9  16   13  15
##   1   0   1   0  10   0   2    1   4

# Pearson chi-squared test on the Tb-by-Farm contingency table.
# NOTE(review): R warns here that the chi-squared approximation may be
# incorrect, which usually points to small expected cell counts. Consider
# chisq.test(..., simulate.p.value = TRUE) or fisher.test() -- confirm which
# approach the exercise expects.
chisq.test(table(deer$Tb, deer$Farm))

## Warning in chisq.test(table(deer$Tb, deer$Farm)): Chi-squared approximation may
## be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  table(deer$Tb, deer$Farm)
## X-squared = 129.09, df = 26, p-value = 1.243e-15

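The p-value suggests a statistically significant difference in the distribution of Tb between farms. The p-value does not measure the size of that difference, and the warning above indicates that the chi-squared approximation may be unreliable for this table, so the result should be interpreted with caution.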