Alright let’s get our feet wet and see what R can do!

The following is called a code chunk. It contains R code that you can use

1 + 1
## [1] 2

Basic math

30 * 30
## [1] 900
(27 / 3) - 1
## [1] 8

Your turn! Do some simple math

5+8
## [1] 13

Dataset

For this exercise, we’ll use a dataset called ChickWeight. This dataset contains the weight of several chickens fed different diets.

  1. Run the following code to get help on the ChickWeight dataset
?ChickWeight
  1. View the first few rows of the data with this code:
head(ChickWeight)
##   weight Time Chick Diet
## 1     42    0     1    1
## 2     51    2     1    1
## 3     59    4     1    1
## 4     64    6     1    1
## 5     76    8     1    1
## 6     93   10     1    1
  1. View the entire dataset in a new window with this code:
View(ChickWeight)
  1. To see the names of the columns in the dataset, use the names() function:
names(ChickWeight)
## [1] "weight" "Time"   "Chick"  "Diet"

Basic descriptive statistics

Now, let’s calculate some simple descriptive statistics.

  1. What was the median weight of the chicks across all time periods?
median(ChickWeight$weight)
## [1] 103
  1. What was the standard deviation of the weights of the chicks?
sd(ChickWeight$weight)
## [1] 71.07196
  1. What was the minimum time point?
min(ChickWeight$Time)
## [1] 0
  1. Your turn! What was the maximum time point?
max(ChickWeight$Time)
## [1] 21

Grouped aggregation

Now, we’ll calculate statistics for different groups

  1. What was the mean weight at each time point?
aggregate(formula = weight ~ Time,
          FUN = mean,
          data = ChickWeight)
##    Time    weight
## 1     0  41.06000
## 2     2  49.22000
## 3     4  59.95918
## 4     6  74.30612
## 5     8  91.24490
## 6    10 107.83673
## 7    12 129.24490
## 8    14 143.81250
## 9    16 168.08511
## 10   18 190.19149
## 11   20 209.71739
## 12   21 218.68889
  1. What was the maximum weight at each time point?
aggregate(formula = weight ~ Time,
          FUN = max,
          data = ChickWeight)
##    Time weight
## 1     0     43
## 2     2     55
## 3     4     69
## 4     6     96
## 5     8    131
## 6    10    163
## 7    12    217
## 8    14    240
## 9    16    287
## 10   18    332
## 11   20    361
## 12   21    373
  1. Your turn! What was the median weight for each diet?
aggregate(formula = weight ~ Diet,
          FUN = median,
          data = ChickWeight)
##   Diet weight
## 1    1   88.0
## 2    2  104.5
## 3    3  125.5
## 4    4  129.5

Plotting

  1. Create a histogram of all weights
hist(x = ChickWeight$weight,
     xlab = "Weights",
     main = "Distribution of Chick weights"
     )

  1. Create a histogram of weights for each diet
par(mfrow = c(2, 2))

for (diet.i in 1:4) {
  
  hist(x = ChickWeight$weight[ChickWeight$Diet == diet.i],
       xlab = "weights",
       xlim = c(0, 400),
       main = paste("Chick Weights\nDiet ", diet.i, sep = "")
       )
  
}

  1. Create a PireatePlot of the weights
library(yarrr)

par(mfrow = c(1, 1))

pirateplot(dv.name = "weight",
           iv.name = "Diet",
           data = ChickWeight
           )

  1. Show the change in median weights over time
# Set up the plot
plot(1, 
     xlim = c(0, 25), 
     ylim = c(0, 400), 
     type = "n",
     xlab = "Week",
     ylab = "Median Weight",
     main = "Chick Weights across Time"
     )

# Loop over diets
for(diet.i in 1:4) {

data.temp <-  subset(ChickWeight, Diet == diet.i)
  
# Add raw data
  
points(data.temp$Time,
       y = data.temp$weight,
       pch = 16,
       col = piratepal("basel", trans = .9)[diet.i]
       )
  
  
# Calculate average weight for current diet
avg.weights <- aggregate(formula = weight ~ Time, 
                         data = data.temp, 
                         FUN = median)

# Add average lines
lines(x = avg.weights$Time, 
      y = avg.weights$weight,
      col = piratepal("basel")[diet.i],
      lwd = 2
      )
}

# Add legend
legend("topleft",
       legend = paste("Diet.", 1:4, sep = ""),
       lwd = 2,
       col = piratepal("basel")[1:4],
       bty = "n"
       )

Frequentist statistics

  1. Is there a significant effect of time on weight?
time.weight.aov <- aov(formula = weight ~ Time,
       data = ChickWeight)

summary(time.weight.aov)
##              Df  Sum Sq Mean Sq F value Pr(>F)    
## Time          1 2042344 2042344    1349 <2e-16 ***
## Residuals   576  872212    1514                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Yes, there is a significant effect of time on weight: F(1, 576) = 1349, p < .01

  1. Your turn! Is there a significant effect of Diet on weight?
diet.weight.aov <- aov(formula = weight ~ Diet,
       data = ChickWeight)

summary(diet.weight.aov)
##              Df  Sum Sq Mean Sq F value   Pr(>F)    
## Diet          3  155863   51954   10.81 6.43e-07 ***
## Residuals   574 2758693    4806                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Yes, there is a significant effect of time on weight: F(3, 574) = 10.81, p < .01

Make fun plots!

  1. Use a Basel themed color palette
piratepal("basel", action = "show")
## Loading required package: jpeg

  1. Use the palette to make a pretty scatterplot
x <- rnorm(1000)
y <- x + rnorm(1000)

par(mfrow = c(1, 2))

plot(x, y, main = "Boring Plot")


plot(x, y, 
     main = "Basel themed plot!",
     pch = 16, 
     cex = rnorm(1000, sd = 1),
     bty = "n",
     xlab = "",
     ylab = "",
     col = piratepal("basel", trans = .5)
     
     )

Make up data

i <- 0
current.p <- 1

while (current.p > .05) {
  
i <- i + 1

males <- round(rnorm(n = 10, mean = 100, sd = 1), 0)
females <- round(rnorm(n = 10, mean = 100, sd = 1), 0)

test.result <- t.test(x = males, y = females)
 
current.p <- test.result$p.value
 
}

print(paste("I collected data from 10 males and 10 females. The mean of males was ", 
      round(mean(males), 2),
      ". The mean of females was ",
      round(mean(females), 2),
      ". A t-test was significant: t(",
      round(test.result$parameter, 2), ") = ", 
      round(test.result$statistic, 2),
      ", p = ", 
      round(test.result$p.value, 2),
      " (in reality, I collected data from ",
      i, " groups, but I'm only showing you the one group that showed a significant effect!)",
      sep = ""
      ))
## [1] "I collected data from 10 males and 10 females. The mean of males was 100.3. The mean of females was 99.7. A t-test was significant: t(16.3) = 2.29, p = 0.04 (in reality, I collected data from 7 groups, but I'm only showing you the one group that showed a significant effect!)"