Lab 2 Lab Manual Exercise

# Store four numbers in a vector.
my_numbers <- c(1, 2, 3, 4)

# Build the sequence 1, 2, ..., 100 in steps of 1.
one_to_one_hundred <- seq(from = 1, to = 100, by = 1)

# Repeat a single value a fixed number of times.
rep(10, times = 5)
## [1] 10 10 10 10 10
rep(1, times = 25)
##  [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1

# Join both repeated runs into one vector (5 tens followed by 25 ones).
all_together_now <- c(rep(10, times = 5), rep(1, times = 25))

# Total of the sequence (the sequence is assigned again with the same value).
one_to_one_hundred <- seq(from = 1, to = 100, by = 1)
sum(one_to_one_hundred)
## [1] 5050

# Number of elements in the sequence.
length(one_to_one_hundred)
## [1] 100

# Mean computed by hand: total divided by count.
sum(one_to_one_hundred) / length(one_to_one_hundred)
## [1] 50.5

# Same mean using the built-in function.
mean(one_to_one_hundred)
## [1] 50.5

# Median of a small example vector.
median(c(1, 2, 3))
## [1] 2
# Mode helper: return the value of `v` that occurs most often.
# When several values share the highest count, the one that appears first
# in `v` is returned (which.max() picks the first maximum).
getmode <- function(v) {
  distinct_vals <- unique(v)
  # Position of each element's value within the distinct values ...
  positions <- match(v, distinct_vals)
  # ... counted per distinct value, in order of first appearance.
  counts <- tabulate(positions)
  distinct_vals[which.max(counts)]
}

# Mode of the 1..100 sequence. Every value occurs exactly once, so all counts
# tie and the first value is reported.
mode_value <- getmode(v = one_to_one_hundred)
mode_value
## [1] 1
# Histogram of 100 draws from a normal distribution (mean 10, sd 5).
# The outputs recorded below come from one particular run of rnorm().
sample_numbers <- rnorm(n = 100, mean = 10, sd = 5)
hist(sample_numbers)

# Smallest and largest value in the sample.
range(sample_numbers)
## [1] -1.625525 23.064066
# Variance of the sample.
var(sample_numbers)
## [1] 22.89914
# Standard deviation of the sample.
sd(sample_numbers)
## [1] 4.785304

# All descriptives on a newly drawn sample; the numbers differ from the ones
# above because rnorm() is called again.
sample_numbers <- rnorm(n = 100, mean = 10, sd = 5)
sum(sample_numbers)
## [1] 1036.165
length(sample_numbers)
## [1] 100
mean(sample_numbers)
## [1] 10.36165
median(sample_numbers)
## [1] 10.33754
range(sample_numbers)
## [1] -0.8191308 25.1298302
var(sample_numbers)
## [1] 21.89019
sd(sample_numbers)
## [1] 4.678695
# Descriptives by condition: 100 simulated scores, 10 per condition (A to J).
scores <- rnorm(n = 100, mean = 10, sd = 5)
conditions <- rep(
  c("A", "B", "C", "D", "E", "F", "G", "H", "I", "J"),
  each = 10
)
my_df <- data.frame(conditions, scores)

# Mean score within each condition, rendered as a table.
library(dplyr)
summary_df <- my_df %>%
  group_by(conditions) %>%
  summarise(
    means = mean(scores)
  )
knitr::kable(summary_df)
conditions means
A 10.678686
B 8.711485
C 9.143707
D 12.648255
E 9.286208
F 10.899951
G 10.182934
H 7.571149
I 9.482191
J 11.328197
# Several descriptives per condition: mean and standard deviation.
summary_df <- my_df %>%
  group_by(conditions) %>%
  summarise(
    means = mean(scores),
    sds = sd(scores)
  )
knitr::kable(summary_df)
conditions means sds
A 10.678686 5.370312
B 8.711485 5.987950
C 9.143707 5.821652
D 12.648255 3.314298
E 9.286208 4.384743
F 10.899951 1.843308
G 10.182934 4.814669
H 7.571149 5.085531
I 9.482191 2.703270
J 11.328197 4.590326
# Per-condition mean, standard deviation, minimum and maximum.
summary_df <- my_df %>%
  group_by(conditions) %>%
  summarise(
    means = mean(scores),
    sds = sd(scores),
    min = min(scores),
    max = max(scores)
  )
knitr::kable(summary_df)
conditions means sds min max
A 10.678686 5.370312 2.1710948 19.87210
B 8.711485 5.987950 0.1184026 18.71503
C 9.143707 5.821652 4.2484013 21.39294
D 12.648255 3.314298 5.1351017 16.90735
E 9.286208 4.384743 2.7631865 15.24517
F 10.899951 1.843308 7.9970792 13.91989
G 10.182934 4.814669 3.1764056 17.43008
H 7.571149 5.085531 -0.9220949 14.38264
I 9.482191 2.703270 3.8332531 12.86312
J 11.328197 4.590326 2.6498537 15.81329

Lab 2 Generalization exercises

# Load the gapminder data and summarise life expectancy per continent.
library(gapminder)
gapminder_df <- gapminder
summary_df <- gapminder_df %>%
  group_by(continent) %>%
  summarise(
    means = mean(lifeExp),
    sds = sd(lifeExp),
    min = min(lifeExp),
    max = max(lifeExp)
  )
knitr::kable(summary_df)
continent means sds min max
Africa 48.86533 9.150210 23.599 76.442
Americas 64.65874 9.345088 37.579 80.653
Asia 60.06490 11.864532 28.801 82.603
Europe 71.90369 5.433178 43.585 81.757
Oceania 74.32621 3.795611 69.120 81.235
# Life expectancy summarised over every row of the data (no grouping);
# missing values, if any, are dropped before each statistic is computed.
summary_all <- gapminder_df %>%
  summarise(
    means = mean(lifeExp, na.rm = TRUE),
    sds = sd(lifeExp, na.rm = TRUE),
    min = min(lifeExp, na.rm = TRUE),
    max = max(lifeExp, na.rm = TRUE)
  )

knitr::kable(summary_all)
means sds min max
59.47444 12.91711 23.599 82.603
# Same life-expectancy summary, restricted to the rows where year is 2007.
summary_2007_all <- gapminder_df %>%
  filter(year == 2007) %>%
  summarise(
    means = mean(lifeExp, na.rm = TRUE),
    sds = sd(lifeExp, na.rm = TRUE),
    min = min(lifeExp, na.rm = TRUE),
    max = max(lifeExp, na.rm = TRUE)
  )

knitr::kable(summary_2007_all)
means sds min max
67.00742 12.07302 39.613 82.603
  1. For the life expectancy for all the gapminder data across all the years and countries the mean is 59.47444, the sd is 12.91711, the min is 23.599, and the max is 82.603.
  2. For 2007, the summary statistics for life expectancy across all continents are a mean of about 67, an sd of about 12, a min of 39.6, and a max of 82.6.

Lab 2 Written answer question

  1. The mode is the value that appears most frequently in a dataset.
  2. A dataset has two modes only when two different values are tied for the highest frequency.
  3. The median is the middle value in a data set when the values are in ascending or descending order.
  4. The mean is the average of a data set.
  5. The range is the difference between the highest and lowest values in a data set.
  6. The difference scores represent the distance between each data point and the mean of the data set.
  7. The difference scores are squared to eliminate the effect of negative values; without squaring, the positive and negative differences from the mean cancel out and sum to zero.
  8. The variance is the square of the standard deviation. Since the variance of the second set is 10^2 = 100 and the variance of the first set is 5^2 = 25, the second set has more variability.