Lab 2 Lab Manual Exercise

# Store four numbers in a vector.
my_numbers <- c(1, 2, 3, 4)

# Build the sequence 1, 2, ..., 100 (step size 1) and keep it in
# one_to_one_hundred.
one_to_one_hundred <- seq(from = 1, to = 100, by = 1)

# Repeat a single value a given number of times: five 10s ...
rep(10, times = 5)

## [1] 10 10 10 10 10

# ... and twenty-five 1s.
rep(1, times = 25)

##  [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1

# Join both runs of repeated values into one vector, all_together_now.
all_together_now <- c(rep(10, times = 5), rep(1, times = 25))

# Add up every number in the sequence 1, 2, ..., 100.
one_to_one_hundred <- seq(from = 1, to = 100, by = 1)
sum(one_to_one_hundred)

## [1] 5050

# Count how many numbers one_to_one_hundred holds.
length(one_to_one_hundred)

## [1] 100

# Mean computed by hand: the total divided by the count.
sum(one_to_one_hundred) / length(one_to_one_hundred)

## [1] 50.5

# The built-in mean() gives the same result in a single call.
mean(one_to_one_hundred)

## [1] 50.5

# Median (middle value) of a small example vector.
median(c(1, 2, 3))

## [1] 2

# Define getmode(), a helper that returns the mode of a vector.
#
# v: a vector of values.
# Returns the element of `v` that occurs most often. When several values
# share the highest count, the one that appears first in `v` is returned.
getmode <- function(v) {
  distinct_values <- unique(v)
  # Map every element of v to the index of its distinct value, then count
  # how many times each index occurs.
  positions <- match(v, distinct_values)
  counts <- tabulate(positions)
  # which.max() picks the first index holding the largest count.
  distinct_values[which.max(counts)]
}

# Every value in one_to_one_hundred occurs exactly once, so no value is
# more frequent than the others; getmode() then returns the first value.
mode_value <- getmode(v = one_to_one_hundred)
mode_value

## [1] 1

# Draw 100 random values from a normal distribution (mean 10, sd 5) and
# plot their histogram. No seed is set here, so the values recorded in the
# output comments below differ from run to run.
sample_numbers <- rnorm(n = 100, mean = 10, sd = 5)
hist(sample_numbers)

# Range: the smallest and the largest sampled value.
range(sample_numbers)

## [1] -1.625525 23.064066

# Variance of the sample.
var(sample_numbers)

## [1] 22.89914

# Standard deviation of the sample.
sd(sample_numbers)

## [1] 4.785304

# All descriptives, computed on a fresh sample of 100 normal draws
# (mean 10, sd 5). No seed is set here, so the recorded outputs differ
# between runs.
sample_numbers <- rnorm(n = 100, mean = 10, sd = 5)

# Total of the sampled values.
sum(sample_numbers)

## [1] 1036.165

# Number of sampled values.
length(sample_numbers)

## [1] 100

# Central tendency: mean and median.
mean(sample_numbers)

## [1] 10.36165

median(sample_numbers)

## [1] 10.33754

# Spread: range (minimum and maximum), variance, standard deviation.
range(sample_numbers)

## [1] -0.8191308 25.1298302

var(sample_numbers)

## [1] 21.89019

sd(sample_numbers)

## [1] 4.678695

# Descriptives by condition: simulate 100 scores and label them with ten
# conditions ("A" to "J"), ten consecutive scores per condition.
scores <- rnorm(n = 100, mean = 10, sd = 5)
conditions <- rep(LETTERS[1:10], each = 10)
my_df <- data.frame(conditions = conditions, scores = scores)

# Group_by and summarize across conditions. 
library(dplyr)
# One row per condition holding the mean of its scores, rendered as a table.
summary_df <- my_df %>%
  group_by(conditions) %>%
  summarise(
    means = mean(scores)
  )
knitr::kable(summary_df)

conditions	means
A	10.678686
B	8.711485
C	9.143707
D	12.648255
E	9.286208
F	10.899951
G	10.182934
H	7.571149
I	9.482191
J	11.328197

# Group multiple descriptives: mean and standard deviation of the scores
# within each condition, rendered as a table.
summary_df <- my_df %>%
  group_by(conditions) %>%
  summarise(
    means = mean(scores),
    sds = sd(scores)
  )
knitr::kable(summary_df)

conditions	means	sds
A	10.678686	5.370312
B	8.711485	5.987950
C	9.143707	5.821652
D	12.648255	3.314298
E	9.286208	4.384743
F	10.899951	1.843308
G	10.182934	4.814669
H	7.571149	5.085531
I	9.482191	2.703270
J	11.328197	4.590326

# Mean, standard deviation, minimum and maximum of the scores within each
# condition, rendered as a table.
summary_df <- my_df %>%
  group_by(conditions) %>%
  summarise(
    means = mean(scores),
    sds = sd(scores),
    min = min(scores),
    max = max(scores)
  )
knitr::kable(summary_df)

conditions	means	sds	min	max
A	10.678686	5.370312	2.1710948	19.87210
B	8.711485	5.987950	0.1184026	18.71503
C	9.143707	5.821652	4.2484013	21.39294
D	12.648255	3.314298	5.1351017	16.90735
E	9.286208	4.384743	2.7631865	15.24517
F	10.899951	1.843308	7.9970792	13.91989
G	10.182934	4.814669	3.1764056	17.43008
H	7.571149	5.085531	-0.9220949	14.38264
I	9.482191	2.703270	3.8332531	12.86312
J	11.328197	4.590326	2.6498537	15.81329

Lab 2 Generalization exercises

library(gapminder)
# Keep the gapminder data under a local name, then summarise lifeExp
# (mean, standard deviation, minimum, maximum) separately per continent.
gapminder_df <- gapminder
summary_df <- gapminder_df %>%
  group_by(continent) %>%
  summarise(
    means = mean(lifeExp),
    sds = sd(lifeExp),
    min = min(lifeExp),
    max = max(lifeExp)
  )
knitr::kable(summary_df)

continent	means	sds	min	max
Africa	48.86533	9.150210	23.599	76.442
Americas	64.65874	9.345088	37.579	80.653
Asia	60.06490	11.864532	28.801	82.603
Europe	71.90369	5.433178	43.585	81.757
Oceania	74.32621	3.795611	69.120	81.235

# lifeExp descriptives over the whole data set (no grouping, no filtering);
# missing values, if any, are dropped before each statistic is computed.
summary_all <- summarise(
  gapminder_df,
  means = mean(lifeExp, na.rm = TRUE),
  sds = sd(lifeExp, na.rm = TRUE),
  min = min(lifeExp, na.rm = TRUE),
  max = max(lifeExp, na.rm = TRUE)
)

knitr::kable(summary_all)

means	sds	min	max
59.47444	12.91711	23.599	82.603

# lifeExp descriptives restricted to the rows whose year is 2007; missing
# values, if any, are dropped before each statistic is computed.
summary_2007_all <- gapminder_df %>%
  filter(year == 2007) %>%
  summarise(
    means = mean(lifeExp, na.rm = TRUE),
    sds = sd(lifeExp, na.rm = TRUE),
    min = min(lifeExp, na.rm = TRUE),
    max = max(lifeExp, na.rm = TRUE)
  )

knitr::kable(summary_2007_all)

means	sds	min	max
67.00742	12.07302	39.613	82.603

Across all years and countries in the gapminder data, life expectancy has a mean of 59.47444, a standard deviation of 12.91711, a minimum of 23.599, and a maximum of 82.603.
For 2007, the summary statistics for life expectancy across all continents are a mean of 67, a standard deviation of 12, a minimum of 39.6, and a maximum of 82.6.

Lab 2 Written answer question

The mode is the value that appears most frequently in a dataset.
A dataset has two modes only when two different values are tied for the highest frequency.
The median is the middle value in a data set when the values are arranged in ascending or descending order.
The mean is the average of a data set.
The range is the difference between the highest and lowest values in a data set.
The difference scores represent the signed deviation of each data point from the mean of the data set.
The difference scores are squared to eliminate the effect of negative values: unsquared, the positive and negative difference scores cancel out and always sum to zero.
The variance is the square of the standard deviation. Since the variance of the second set is 10^2 = 100 and the variance of the first set is 5^2 = 25, the second set has more variability.

Lab 2 Descriptives Harper Pemberton

Harper Pemberton

2/23/2025

Lab 2 Lab Manual Exercise

Lab 2 Generalization exercises

Lab 2 Written answer question