Statistics is a field of study that uses mathematical techniques to gather, review, analyze, and draw conclusions from data. It is a critical part of decision-making and is used to identify patterns, trends, and relationships in diverse datasets.
We will work with numeric data, categorical data, and data with multiple variables.
The following 50 values are fake IQ values for 50 randomly selected people from a population:
106, 108, 92, 127, 104, 102, 119, 89, 93, 136, 100, 125, 78, 97, 106, 105, 85, 100, 84, 111, 116, 67, 119, 81, 107, 110, 97, 90, 102, 107, 113, 69, 75, 121, 116, 107, 111, 114, 60, 117, 93, 103, 96, 113, 95, 108, 94, 84, 118, 111
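We will use the data to compute summary statistics (mean, median, range, and sample standard deviation) and to visualize the distribution with a histogram and a boxplot.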
Another set of data we will use consists of fake grades of the 50 people:
B, B, B, B, B, B, C, B, F, B, B, F, C, B, B, B, C, B, C, B, D, C, C, B, A, C, A, B, A, B, B, C, D, B, B, B, C, A, D, C, D, A, B, B, B, B, B, B, C, B
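We will use the data to show the distribution of the grades in a bar plot and to split the people into a good-grade group (A or B) and a bad-grade group (C, D, or F).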
A third set of data we will use consists of the (fake) numbers of good friends of the 50 people:
32, 28, 25, 29, 24, 22, 31, 20, 27, 47, 24, 33, 14, 25, 29, 32, 20, 18, 21, 38, 40, 11, 28, 33, 30, 32, 29, 16, 25, 23, 30, 16, 11, 39, 36, 26, 34, 37, 17, 34, 25, 29, 34, 39, 30, 30, 27, 24, 30, 24
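We will use the data to examine the relationship between IQ and the number of good friends with scatterplots, both for all 50 people and separately for the good-grade and bad-grade groups.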
Below is the R code used for all analyses and visualizations in this report.
# ---- IQ values (n = 50) ----
# Fake IQ values for 50 randomly selected people, ten values per row.
iq_data <- c(
  106, 108,  92, 127, 104, 102, 119,  89,  93, 136,
  100, 125,  78,  97, 106, 105,  85, 100,  84, 111,
  116,  67, 119,  81, 107, 110,  97,  90, 102, 107,
  113,  69,  75, 121, 116, 107, 111, 114,  60, 117,
   93, 103,  96, 113,  95, 108,  94,  84, 118, 111
)

# Numeric description: the combined summary first, then the individual
# measures of centre (mean, median) and spread (range).
summary(iq_data)
mean(iq_data)
median(iq_data)
range(iq_data)

# sd() uses the n - 1 denominator, i.e. the sample standard deviation.
sd(iq_data)

# Distribution of the IQ values as a histogram.
hist(
  iq_data,
  col = "skyblue",
  border = "black",
  xlab = "IQ",
  main = "Histogram of IQ Values"
)

# The same distribution as a boxplot.
boxplot(
  iq_data,
  col = "lightgreen",
  ylab = "IQ",
  main = "Boxplot of IQ Values"
)
# ---- Letter grades (n = 50) ----
# Fake grades of the 50 people, ten values per row.
grades_data <- c(
  "B", "B", "B", "B", "B", "B", "C", "B", "F", "B",
  "B", "F", "C", "B", "B", "B", "C", "B", "C", "B",
  "D", "C", "C", "B", "A", "C", "A", "B", "A", "B",
  "B", "C", "D", "B", "B", "B", "C", "A", "D", "C",
  "D", "A", "B", "B", "B", "B", "B", "B", "C", "B"
)

# table() counts how often each grade occurs; barplot() draws one bar
# per grade with the count as its height.
barplot(
  table(grades_data),
  col = "lightblue",
  border = "black",
  xlab = "Grades",
  ylab = "Frequency",
  main = "Distribution of Grades"
)
# ---- Number of good friends (n = 50) ----
# Fake numbers of good friends of the 50 people, ten values per row.
friends_data <- c(
  32, 28, 25, 29, 24, 22, 31, 20, 27, 47,
  24, 33, 14, 25, 29, 32, 20, 18, 21, 38,
  40, 11, 28, 33, 30, 32, 29, 16, 25, 23,
  30, 16, 11, 39, 36, 26, 34, 37, 17, 34,
  25, 29, 34, 39, 30, 30, 27, 24, 30, 24
)

# Relationship between the two numeric variables: IQ on the x-axis and
# number of good friends on the y-axis, one filled dot (pch = 19) per person.
plot(
  x = iq_data,
  y = friends_data,
  pch = 19,
  col = "darkblue",
  xlab = "IQ",
  ylab = "Number of Good Friends",
  main = "IQ vs. Number of Good Friends"
)
# Split the people into a "good" (A or B) and a "bad" (C, D or F) grade group.
# grades_factor holds the grades as a factor with the levels in A-to-F order.
grades_factor <- factor(grades_data, levels = c("A", "B", "C", "D", "F"))

# Compute each group's membership mask once and reuse it for both variables,
# so the IQ subset and the friend-count subset always select the same people.
is_good_grade <- grades_factor %in% c("A", "B")
is_bad_grade <- grades_factor %in% c("C", "D", "F")

good_grades <- iq_data[is_good_grade]
bad_grades <- iq_data[is_bad_grade]
good_friends_good_grades <- friends_data[is_good_grade]
good_friends_bad_grades <- friends_data[is_bad_grade]

# Scatterplot for good grades. The axis limits are taken from the complete
# data sets so that the bad-grade points added afterwards fit inside the plot.
plot(good_grades, good_friends_good_grades,
  main = "IQ vs. Number of Good Friends (Good Grades)",
  xlab = "IQ", ylab = "Number of Good Friends",
  pch = 19, col = "green",
  xlim = range(iq_data), ylim = range(friends_data)
)

# Add bad grades data to the same plot
points(bad_grades, good_friends_bad_grades, pch = 19, col = "red")

# The legend is placed in the top-left corner: the largest observation
# (IQ 136 with 47 good friends) lies in the top-right corner and would be
# covered by a legend drawn there, whereas no person combines a low IQ with
# a high number of good friends, so the top-left corner is empty.
legend("topleft",
  legend = c("Good Grades", "Bad Grades"),
  col = c("green", "red"), pch = 19, bty = "n"
)