# Read the `midwest` data set (the ggplot2 copy published in the Rdatasets
# repository) directly from GitHub. The first row holds the column names and
# fields are comma-separated; both are read.csv() defaults.
midwest <- read.csv(
  "https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/csv/ggplot2/midwest.csv"
)
# colnames(midwest)  # uncomment to list the available columns
# Loading libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.1 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(ggplot2)
# Creating samples
# Keep only the variables used by the plots and tests in this script.
selected_columns <- midwest[, c(
  "popdensity", "state", "popwhite", "popblack", "inmetro", "percollege"
)]

# The number of sub-samples is itself random (between 5 and 10); each
# sub-sample has half as many rows as the selected data (rounded down).
num_samples <- sample(5:10, size = 1)
subsample_size <- floor(nrow(selected_columns) * 0.5)

# Draw the sub-samples. Row indices are sampled with replacement, so the same
# row can occur more than once within a sub-sample.
sampled_data <- lapply(seq_len(num_samples), function(k) {
  rows <- sample(nrow(selected_columns), size = subsample_size, replace = TRUE)
  selected_columns[rows, ]
})

# Additionally expose every sub-sample as its own data frame named
# sample_1, sample_2, ..., sample_<num_samples>.
for (k in seq_along(sampled_data)) {
  assign(paste0("sample_", k), sampled_data[[k]])
}
# Bar graphs comparing population density across states (one graph per sample)
# Build one bar chart of population density by state for every sub-sample.
# Iterating over seq_along() is safe even if `sampled_data` is empty, whereas
# 1:length(x) would iterate over c(1, 0) and fail on the first lookup.
# geom_col() is the ggplot2 shorthand for geom_bar(stat = "identity").
# NOTE(review): bars are stacked, so each bar's height is the sum of
# `popdensity` over all sampled rows of that state -- confirm that a sum
# (rather than, say, a mean) is the intended comparison.
bar_graphs <- lapply(seq_along(sampled_data), function(i) {
  ggplot(sampled_data[[i]], aes(x = state, y = popdensity)) +
    geom_col() +
    labs(title = paste("Sample", i))
})

# Render the charts one by one; ggplot objects are not auto-printed inside a
# loop, so print() is called explicitly.
for (bar_graph in bar_graphs) {
  print(bar_graph)
}
# Distribution of population by race across states
# Build one grouped bar chart per sub-sample showing the white and Black
# population counts for each state.
# - pivot_longer() replaces the superseded tidyr::gather(); it stacks the
#   `popwhite` and `popblack` columns into a `population` (name) and `count`
#   (value) pair so that the two groups can be mapped to `fill`.
# - seq_along() keeps the iteration safe if `sampled_data` is empty.
# - The subtitle identifies the sub-sample; without it every chart carries the
#   same title and the printed plots cannot be told apart.
# NOTE(review): with position = "dodge", rows that share a state and a
# population group are drawn on top of each other instead of being summed, so
# the visible bar height is the largest single row -- confirm this is intended.
pop_race <- lapply(seq_along(sampled_data), function(i) {
  long_data <- pivot_longer(
    sampled_data[[i]],
    cols = c(popwhite, popblack),
    names_to = "population",
    values_to = "count"
  )

  ggplot(long_data, aes(x = state, y = count, fill = population)) +
    geom_col(position = "dodge") +
    labs(
      title = "Population by Race and State",
      subtitle = paste("Sample", i),
      y = "Population"
    ) +
    scale_fill_manual(values = c("popwhite" = "blue", "popblack" = "red")) +
    theme_minimal()
})

# Render the charts one by one; print() is needed inside a loop.
for (popracegraph in pop_race) {
  print(popracegraph)
}
# Statistical tests (t-test): the results show how the sub-samples compare with one another in terms of the variable "popdensity".
# Compare `popdensity` in the first sub-sample against every other sub-sample
# with a two-sample t-test (t.test() is called with its default settings).
# The result is an unnamed list: element k holds the test of sub-sample 1
# versus sub-sample k + 1.
# `num_samples` is drawn from 5:10 earlier in this script, so the range
# 2:num_samples always counts upwards and stays within `sampled_data`.
# The argument expressions are kept verbatim because t.test() records them as
# the `data.name` label of each result.
t_test_results <- lapply(2:num_samples, function(i) {
t.test(sampled_data[[1]]$popdensity, sampled_data[[i]]$popdensity)
})
# t_test_results  # uncomment to print all test results