# ---- Load the temperature matrices ----
# All three CSV files live in one folder; keeping that folder in a single
# constant means the script can be pointed at another machine in one place.
data_dir <- "C:/Users/clair/OneDrive/Documents/AAA Spring 2025/Data/Final Project"

# Read one header-less CSV from `data_dir` and return it as a numeric matrix.
# Stops immediately, naming the file, if any column failed to parse as a
# number, rather than letting the problem surface later inside kmeans().
read_temperature_matrix <- function(file_name) {
  path <- file.path(data_dir, file_name)
  temps <- as.matrix(utils::read.csv(path, header = FALSE))
  if (!is.numeric(temps)) {
    stop("Non-numeric values found in ", path, call. = FALSE)
  }
  temps
}

blkwhite <- read_temperature_matrix("blkwhitecontrol.csv")
brds_0402 <- read_temperature_matrix("040225.csv")
brds_0430 <- read_temperature_matrix("043025_brds.csv")
# ---- Black/white control boards: histogram colored by k-means cluster ----
# Clusters the control-board temperatures with k-means, then draws the
# Freedman-Diaconis histogram with each bar filled in the color of the
# cluster that holds most of the readings in that bin.
set.seed(1234) # fixes kmeans' random starts so the clustering is reproducible
clsize <- 4
blkwhite_vec <- as.vector(blkwhite)
tt <- stats::kmeans(blkwhite_vec, centers = clsize, nstart = 10)

# kmeans numbers its clusters arbitrarily. Relabel them by ascending center
# (1 = coldest, clsize = hottest) so the sequential palette follows
# temperature instead of the arbitrary label.
center_order <- order(tt$centers[, 1])
cluster_ordered <- match(tt$cluster, center_order)
# rev = TRUE so that, with clusters ranked by center, the hottest cluster
# takes the dark-red end of "YlOrRd" and the coldest the pale-yellow end.
cluster_colors <- hcl.colors(clsize, "YlOrRd", rev = TRUE)
df_blkwhite <- data.frame(
  temp = blkwhite_vec,
  cluster = factor(cluster_ordered, levels = seq_len(clsize))
)

par(mar = c(5.1, 4.1, 4.1, 2.1))
hist_data <- hist(blkwhite_vec, breaks = "FD", plot = FALSE)
breaks <- hist_data$breaks
mids <- hist_data$mids

# Assign readings to bins with the same convention hist() uses by default for
# its counts: right-closed intervals with the lowest break included. Testing
# `>= bin_min & < bin_max` instead would color a bar from readings that
# hist() counted in the neighbouring bar whenever a reading sits exactly on a
# break, and would drop readings equal to the last break.
bin_id <- cut(
  df_blkwhite$temp,
  breaks = breaks,
  right = TRUE,
  include.lowest = TRUE,
  labels = FALSE
)

# Rows = histogram bins, columns = clusters; empty bins keep an all-zero row.
bin_cluster_counts <- table(
  factor(bin_id, levels = seq_along(mids)),
  df_blkwhite$cluster
)
# which.max() returns the first maximum, so ties go to the colder cluster.
dominant_cluster <- apply(bin_cluster_counts, 1L, which.max)
barcolors <- unname(ifelse(
  rowSums(bin_cluster_counts) > 0,
  cluster_colors[dominant_cluster],
  "gray90" # neutral fill for bins that contain no readings
))

graphics::barplot(
  hist_data$counts,
  col = barcolors,
  names.arg = round(mids, 1),
  xlab = "Temperature (F)",
  ylab = "Frequency",
  main = "Black and White Boards Histogram by Cluster"
)
# ---- April 2nd boards: histogram colored by k-means cluster ----
# Clusters the April 2nd temperatures with k-means, then draws the
# Freedman-Diaconis histogram with each bar filled in the color of the
# cluster that holds most of the readings in that bin.
set.seed(1234) # fixes kmeans' random starts so the clustering is reproducible
clsize <- 4
brds_0402_vec <- as.vector(brds_0402)
tt <- stats::kmeans(brds_0402_vec, centers = clsize, nstart = 10)

# kmeans numbers its clusters arbitrarily. Relabel them by ascending center
# (1 = coldest, clsize = hottest) so the sequential palette follows
# temperature instead of the arbitrary label.
center_order <- order(tt$centers[, 1])
cluster_ordered <- match(tt$cluster, center_order)
# rev = TRUE so that, with clusters ranked by center, the hottest cluster
# takes the dark-red end of "YlOrRd" and the coldest the pale-yellow end.
cluster_colors <- hcl.colors(clsize, "YlOrRd", rev = TRUE)
df_0402 <- data.frame(
  temp = brds_0402_vec,
  cluster = factor(cluster_ordered, levels = seq_len(clsize))
)

par(mar = c(5.1, 4.1, 4.1, 2.1))
hist_data <- hist(brds_0402_vec, breaks = "FD", plot = FALSE)
breaks <- hist_data$breaks
mids <- hist_data$mids

# Assign readings to bins with the same convention hist() uses by default for
# its counts: right-closed intervals with the lowest break included. Testing
# `>= bin_min & < bin_max` instead would color a bar from readings that
# hist() counted in the neighbouring bar whenever a reading sits exactly on a
# break, and would drop readings equal to the last break.
bin_id <- cut(
  df_0402$temp,
  breaks = breaks,
  right = TRUE,
  include.lowest = TRUE,
  labels = FALSE
)

# Rows = histogram bins, columns = clusters; empty bins keep an all-zero row.
bin_cluster_counts <- table(
  factor(bin_id, levels = seq_along(mids)),
  df_0402$cluster
)
# which.max() returns the first maximum, so ties go to the colder cluster.
dominant_cluster <- apply(bin_cluster_counts, 1L, which.max)
barcolors <- unname(ifelse(
  rowSums(bin_cluster_counts) > 0,
  cluster_colors[dominant_cluster],
  "gray90" # neutral fill for bins that contain no readings
))

graphics::barplot(
  hist_data$counts,
  col = barcolors,
  names.arg = round(mids, 1),
  xlab = "Temperature (F)",
  ylab = "Frequency",
  main = "Black, White and Seedum Histogram by Cluster, April 2nd"
)
library(ggplot2)
# ---- Boxplots comparing the three data sets ----
# Flatten each matrix, stack the readings into one long data frame labelled by
# the data set they came from, and draw one box per data set.
blkwhite_vec <- as.vector(blkwhite)
brds_0402_vec <- as.vector(brds_0402)
brds_0430_vec <- as.vector(brds_0430)

source_labels <- c("blkwhite", "brds_0402", "brds_0430")
source_sizes <- c(
  length(blkwhite_vec),
  length(brds_0402_vec),
  length(brds_0430_vec)
)

# One row per reading; each label is repeated once per reading in its set.
temp_df <- data.frame(
  Temperature = c(blkwhite_vec, brds_0402_vec, brds_0430_vec),
  Source = factor(rep(source_labels, times = source_sizes))
)

# Unnamed fill values are paired with the Source levels in order.
source_fills <- c("darkblue", "darkgreen", "firebrick")

ggplot(data = temp_df, mapping = aes(x = Source, y = Temperature, fill = Source)) +
  geom_boxplot(alpha = 0.7) +
  scale_fill_manual(values = source_fills) +
  labs(
    title = "Boxplot of Temperatures by Source",
    x = "Source",
    y = "Temperature (\u00b0F)"
  ) +
  theme_minimal()
# ---- April 30th boards: histogram colored by k-means cluster ----
# Clusters the April 30th temperatures with k-means, then draws the
# Freedman-Diaconis histogram with each bar filled in the color of the
# cluster that holds most of the readings in that bin.
set.seed(1234) # fixes kmeans' random starts so the clustering is reproducible
clsize <- 4
brds_0430_vec <- as.vector(brds_0430)
tt <- stats::kmeans(brds_0430_vec, centers = clsize, nstart = 10)

# kmeans numbers its clusters arbitrarily. Relabel them by ascending center
# (1 = coldest, clsize = hottest) so the sequential palette follows
# temperature instead of the arbitrary label.
center_order <- order(tt$centers[, 1])
cluster_ordered <- match(tt$cluster, center_order)
# rev = TRUE so that, with clusters ranked by center, the hottest cluster
# takes the dark-red end of "YlOrRd" and the coldest the pale-yellow end.
cluster_colors <- hcl.colors(clsize, "YlOrRd", rev = TRUE)
df_0430 <- data.frame(
  temp = brds_0430_vec,
  cluster = factor(cluster_ordered, levels = seq_len(clsize))
)

par(mar = c(5.1, 4.1, 4.1, 2.1))
hist_data <- hist(brds_0430_vec, breaks = "FD", plot = FALSE)
breaks <- hist_data$breaks
mids <- hist_data$mids

# Assign readings to bins with the same convention hist() uses by default for
# its counts: right-closed intervals with the lowest break included. Testing
# `>= bin_min & < bin_max` instead would color a bar from readings that
# hist() counted in the neighbouring bar whenever a reading sits exactly on a
# break, and would drop readings equal to the last break.
bin_id <- cut(
  df_0430$temp,
  breaks = breaks,
  right = TRUE,
  include.lowest = TRUE,
  labels = FALSE
)

# Rows = histogram bins, columns = clusters; empty bins keep an all-zero row.
bin_cluster_counts <- table(
  factor(bin_id, levels = seq_along(mids)),
  df_0430$cluster
)
# which.max() returns the first maximum, so ties go to the colder cluster.
dominant_cluster <- apply(bin_cluster_counts, 1L, which.max)
barcolors <- unname(ifelse(
  rowSums(bin_cluster_counts) > 0,
  cluster_colors[dominant_cluster],
  "gray90" # neutral fill for bins that contain no readings
))

graphics::barplot(
  hist_data$counts,
  col = barcolors,
  names.arg = round(mids, 1),
  xlab = "Temperature (F)",
  ylab = "Frequency",
  main = "Black, White and Seedum Histogram by Cluster, April 30th"
)
# Note that the `echo = FALSE` parameter was added to the code chunk to prevent
# printing of the R code that generated the plot.