Breaking Down Homework #4
In this homework assignment, we look into data structures like lists and dataframes that simplify:
Also introduced the factor, which is a special kind of variable that is used to represent categorical objects.
From the spuRs package you can obtain the dataset ufc.csv, with forest inventory observations from the University of Idaho Experimental Forest.
What are the species of the three tallest trees? Of the five fattest trees? (Use the order command.)
What are the mean diameters by species?
What are the two species that have the largest third quartile diameters?
What are the two species with the largest median slenderness (height/diameter) ratios? How about the two species with the smallest median slenderness ratios?
What is the identity of the tallest tree of the species that was the fattest on average?
Before answering any question we have to upload ufc data
What’s contained in this dataset?
Now onto part a)!
We want to use the order function to determine the heights of trees in the ufc dataset
Now, for the fattest trees we will be using the dbh.cm column
tapply() is a function that allows us to vectorize the application of a function to subsets of data. In conjunction with factors, this can make for exceptionally efficient code. it looks like the form: tapply(X, INDEX, FUN, ...)
Using tapply() we are able to calculate the mean DBH for each species separately.
so, the function is applied to each subset of dbh.cm defined by the corresponding species & calculates the average diameter for trees of each species.
We want to find the two species with the largest third quartile diameters
This means we are going to use the tapply() function again
So, in return it will calculate all species’ third quartile diameters then sort by the largest to smallest
tapply(), we compute the median ratio for each species.# Calculate the median slenderness for each species
slenderness <- tapply(ufc$height.m / ufc$dbh.cm, ufc$species, median)
# Sort the ratios for output
print(sort(slenderness)) WC DF GF WL
0.6435644 0.6487342 0.7136517 0.9244338
# Display the largest and smallest median ratios
cat('largest: ', names(sort(slenderness, decreasing = TRUE))[1:2], '\n')largest: WL GF
smallest: WC DF
The names() function is used for part of this question to extract the species name
# Sorts the species by their average volume in descending order and selects the one with the highest average fattest" species
fattest_species <- names(sort(fatness_per_species, decreasing= TRUE))[1]
# Filters the dataset to include only trees belonging to the fattest species (using subset())
fattest_species_list <- subset(ufc, subset= species %in% c(fattest_species), select=c(plot, tree, species, dbh.cm, height.m,volume.m3))
# Sorts the trees of the fattest species by height in descending order and selects the tallest one
tallest_tree_in_fattest_species <- fattest_species_list[order(fattest_species_list$height.m, decreasing=TRUE), ][1, ]
print(tallest_tree_in_fattest_species) plot tree species dbh.cm height.m volume.m3
141 55 2 DF 99.8 42 16.42745
Do the same for three close friends, & write a program to check if there are any shared birthdays or names in the four lists.
list(...) command, with comma-separated argumentsmy_info <- list(
name = "Bronwyn Kiernan",
gender = "Female",
age = 20,
favorite_movies = c("Pitch Perfect", "Mamma Mia", "Maze Runner"),
support_UN = TRUE,
family_birthdays = list(
Bronwyn = c(day = 7, month = 9), # My birthday
Jennifer = c(day = 20, month = 8), # Immediate family member 1 (mother)
Erin = c(day = 27, month = 9) # Immediate family member 2 (sister)
)
)check_shared_birthdays <- function(my_info, friend_1, friend_2, friend_3) {
all_families <- list(my_info, friend_1, friend_2, friend_3)
birthdays <- list()
for (family in all_families) {
birthdays <- c(birthdays, family$family_birthdays)
}
class(birthdays)
print(birthdays)
shared_birthdays <- list()
for (i in 1:(length(birthdays)-1)) {
for (j in (i + 1):length(birthdays)) {
print(birthdays[[i]])
print(birthdays[[j]])
if (birthdays[[i]]["day"] == birthdays[[j]]["day"] &&
birthdays[[i]]["month"] == birthdays[[j]]["month"]) {
shared_birthdays <- append(shared_birthdays, list(birthdays[[i]]))
}
}
}
return(shared_birthdays)
}$Bronwyn
day month
7 9
$Jennifer
day month
20 8
$Erin
day month
27 9
$Hayden
day month
29 9
$PJ
day month
23 12
$Wendy
day month
16 4
$Ruby
day month
24 10
$Posey
day month
23 4
$Sunny
day month
18 5
$Cassie
day month
25 7
$Tori
day month
20 6
$Denise
day month
20 8
day month
7 9
day month
20 8
day month
7 9
day month
27 9
day month
7 9
day month
29 9
day month
7 9
day month
23 12
day month
7 9
day month
16 4
day month
7 9
day month
24 10
day month
7 9
day month
23 4
day month
7 9
day month
18 5
day month
7 9
day month
25 7
day month
7 9
day month
20 6
day month
7 9
day month
20 8
day month
20 8
day month
27 9
day month
20 8
day month
29 9
day month
20 8
day month
23 12
day month
20 8
day month
16 4
day month
20 8
day month
24 10
day month
20 8
day month
23 4
day month
20 8
day month
18 5
day month
20 8
day month
25 7
day month
20 8
day month
20 6
day month
20 8
day month
20 8
day month
27 9
day month
29 9
day month
27 9
day month
23 12
day month
27 9
day month
16 4
day month
27 9
day month
24 10
day month
27 9
day month
23 4
day month
27 9
day month
18 5
day month
27 9
day month
25 7
day month
27 9
day month
20 6
day month
27 9
day month
20 8
day month
29 9
day month
23 12
day month
29 9
day month
16 4
day month
29 9
day month
24 10
day month
29 9
day month
23 4
day month
29 9
day month
18 5
day month
29 9
day month
25 7
day month
29 9
day month
20 6
day month
29 9
day month
20 8
day month
23 12
day month
16 4
day month
23 12
day month
24 10
day month
23 12
day month
23 4
day month
23 12
day month
18 5
day month
23 12
day month
25 7
day month
23 12
day month
20 6
day month
23 12
day month
20 8
day month
16 4
day month
24 10
day month
16 4
day month
23 4
day month
16 4
day month
18 5
day month
16 4
day month
25 7
day month
16 4
day month
20 6
day month
16 4
day month
20 8
day month
24 10
day month
23 4
day month
24 10
day month
18 5
day month
24 10
day month
25 7
day month
24 10
day month
20 6
day month
24 10
day month
20 8
day month
23 4
day month
18 5
day month
23 4
day month
25 7
day month
23 4
day month
20 6
day month
23 4
day month
20 8
day month
18 5
day month
25 7
day month
18 5
day month
20 6
day month
18 5
day month
20 8
day month
25 7
day month
20 6
day month
25 7
day month
20 8
day month
20 6
day month
20 8
Here we can see all 5 separate plots
par(mfrow = c(2, 3)) for (habitat in habitat_types) { habitat_data <- subset(treegrowth, habitat == treegrowth\(habitat) plot(habitat_data\)age, habitat_data\(height.ft, main = paste("habitat:", habitat), xlab = "age", ylab = "height", type = "n") points(habitat_data\)age, habitat_data$height.ft, pch = 19) }
depth_10_triangle <- pascals_triangle(10)
depth_11_triangle <- extend_pascals_triangle(depth_10_triangle)
# The 11th row of Pascal's triangle
eleventh_row <- depth_11_triangle[[11]]
print(eleventh_row) [1] 1 10 45 120 210 252 210 120 45 10 1
[[1]]
[1] 1
[[2]]
[1] 1 1
[[3]]
[1] 1 2 1
[[4]]
[1] 1 3 3 1
[[5]]
[1] 1 4 6 4 1
[[6]]
[1] 1 5 10 10 5 1
[[7]]
[1] 1 6 15 20 15 6 1
[[8]]
[1] 1 7 21 35 35 21 7 1
[[9]]
[1] 1 8 28 56 70 56 28 8 1
[[10]]
[1] 1 9 36 84 126 126 84 36 9 1
[[11]]
[1] 1 10 45 120 210 252 210 120 45 10 1
read_racing_data <- function() {
raw_data <- readLines("/Users/bronwynkiernan/math 329/racing.txt")
# Initialize variables
races <- list()
current_race <- NULL
current_horses <- list()
for (line in raw_data) {
# Split the line by tab or spaces
tokens <- as.numeric(unlist(strsplit(line, "\\s+")))
tokens <- tokens[!is.na(tokens)]
race_num <- tokens[1]
win_loss <- tokens[2] == 1
off_time <- tokens[3]
# Extract the times and prices from remaining tokens in pairs
times_prices <- tokens[-c(1:3)]
times <- times_prices[seq(2, length(times_prices), by = 2)]
prices <- times_prices[seq(1, length(times_prices), by = 2)]
# Create a horse entry
horse <- list(win = win_loss, times = times, prices = prices)
# Check if we are still in the same race or a new race has started
if (is.null(current_race)) {
current_race <- race_num
current_horses <- list(horse)
} else if (race_num == current_race) {
current_horses <- append(current_horses, list(horse))
} else {
races[[current_race]] <- current_horses
current_race <- race_num
current_horses <- list(horse)
}
}
# Add the last race
if (!is.null(current_race)) {
races[[current_race]] <- current_horses
}
return(races)
}# Function to plot a single race
plot_race <- function(race) {
# Initialize plot
plot(NULL, xlim = range(unlist(lapply(race, function(horse) horse$times), use.names = FALSE), na.rm = TRUE),
ylim = range(log(unlist(lapply(race, function(horse) horse$prices), use.names = FALSE)), na.rm = TRUE),
xlab = "Time", ylab = "Log(Price)", main = "Log Price vs Time")
# Set colors
colors <- rep("black", length(race))
for (i in 1:length(race)) {
if (race[[i]]$win) {
colors[i] <- "red" # Highlight the winning horse in red
}
}
# Plot each horse
for (i in 1:length(race)) {
lines(race[[i]]$times, log(race[[i]]$prices), col = colors[i], lwd = ifelse(race[[i]]$win, 2, 1))
}
legend("topright", legend = c("Winning Horse", "Losing Horse"), col = c("red", "black"), lty = 1, lwd = c(2, 1))
}change <- function(x, y.vec = c()) {
# finds possible ways of making up amount x using Australian coins
# x is given in cents and we assume it is divisible by 5
# y.vec are coins already used (so total amount is x + sum(y.vec))
if (x == 0) {
cat(y.vec, "\n")
} else {
coins <- c(200, 100, 50, 20, 10, 5)
new.x <- x - coins
new.x <- new.x[new.x >= 0]
for (z in new.x) {
y.tmp <- c(y.vec, x - z)
if (identical(y.tmp, sort(y.tmp))) {
change(z, y.tmp)
}
}
return(invisible(NULL))
}
}premierships <- list(
Adelaide = c(1997, 1998),
Carlton = c(1906, 1907, 1908, 1914, 1915, 1938, 1945, 1947,1968, 1970, 1972, 1979, 1981, 1982, 1987, 1995),
Collingwood = c(1902, 1903, 1910, 1917, 1919, 1927, 1928, 1929, 1930, 1935, 1936, 1953, 1958, 1990, 2010),
Essendon = c(1897, 1901, 1911, 1912, 1923, 1924, 1942, 1946, 1949, 1950, 1962, 1965, 1984, 1985, 1993, 2000),
Fitzroy_Brisbane = c(1898, 1899, 1904, 1905, 1913, 1916, 1922, 1944, 2001, 2002, 2003),
Footscray_W.B. = c(1954),
Fremantle = c(),
Geelong = c(1925, 1931, 1937, 1951, 1952, 1963, 2007, 2009, 2011),
Hawthorn = c(1961, 1971, 1976, 1978, 1983, 1986, 1988, 1989, 1991, 2008, 2013),
Melbourne = c(1900, 1926, 1939, 1940, 1941, 1948, 1955, 1956, 1957, 1959, 1960, 1964),
N.Melb_Kangaroos = c(1975, 1977, 1996, 1999),
PortAdelaide = c(2004),
Richmond = c(1920, 1921, 1932, 1934, 1943, 1967, 1969, 1973, 1974, 1980),
StKilda = c(1966),
S.Melb_Sydney = c(1909, 1918, 1933, 2005, 2012),
WestCoast = c(1992, 1994, 2006)
)
year <- 1967
for (i in 1:length(premierships)) {
if (year %in% premierships[[i]]) {
winner <- names(premierships)[i]
}
}
winner[1] "Richmond"
team_winner <- function(year, premierships) {
if (!is.numeric(year) || length(year) != 1 || year < 1870 || year > 2023) {
stop("provide a valid year between up to 2023.")
}
winner <- NULL
for (i in 1:length(premierships)) {
if (year %in% premierships[[i]]) {
winner <- names(premierships)[i]
break
}
}
if (!is.null(winner)) {
return(paste("The team that won in", year, "is", winner))
} else {
return(paste("None found", year))
}
}
team_winner(1967, premierships)[1] "The team that won in 1967 is Richmond"