# Load necessary libraries
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(patchwork)
## Warning: package 'patchwork' was built under R version 4.4.2
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 4.4.2
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(knitr)

# Global knitr chunk option: echo = TRUE displays each chunk's R source in the
# knitted document along with its output.
knitr::opts_chunk$set(echo = TRUE)

Exercise 7: Ages of the Vice Presidents at the Time of Their Death

Given Data

The ages of U.S. Vice Presidents at the time of their death are listed as follows:

90, 83, 80, 73, 70, 51, 68, 79, 70, 71, 72, 74, 67, 54, 81, 66, 62, 63, 68, 57, 66, 96, 78, 55, 60, 66, 57, 71, 66, 77, 93, 70

(a) Frequency Distribution with 6 Classes

To construct the frequency distribution, we first identify the minimum and maximum values in the data:

  • Minimum age: 51
  • Maximum age: 96

Using this range, we divide the data into 6 classes. The class width can be calculated as follows:

\[ \text{Class width} = \frac{\text{Maximum age} - \text{Minimum age}}{6} = \frac{96 - 51}{6} = 7.5 \]

Since the class width should be a whole number, we round 7.5 up to 8. Therefore, the classes will be:

  • 51 - 58
  • 59 - 66
  • 67 - 74
  • 75 - 82
  • 83 - 90
  • 91 - 98

Now, we count how many ages fall within each class:

  • 51 - 58: 5 values (51, 54, 55, 57, 57)
  • 59 - 66: 7 values (60, 62, 63, 66, 66, 66, 66)
  • 67 - 74: 11 values (67, 68, 68, 70, 70, 70, 71, 71, 72, 73, 74)
  • 75 - 82: 5 values (77, 78, 79, 80, 81)
  • 83 - 90: 2 values (83, 90)
  • 91 - 98: 2 values (93, 96)

Thus, the frequency distribution is:

Age Range Frequency
51 - 58 5
59 - 66 7
67 - 74 11
75 - 82 5
83 - 90 2
91 - 98 2
# Frequency distribution, relative and cumulative frequencies, and ogive for
# the ages listed in Exercise 7.
age_data <- c(
  90, 83, 80, 73, 70, 51, 68, 79, 70, 71, 72, 74, 67, 54, 81, 66,
  62, 63, 68, 57, 66, 96, 78, 55, 60, 66, 57, 71, 66, 77, 93, 70
)

# Class boundaries for the six classes 51-58, 59-66, ..., 91-98.
# The breaks sit halfway between adjacent class limits (50.5, 58.5, ..., 98.5),
# so no whole-number age can land exactly on a break.
breaks <- seq(50.5, 98.5, by = 8)
age_hist <- hist(age_data, breaks = breaks, plot = FALSE)

# Relative frequency: share of all observations that fall in each class
relative_freq <- age_hist$counts / length(age_data)

# Cumulative frequency: running total of the class counts
cumulative_freq <- cumsum(age_hist$counts)

# Frequency distribution table, one row per class
class_lower <- breaks[-length(breaks)] + 0.5
class_upper <- breaks[-1] - 0.5
freq_table <- data.frame(
  Class = paste(class_lower, "-", class_upper),
  Frequency = age_hist$counts,
  Relative = round(relative_freq, 3),
  Cumulative = cumulative_freq
)
print(freq_table)

# Ogive: cumulative frequency is plotted against the class boundaries (not the
# class midpoints), starting from 0 at the lowest boundary.
ogive_x <- age_hist$breaks
ogive_y <- c(0, cumulative_freq)
plot(
  ogive_x, ogive_y,
  type = "o", col = "blue", pch = 16,
  xlab = "Age", ylab = "Cumulative Frequency", main = "Ogive of Ages"
)

Exercise 8: Activities While Driving

# Exercise 8 data: one percentage per activity
activities <- c("Drink beverage", "Talk on cell phone", "Eat a meal", "Experience road rage", "Smoke")
percentages <- c(80, 73, 41, 23, 21)
activity_data <- data.frame(Activity = activities, Percentage = percentages)

# Horizontal bar chart with bars ordered by percentage.
# Explicit axis labels are set because the category axis would otherwise be
# titled with the raw mapping expression "reorder(Activity, Percentage)".
ggplot(activity_data, aes(x = reorder(Activity, Percentage), y = Percentage)) +
  geom_col(fill = "skyblue") +
  coord_flip() +
  labs(title = "Activities While Driving", x = "Activity", y = "Percentage") +
  theme_minimal()

Exercise 9: Calories of Nuts

# Exercise 9 data: calorie value recorded for each kind of nut
nuts <- c("Peanuts", "Almonds", "Macadamia", "Pecans", "Cashews")
calories <- c(160, 170, 200, 190, 160)
nuts_data <- data.frame(Nut = nuts, Calories = calories)

# Vertical bar chart with the nuts ordered by their calorie value
ggplot(
  data = nuts_data,
  mapping = aes(x = reorder(Nut, Calories), y = Calories)
) +
  geom_col(fill = "orange") +
  labs(
    title = "Calories in Different Nuts",
    x = "Nut Type",
    y = "Calories"
  ) +
  theme_minimal()

Exercise 10: Space Launches Time Series

# Exercise 10 data: number of launches per period, listed in time order
years <- c("60-69", "70-79", "80-89", "90-99", "100-109")
launches <- c(614, 247, 199, 300, 206)

# A plain character column would be laid out alphabetically on the discrete
# x axis, putting "100-109" before "60-69". Fixing the factor levels to the
# order given above keeps the periods in sequence along the axis.
launch_data <- data.frame(
  Year = factor(years, levels = years),
  Launches = launches
)

# group = 1 joins all periods into a single line even though x is discrete
ggplot(launch_data, aes(x = Year, y = Launches, group = 1)) +
  geom_line(color = "blue") +
  geom_point(color = "red") +
  ggtitle("U.S. Space Launches Time Series") +
  theme_minimal()

Exercise 11: High School Dropout Rate

# Exercise 11 data: one dropout rate per year, 2003 through 2009
years <- c(2003, 2004, 2005, 2006, 2007, 2008, 2009)
dropout_rate <- c(9.9, 10.3, 9.4, 9.3, 8.7, 8.0, 8.1)
dropout_data <- data.frame(Year = years, Rate = dropout_rate)

# Time series: blue connecting line with a red marker at each year
ggplot(data = dropout_data, mapping = aes(x = Year, y = Rate)) +
  geom_line(colour = "blue") +
  geom_point(colour = "red") +
  labs(title = "High School Dropout Rate Time Series") +
  theme_minimal()

Exercise 12: Spending of College Freshmen

# Exercise 12 data: spending amount per item category
items <- c("Electronics", "Dorm items", "Clothing", "Shoes")
spending <- c(728, 344, 141, 72)
spending_data <- data.frame(Item = items, Spending = spending)

# Pie chart: a single stacked bar (constant x) wrapped into polar coordinates,
# so each item's slice angle follows its Spending value
ggplot(
  data = spending_data,
  mapping = aes(x = "", y = Spending, fill = Item)
) +
  geom_col(width = 1) +
  coord_polar(theta = "y") +
  labs(title = "College Freshmen Spending") +
  theme_minimal()

Exercise 13: Career Changes

# Exercise 13 data: number of responses for each answer
answers <- c("Yes", "No", "Undecided")
values <- c(660, 260, 80)
career_data <- data.frame(Answer = answers, Count = values)

# Pie chart: a single stacked bar (constant x) wrapped into polar coordinates,
# so each answer's slice angle follows its Count
ggplot(
  data = career_data,
  mapping = aes(x = "", y = Count, fill = Answer)
) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0) +
  labs(title = "Career Changes Survey") +
  theme_minimal()

Exercise 14: Peyton Manning’s Touchdowns (Dotplot)

# Exercise 14 data: touchdown totals, one value per observation
touchdowns <- c(26, 33, 27, 49, 31, 27, 33, 26, 26, 29, 28, 31, 33)
touchdown_data <- data.frame(Touchdowns = touchdowns)

# Dotplot with one dot per observation, stacked within bins of width 1.
# geom_dotplot's y axis values are not meaningful when binning along x, so the
# axis title and ticks are hidden to avoid a misleading scale.
ggplot(touchdown_data, aes(x = Touchdowns)) +
  geom_dotplot(binwidth = 1, dotsize = 0.5, fill = "blue") +
  scale_y_continuous(NULL, breaks = NULL) +
  ggtitle("Peyton Manning's Touchdowns") +
  theme_minimal()

Exercise 15: Songs on CDs (Dotplot)

# Exercise 15 data: number of songs, one value per CD
songs <- c(
  10, 14, 18, 11, 11, 15, 16, 10, 10, 17, 10, 15, 22, 9, 14, 12, 18, 12, 12, 15,
  21, 22, 20, 15, 10, 19, 20, 21, 17, 9, 13, 15, 11, 12, 12, 9, 14, 20, 12, 10
)
song_data <- data.frame(Songs = songs)

# Dotplot with one dot per CD, stacked within bins of width 1.
# geom_dotplot's y axis values are not meaningful when binning along x, so the
# axis title and ticks are hidden to avoid a misleading scale.
ggplot(song_data, aes(x = Songs)) +
  geom_dotplot(binwidth = 1, dotsize = 0.5, fill = "purple") +
  scale_y_continuous(NULL, breaks = NULL) +
  ggtitle("Number of Songs on CDs") +
  theme_minimal()

Exercise 16: Traffic in X-City

# Exercise 16 data: vehicle counts per time slot for three vehicle types
time <- c("1-2pm", "2-3pm", "3-4pm", "4-5pm")
cars <- c(37, 44, 23, 29)
buses <- c(45, 34, 39, 41)
bikes <- c(42, 26, 27, 48)

# Long format: the four time slots are repeated once per vehicle type, and each
# type label is repeated four times so it lines up with its block of counts
traffic_data <- data.frame(
  Time = rep(time, times = 3),
  Vehicles = c(cars, buses, bikes),
  Type = rep(c("Cars", "Buses", "Bikes"), each = 4)
)

# One coloured line with point markers per vehicle type
ggplot(
  data = traffic_data,
  mapping = aes(x = Time, y = Vehicles, colour = Type, group = Type)
) +
  geom_line() +
  geom_point() +
  labs(title = "Traffic in X-City") +
  theme_minimal()

Exercise 17: Agricultural Production

# Exercise 17 data: quantity per year for three production categories
year <- c(2010, 2011, 2012, 2013)
food_grains <- c(100, 120, 130, 150)
vegetables <- c(30, 40, 45, 52)
others <- c(10, 15, 25, 25)

# Long format: the four years are repeated once per category, and each category
# label is repeated four times so it lines up with its block of quantities
agri_data <- data.frame(
  Year = rep(year, times = 3),
  Type = rep(c("Food Grains", "Vegetables", "Others"), each = 4),
  Quantity = c(food_grains, vegetables, others)
)

# Grouped bar chart: bars for the three categories sit side by side in each year
ggplot(
  data = agri_data,
  mapping = aes(x = Year, y = Quantity, fill = Type)
) +
  geom_col(position = "dodge") +
  labs(title = "Agricultural Production (2010-2013)") +
  theme_minimal()