install.packages("tidyverse")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.5'
## (as 'lib' is unspecified)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.1     ✔ tibble    3.3.0
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2) # Load ggplot2 for creating plots
# Load the mtcars dataset (built into R)
data_mtcars <- mtcars
# View the first few rows to understand the data
head(data_mtcars)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
# Convert 'am' (transmission type) and 'cyl' (number of cylinders) to factors for categorical plotting
data_mtcars$am <- as.factor(data_mtcars$am)
data_mtcars$cyl <- as.factor(data_mtcars$cyl)
# View the first few rows to understand the data
head(data_mtcars)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
# Create a scatter plot of car weight vs. miles per gallon, colored by cylinder count
ggplot(data_mtcars, aes(x = wt, y = mpg, color = cyl)) +
  geom_point() + # Add points to the plot
  labs(title = "Miles Per Gallon vs. Weight ", x = "Weight (1000 lbs)", y = "Miles Per Gallon") # Add plot labels

#Create a line graph of ordered mpg by the row number.
data_mtcars_line <- data_mtcars %>% mutate(index = row_number()) #add index column so we can plot it

ggplot(data_mtcars_line, aes(x = index, y = mpg)) +
  geom_line() + # add a line to the plot
  labs(title = "Miles Per Gallon by Index", x = "Index", y = "Miles Per Gallon") # add plot labels

# Create a horizontal bar chart of the average horsepower grouped by cylinder count
hp_by_cyl <- data_mtcars %>% group_by(cyl) %>% summarize(avg_hp = mean(hp)) # Calculate average horsepower for each cylinder group

ggplot(hp_by_cyl, aes(y = cyl, x = avg_hp)) +
  geom_bar(stat = 'identity') + # Create bars based on the calculated averages
  labs(title = "Average HP by Cylinder Count", y = "Cylinder Count", x = "Average Horsepower") # Add plot labels

#Create a stacked bar chart of average mpg, disp, hp, and wt, grouped by cyl.
bar_data_mtcars <- data_mtcars %>% group_by(cyl) %>% summarize(mpg = mean(mpg), disp = mean(disp), hp = mean(hp), wt = mean(wt)) %>% pivot_longer(cols = c("mpg", "disp", "hp", "wt"), names_to = "Measurement", values_to = "Average") #Calculate average values for each measurement, and pivot the data into a long format.

ggplot(bar_data_mtcars, aes(x = cyl, fill = Measurement, y = Average)) +
  geom_bar(stat = "identity") + #Create bars based on the calculated averages
  labs(title = "Average Measurements by Cylinder Count", x = "Cylinder Count", y = "Average Measurement") #add plot labels