# Quick look at the built-in `cars` dataset using a base-graphics plot.
plot(cars)

# Install tidyverse only when it is not already available, so that re-running
# the script does not reinstall the package (and hit the network) every time.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.5'
## (as 'lib' is unspecified)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
# Work on a copy of the built-in mtcars data set and preview its first rows.
data_mtcars <- mtcars
head(data_mtcars)

# `am` and `cyl` are stored as doubles; turn both into factors so that they
# are treated as categorical variables when plotting.
data_mtcars[c("am", "cyl")] <- lapply(
  data_mtcars[c("am", "cyl")],
  as.factor
)
# Scatter plot of car weight (x) against miles per gallon (y), with one point
# colour per cylinder count.
ggplot(data = data_mtcars, mapping = aes(x = wt, y = mpg, colour = cyl)) +
  geom_point() +
  labs(
    x = "Weight (1000 lbs)",
    y = "Miles Per Gallon",
    title = "Weight vs. Miles Per Gallon"
  )
# Weight is not the most significant factor for MPG; instead it is the number of cylinders, as can be seen from the clusters.
# Line Graph ----
# Line graph of mpg against each car's row position in the data.
# The row position is added as an explicit `index` column so it can be mapped
# to the x axis.
data_mtcars_line <- data_mtcars %>%
  mutate(index = row_number())

ggplot(data = data_mtcars_line, mapping = aes(x = index, y = mpg)) +
  geom_line() +
  labs(
    x = "Index",
    y = "Miles Per Gallon",
    title = "Miles Per Gallon by Index"
  )
# This line graph shows mpg by row position in the mtcars dataset. Row 20 has the highest mpg and rows 15-16 have the lowest, which is a coincidence of the row order.
# Horizontal Bar Chart ----
# Horizontal bar chart of average horsepower for each cylinder count.
# First reduce the data to one row per cylinder group holding the mean hp.
hp_by_cyl <- data_mtcars %>%
  group_by(cyl) %>%
  summarise(avg_hp = mean(hp))

# The averages are already computed, so the bars are drawn directly from the
# values; putting `cyl` on the y axis makes the bars horizontal.
ggplot(data = hp_by_cyl, mapping = aes(x = avg_hp, y = cyl)) +
  geom_col() +
  labs(
    x = "Average Horsepower",
    y = "Cylinder Count",
    title = "Average HP by Cylinder Count"
  )
# This horizontal bar chart shows that 8-cylinder engines have the highest average HP.
# Stacked Bar Chart ----
# Stacked bar chart of the average mpg, disp, hp and wt for each cylinder
# count.
# Average the four measurements per cylinder group, then pivot to long format
# (one row per cylinder/measurement pair) so each measurement can become one
# segment of the stack.
bar_data_mtcars <- data_mtcars %>%
  group_by(cyl) %>%
  summarise(across(c(mpg, disp, hp, wt), mean)) %>%
  pivot_longer(
    cols = -cyl,
    names_to = "Measurement",
    values_to = "Average"
  )

# Bars are drawn from the precomputed averages; `fill` splits each bar into
# one coloured segment per measurement.
ggplot(
  data = bar_data_mtcars,
  mapping = aes(x = cyl, y = Average, fill = Measurement)
) +
  geom_col() +
  labs(
    x = "Cylinder Count",
    y = "Average Measurement",
    title = "Average Measurements by Cylinder Count"
  )
# 8-cylinder cars had the highest average measurements by cylinder count, mainly through displacement and horsepower.