In Prof. Paulson's course, R for Data Science: Analysis and Visualization, I learned several useful applications of RStudio, such as: - Data wrangling to filter, sort, change, and summarize datasets - Data visualization with various graphs, charts, and histograms - Data importation, using CSV files from external sources and analyzing them within RStudio - Replicating data through R Notebooks and R Markdown - How to tidy datasets using ‘tidyr’ - How to explore real-world datasets using RStudio
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
data("mtcars")
# One row per cylinder count (cyl), holding the mean of mpg and the mean of wt
mtcars_summary <- summarise(
  group_by(mtcars, cyl),
  avg_mpg = mean(mpg),
  avg_wt = mean(wt)
)
print(mtcars_summary)
## # A tibble: 3 × 3
## cyl avg_mpg avg_wt
## <dbl> <dbl> <dbl>
## 1 4 26.7 2.29
## 2 6 19.7 3.12
## 3 8 15.1 4.00
library(tidyverse)
# Load the per-state EV registration counts from the cleaned CSV file
ev_data <- readr::read_csv(file = "EV_Cleaned.csv")
## Rows: 51 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): State
## dbl (1): Registration Count
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Build a pie chart comparing the states with the most EV registrations
# against the combined total of every other row in `ev_data`.

# Number of leading states shown as their own slice. The row selection,
# the palette size, and the chart title are all derived from this value so
# they cannot drift apart.
n_top <- 3

# Get the top states by registration count
top3 <- ev_data %>%
  arrange(desc(`Registration Count`)) %>%
  slice_head(n = n_top)

# Calculate total national registrations
total_national <- sum(ev_data$`Registration Count`)

# Calculate 'Other States' total (everything not in the top group)
others_total <- total_national - sum(top3$`Registration Count`)

# Create final data frame for plotting: top states plus one aggregate row
pie_data <- top3 %>%
  select(State, `Registration Count`) %>%
  add_row(State = "Other States", `Registration Count` = others_total)

# Share of each slice in percent, rounded to one decimal, for the labels
slice_pct <- round(
  100 * pie_data$`Registration Count` / sum(pie_data$`Registration Count`),
  1
)

# Create pie chart; one colour per slice so the palette tracks the data
pie(
  pie_data$`Registration Count`,
  labels = paste0(pie_data$State, " (", slice_pct, "%)"),
  main = paste0(
    "Top ", n_top, " States vs Rest of US - EV Registrations (2023)"
  ),
  col = rainbow(nrow(pie_data))
)