library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# The dataset I chose was collected by the DOE as part of electric utilities' yearly operations reporting. It covers each utility's organizational structure, which zip codes it serves, and its electricity rates for commercial, residential, and industrial customers.
# Load the two 2022 utility rate tables from the working directory.
# NOTE(review): zip is parsed as a number (summary() reports Min. 501), so any
# leading zeros in the files are not preserved -- confirm before using zip as
# an identifier.
iou_zipcodes_2022 <- read.csv(file = "iou_zipcodes_2022.csv")
non_iou_zipcodes_2022 <- read.csv(file = "non_iou_zipcodes_2022.csv")

# Stack the two tables row-wise; rbind() requires matching column names.
combinedzips <- rbind(
  iou_zipcodes_2022,
  non_iou_zipcodes_2022
)
# I had a lot of trouble getting R to find the original CSVs, so I had to make a new project folder specifically for the datasets in order to read them properly. After doing that, I used rbind to combine the datasets: they were split into separate files by ownership, and I wanted to collate them into one so that all utilities are in a single table.
# Print per-column summary statistics for the combined utility table.
summary(object = combinedzips)
## zip eiaid utility_name state
## Min. : 501 Min. : 55 Length:80103 Length:80103
## 1st Qu.:21646 1st Qu.: 6411 Class :character Class :character
## Median :44431 Median :13573 Mode :character Mode :character
## Mean :46440 Mean :13799
## 3rd Qu.:68035 3rd Qu.:17066
## Max. :99950 Max. :57483
## service_type ownership comm_rate ind_rate
## Length:80103 Length:80103 Min. :0.00000 Min. :0.00000
## Class :character Class :character 1st Qu.:0.08766 1st Qu.:0.05821
## Mode :character Mode :character Median :0.11465 Median :0.07821
## Mean :0.11367 Mean :0.08179
## 3rd Qu.:0.13861 3rd Qu.:0.09838
## Max. :0.46309 Max. :0.40244
## res_rate
## Min. :0.0000
## 1st Qu.:0.1101
## Median :0.1327
## Mean :0.1331
## 3rd Qu.:0.1560
## Max. :0.5331
# Histogram of residential rates. The argument expression is kept as
# combinedzips$res_rate because hist() builds its default title and x-axis
# label from it.
hist(x = combinedzips$res_rate)
# This shows the histogram of residential rates for customers of all
# utilities in the country.
# Box plot of residential rates, with one green-filled box per state.
ggplot(data = combinedzips, mapping = aes(x = state, y = res_rate)) +
  geom_boxplot(fill = "green")
# This is the box plot of residential rates across the country, separated by
# state. I will need to learn how to separate the states better in the
# graph, but it is relatively self-explanatory.
# Correlation (cor() default method) between industrial and residential rates.
with(combinedzips, cor(ind_rate, res_rate))
## [1] 0.7421116
# Correlation (cor() default method) between commercial and residential rates.
with(combinedzips, cor(comm_rate, res_rate))
## [1] 0.8889202
# These two correlations compare the residential rate with the industrial and commercial rates, and they show a discrepancy in how closely each one follows the residential rate (0.74 for industrial versus 0.89 for commercial).
# I would like to look into differences between states and ownership types in the future, but I will need more practice with R beforehand.