#Graphing and Analyzing Yearly Natural Gas Consumption and Income in Different States
library(pacman) #loads pacman
pacman::p_load(rio,ggplot2,gridExtra,dplyr,rlang,reshape2,tidyverse) #loads the contributed packages
# Load the natural gas dataset from a CSV file in the working directory.
ngdata <- read.csv("NaturalGas.csv")
# View() opens a data viewer window, which is only useful in an interactive
# session. Skip it otherwise (e.g. Rscript or knitting), where it can fail
# when no display is available.
if (interactive()) {
  View(ngdata)
}
# New York subset: keep only the rows whose state code is "NY"
NYdata <- ngdata[ngdata[["state"]] == "NY", ]
# Preview the first six New York rows
head(NYdata, n = 6L)
## rownames state statecode year consumption price eprice oprice lprice heating
## 1 1 NY 35 1967 313656 1.42 2.98 7.40 1.47 6262
## 2 2 NY 35 1968 319282 1.38 2.91 7.77 1.42 6125
## 3 3 NY 35 1969 331326 1.37 2.84 7.96 1.38 6040
## 4 4 NY 35 1970 346533 1.40 2.87 8.33 1.37 6085
## 5 5 NY 35 1971 352085 1.50 3.07 8.80 1.40 5907
## 6 6 NY 35 1972 363412 1.62 3.26 8.85 1.50 6248
## income
## 1 10903.75
## 2 11370.02
## 3 11578.68
## 4 11586.77
## 5 11657.42
## 6 11860.80
# Distribution of New York's yearly consumption values
summary(NYdata[["consumption"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 313027 326494 336712 336217 342819 364713
# Distribution of New York's yearly income values
summary(NYdata[["income"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 10904 11674 12002 12731 13374 16425

# Line chart of New York's natural gas consumption by year (1967 to 1989)
NY <- ggplot() +
  geom_line(data = NYdata, mapping = aes(x = year, y = consumption)) +
  labs(title = "New York Yearly Natural Gas Consumption from 1967 to 1989") +
  theme(legend.position = "none")
print(NY)

# Line chart of New York's income by year (1967 to 1989)
NY2 <- ggplot() +
  geom_line(data = NYdata, mapping = aes(x = year, y = income)) +
  labs(title = "New York Yearly Income from 1967 to 1989") +
  theme(legend.position = "none")
print(NY2)

# Show the two New York charts side by side
grid.arrange(NY, NY2, ncol = 2)

# Florida subset: keep only the rows whose state code is "FL"
FLdata <- ngdata[ngdata[["state"]] == "FL", ]
# Preview the first six Florida rows
head(FLdata, n = 6L)
## rownames state statecode year consumption price eprice oprice lprice heating
## 24 24 FL 10 1967 9430 2.26 2.16 6.86 2.21 569
## 25 25 FL 10 1968 11318 2.59 2.09 6.80 2.26 954
## 26 26 FL 10 1969 11636 2.48 2.03 7.04 2.59 937
## 27 27 FL 10 1970 14702 2.51 1.98 7.28 2.48 871
## 28 28 FL 10 1971 13242 2.54 2.02 8.04 2.51 596
## 29 29 FL 10 1972 12837 2.66 2.12 8.10 2.54 507
## income
## 24 8143.32
## 25 8651.26
## 26 9117.02
## 27 9410.66
## 28 9548.78
## 29 9956.45
# Distribution of Florida's yearly consumption values
summary(FLdata[["consumption"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 9430 13388 14793 14737 15834 21619
# Distribution of Florida's yearly income values
summary(FLdata[["income"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 8143 9888 10739 10890 11833 13755

# Line chart of Florida's natural gas consumption by year (1967 to 1989)
FL <- ggplot() +
  geom_line(data = FLdata, mapping = aes(x = year, y = consumption)) +
  labs(title = "Florida Yearly Natural Gas Consumption from 1967 to 1989") +
  theme(legend.position = "none")
print(FL)

# Line chart of Florida's income by year (1967 to 1989)
FL2 <- ggplot() +
  geom_line(data = FLdata, mapping = aes(x = year, y = income)) +
  labs(title = "Floridas Yearly Income from 1967 to 1989") +
  theme(legend.position = "none")
print(FL2)

# Show the two Florida charts side by side
grid.arrange(FL, FL2, ncol = 2)

# Michigan subset: keep only the rows whose state code is "MI"
MIdata <- ngdata[ngdata[["state"]] == "MI", ]
# Preview the first six Michigan rows
head(MIdata, n = 6L)
## rownames state statecode year consumption price eprice oprice lprice heating
## 47 47 MI 23 1967 302472 0.98 2.32 6.58 0.98 7004
## 48 48 MI 23 1968 315694 0.98 2.27 6.76 0.98 6708
## 49 49 MI 23 1969 333264 0.98 2.24 6.99 0.98 6911
## 50 50 MI 23 1970 340033 1.01 2.27 7.16 0.98 6882
## 51 51 MI 23 1971 343773 1.06 2.32 7.57 1.01 6652
## 52 52 MI 23 1972 355266 1.10 2.42 7.63 1.06 7273
## income
## 47 9600.39
## 48 10034.59
## 49 10266.57
## 50 9862.30
## 51 10082.84
## 52 10577.12
# Distribution of Michigan's yearly consumption values
summary(MIdata[["consumption"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 299199 331752 340732 340549 351889 387279
# Distribution of Michigan's yearly income values
summary(MIdata[["income"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 9600 10509 11365 11427 12058 13597

# Line chart of Michigan's natural gas consumption by year (1967 to 1989)
MI <- ggplot() +
  geom_line(data = MIdata, mapping = aes(x = year, y = consumption)) +
  labs(title = "Michigan Yearly Natural Gas Consumption from 1967 to 1989") +
  theme(legend.position = "none")
print(MI)

# Line chart of Michigan's income by year (1967 to 1989)
MI2 <- ggplot() +
  geom_line(data = MIdata, mapping = aes(x = year, y = income)) +
  labs(title = "Michigan Yearly income from 1967 to 1989") +
  theme(legend.position = "none")
print(MI2)

# Show the two Michigan charts side by side
grid.arrange(MI, MI2, ncol = 2)

# Texas subset: keep only the rows whose state code is "TX"
TXdata <- ngdata[ngdata[["state"]] == "TX", ]
# Preview the first six Texas rows
head(TXdata, n = 6L)
## rownames state statecode year consumption price eprice oprice lprice heating
## 70 70 TX 44 1967 201407 0.87 2.19 5.01 0.85 1859
## 71 71 TX 44 1968 211763 0.87 2.15 5.24 0.87 2258
## 72 72 TX 44 1969 220728 0.90 2.09 5.43 0.87 2032
## 73 73 TX 44 1970 232189 0.92 2.05 5.71 0.90 2119
## 74 74 TX 44 1971 237387 0.99 2.04 6.17 0.92 1689
## 75 75 TX 44 1972 240662 1.01 2.07 6.23 0.99 2021
## income
## 70 7930.66
## 71 8240.34
## 72 8517.22
## 73 8660.20
## 74 8655.35
## 75 8940.77
# Distribution of Texas's yearly consumption values
summary(TXdata[["consumption"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 194602 212220 224800 230215 236512 315857
# Distribution of Texas's yearly income values
summary(TXdata[["income"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 7931 9142 10823 10411 11702 12239

# Line chart of Texas's natural gas consumption by year (1967 to 1989)
TX <- ggplot() +
  geom_line(data = TXdata, mapping = aes(x = year, y = consumption)) +
  labs(title = "Texas Yearly Natural Gas Consumption from 1967 to 1989") +
  theme(legend.position = "none")
print(TX)

# Line chart of Texas's income by year (1967 to 1989)
TX2 <- ggplot() +
  geom_line(data = TXdata, mapping = aes(x = year, y = income)) +
  labs(title = "Texas Yearly Income from 1967 to 1989") +
  theme(legend.position = "none")
print(TX2)

# Show the two Texas charts side by side
grid.arrange(TX, TX2, ncol = 2)

# Utah subset: keep only the rows whose state code is "UT"
UTdata <- ngdata[ngdata[["state"]] == "UT", ]
# Preview the first six Utah rows
head(UTdata, n = 6L)
## rownames state statecode year consumption price eprice oprice lprice heating
## 93 93 UT 45 1967 38935 0.68 2.25 6.74 0.69 6530
## 94 94 UT 45 1968 40779 0.74 2.23 6.95 0.68 6790
## 95 95 UT 45 1969 43948 0.83 2.20 7.20 0.74 6510
## 96 96 UT 45 1970 44637 0.85 2.17 7.46 0.83 6658
## 97 97 UT 45 1971 49849 0.87 2.23 7.92 0.85 6969
## 98 98 UT 45 1972 48855 0.90 2.24 7.92 0.87 6505
## income
## 93 7465.34
## 94 7560.08
## 95 7661.95
## 96 7865.50
## 97 8021.98
## 98 8299.55
# Distribution of Utah's yearly consumption values
summary(UTdata[["consumption"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 35558 44293 48855 49967 56289 66417
# Distribution of Utah's yearly income values
summary(UTdata[["income"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 7465 8356 9001 8846 9348 10194

# Line chart of Utah's natural gas consumption by year (1967 to 1989)
UT <- ggplot() +
  geom_line(data = UTdata, mapping = aes(x = year, y = consumption)) +
  labs(title = "Utah Yearly Natural Gas Consumption from 1967 to 1989") +
  theme(legend.position = "none")
print(UT)

# Line chart of Utah's income by year (1967 to 1989)
UT2 <- ggplot() +
  geom_line(data = UTdata, mapping = aes(x = year, y = income)) +
  labs(title = "Utah Yearly Income from 1967 to 1989") +
  theme(legend.position = "none")
print(UT2)

# Show the two Utah charts side by side
grid.arrange(UT, UT2, ncol = 2)

# California subset: keep only the rows whose state code is "CA"
CAdata <- ngdata[ngdata[["state"]] == "CA", ]
# Preview the first six California rows
head(CAdata, n = 6L)
## rownames state statecode year consumption price eprice oprice lprice
## 116 116 CA 5 1967 522122 0.93 2.14 6.69 0.93
## 117 117 CA 5 1968 517636 0.93 2.11 6.89 0.93
## 118 118 CA 5 1969 562127 0.93 2.07 7.14 0.93
## 119 119 CA 5 1970 552544 0.99 2.12 7.40 0.93
## 120 120 CA 5 1971 630998 1.03 2.19 7.81 0.99
## 121 121 CA 5 1972 637289 1.08 2.34 7.86 1.03
## heating income
## 116 2805 10749.59
## 117 2591 11088.94
## 118 2760 11354.00
## 119 2621 11328.17
## 120 3094 11216.07
## 121 2714 11527.43
# Distribution of California's yearly consumption values
summary(CAdata[["consumption"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 464307 508875 537194 545725 585371 637289
# Distribution of California's yearly income values
summary(CAdata[["income"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 10750 11623 12927 12854 13774 15533

# Line chart of California's natural gas consumption by year (1967 to 1989)
CA <- ggplot() +
  geom_line(data = CAdata, mapping = aes(x = year, y = consumption)) +
  labs(title = "California Yearly Natural Gas Consumption from 1967 to 1989") +
  theme(legend.position = "none")
print(CA)

# Line chart of California's income by year (1967 to 1989)
CA2 <- ggplot() +
  geom_line(data = CAdata, mapping = aes(x = year, y = income)) +
  labs(title = "California Yearly Income from 1967 to 1989") +
  theme(legend.position = "none")
print(CA2)

# Show the two California charts side by side
grid.arrange(CA, CA2, ncol = 2)

# Give every consumption chart the same y-axis range (0 to 650000) so the
# states can be compared directly across panels.
NY <- NY + lims(y = c(0, 650000))
FL <- FL + lims(y = c(0, 650000))
MI <- MI + lims(y = c(0, 650000))
TX <- TX + lims(y = c(0, 650000))
UT <- UT + lims(y = c(0, 650000))
CA <- CA + lims(y = c(0, 650000))
# Likewise, give every income chart the same y-axis range (0 to 17000).
NY2 <- NY2 + lims(y = c(0, 17000))
FL2 <- FL2 + lims(y = c(0, 17000))
MI2 <- MI2 + lims(y = c(0, 17000))
TX2 <- TX2 + lims(y = c(0, 17000))
UT2 <- UT2 + lims(y = c(0, 17000))
CA2 <- CA2 + lims(y = c(0, 17000))
# One row of six panels: consumption charts first ...
grid.arrange(NY, FL, MI, TX, UT, CA, ncol = 6)

# ... then the six income charts in the same state order.
grid.arrange(NY2, FL2, MI2, TX2, UT2, CA2, ncol = 6)

# Graphs every state's yearly consumption on one plot, one colour per state.
# Colour is mapped to the `state` column inside aes() so that ggplot draws a
# legend and scale_color_manual() actually takes effect. With colours passed
# as fixed arguments to each geom, the manual scale and the legend title were
# ignored ("Ignoring unknown labels: colour").
ggplot(
  rbind(NYdata, FLdata, MIdata, TXdata, UTdata, CAdata),
  aes(x = year, y = consumption, color = state)
) +
  geom_line() +
  geom_point() +
  labs(
    x = "year",
    y = "Natural Gas Consumption (in Hundred Thousands)",
    color = "legend"
  ) +
  # Named values tie each colour to its state explicitly; `breaks` fixes the
  # order of the legend entries.
  scale_color_manual(
    values = c(
      NY = "red", FL = "blue", MI = "purple",
      TX = "yellow", UT = "green", CA = "orange"
    ),
    breaks = c("NY", "FL", "MI", "TX", "UT", "CA")
  )

# Graphs all state consumption data on one plot, with a legend.
# `hello` is a wide table: one row per year, one consumption column per state.
# tibble() requires every column to have the same length as `Years`, so a
# state with a different number of rows fails here instead of misaligning.
hello <- tibble(
  Years = 1967:1989,
  NY = NYdata$consumption,
  FL = FLdata$consumption,
  MI = MIdata$consumption,
  TX = TXdata$consumption,
  UT = UTdata$consumption,
  CA = CAdata$consumption
)
# Reshape to long form (one row per year and state) so that colour can be
# mapped to the state name and `y = value` refers to a real column. The
# previous version mapped colour to placeholder strings and relied on their
# alphabetical order lining up with the manual scale.
# NOTE(review): the y-axis label mentions "$" although the column plotted is
# consumption -- confirm the intended unit.
hello %>%
  pivot_longer(cols = -Years, names_to = "State", values_to = "value") %>%
  ggplot(aes(x = Years, y = value, color = State)) +
  geom_line() +
  labs(
    title = "States and their Yearly Natural Gas Consumption",
    x = "Year",
    y = "Natural Gas Consumption (in$100,000)"
  ) +
  scale_color_manual(
    values = c(
      NY = "red", FL = "blue", MI = "purple",
      TX = "yellow", UT = "green", CA = "orange"
    ),
    breaks = c("NY", "FL", "MI", "TX", "UT", "CA")
  )

# Graphs the yearly income figure for every state on one plot (the original
# note describes it as the income each person receives).
# `hi` is a wide table: one row per year, one income column per state.
hi <- tibble(
  Years = 1967:1989,
  NYi = NYdata$income,
  FLi = FLdata$income,
  MIi = MIdata$income,
  TXi = TXdata$income,
  UTi = UTdata$income,
  CAi = CAdata$income
)
# Reshape to long form so that colour is mapped to the state column name and
# `y = value` refers to a real column. The previous version mapped colour to
# placeholder strings whose alphabetical order had to match the manual scale.
hi %>%
  pivot_longer(cols = -Years, names_to = "State", values_to = "value") %>%
  ggplot(aes(x = Years, y = value, color = State)) +
  geom_line() +
  labs(title = "States and their Yearly Income", x = "Year", y = "Income($)") +
  # `breaks` are the column names in `hi`; `labels` show the plain state code.
  scale_color_manual(
    values = c(
      NYi = "red", FLi = "blue", MIi = "purple",
      TXi = "yellow", UTi = "green", CAi = "orange"
    ),
    breaks = c("NYi", "FLi", "MIi", "TXi", "UTi", "CAi"),
    labels = c("NY", "FL", "MI", "TX", "UT", "CA")
  )

# Recap of the consumption summaries, one state at a time
summary(NYdata[["consumption"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 313027 326494 336712 336217 342819 364713
summary(FLdata[["consumption"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 9430 13388 14793 14737 15834 21619
summary(MIdata[["consumption"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 299199 331752 340732 340549 351889 387279
summary(TXdata[["consumption"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 194602 212220 224800 230215 236512 315857
summary(UTdata[["consumption"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 35558 44293 48855 49967 56289 66417
summary(CAdata[["consumption"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 464307 508875 537194 545725 585371 637289
# Boxplot of consumption: one box per state, filled by state
p <- ggplot(ngdata, aes(x = state, y = consumption, fill = state)) +
  geom_boxplot() +
  theme_classic() +
  labs(title = "Compare Natural Gas Consumption in States via a boxplot")
print(p)

# Recap of the income summaries, one state at a time
summary(NYdata[["income"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 10904 11674 12002 12731 13374 16425
summary(FLdata[["income"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 8143 9888 10739 10890 11833 13755
summary(MIdata[["income"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 9600 10509 11365 11427 12058 13597
summary(TXdata[["income"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 7931 9142 10823 10411 11702 12239
summary(UTdata[["income"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 7465 8356 9001 8846 9348 10194
summary(CAdata[["income"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 10750 11623 12927 12854 13774 15533
# Boxplot of income: one box per state, filled by state
i <- ggplot(ngdata, aes(x = state, y = income, fill = state)) +
  geom_boxplot() +
  theme_classic() +
  labs(title = "Compare Income in States via a boxplot")
print(i)

# Consumption and income boxplots next to each other
grid.arrange(p, i, ncol = 2)

# Column chart of consumption per state across the whole dataset. geom_col()
# stacks the yearly rows of each state, so a bar's height is that state's
# total; segments are shaded by their consumption value, legend hidden.
total <- ggplot() +
  geom_col(
    data = ngdata,
    mapping = aes(x = state, y = consumption, fill = consumption)
  ) +
  labs(title = "Total Yearly Natural Gas Consumption of States from 1967 to 1989") +
  theme(legend.position = "none")
print(total)

# Pie chart of each state's share of the combined natural gas consumption.
# One slice per state: the sum of that state's consumption over all its rows.
slices <- c(
  sum(NYdata$consumption), sum(FLdata$consumption), sum(MIdata$consumption),
  sum(TXdata$consumption), sum(UTdata$consumption), sum(CAdata$consumption)
)
# Named `state_names` rather than `names` so the base function names() is not
# masked by a character vector. Order must match `slices`.
state_names <- c("NY", "FL", "MI", "TX", "UT", "CA")
# Share of the six-state total, rounded to a whole percent (because of the
# rounding the labels need not add up to exactly 100).
pct <- round(slices / sum(slices) * 100)
new_labels <- paste0(state_names, "-", pct, "%")
# NOTE(review): the title below contains the typo "throughtout" and says
# "Emitted" for a consumption figure; left unchanged pending confirmation.
pie(
  slices,
  labels = new_labels,
  main = "Percentage of amount of Natural Gas Consumptions States Emitted throughtout the Years",
  col = rainbow(6)
)

# Writes a form-feed character to the console (the original note: clears console).
cat("\014")
# Close the active graphics device, but only if one is open: dev.off() raises
# an error when just the null device is left.
if (!is.null(dev.list())) {
  dev.off()
}
## null device
## 1
#done:)