#Graphing and Analyzing Yearly Natural Gas Consumption and Income in Different States

library(pacman) #loads pacman

pacman::p_load(rio,ggplot2,gridExtra,dplyr,rlang,reshape2,tidyverse) #loads the contributed packages

# Read the data set; the path is resolved relative to the current working
# directory.
ngdata <- read.csv("NaturalGas.csv")

# View() opens a GUI data viewer, so only call it when a user is actually
# at the console; scripted runs (Rscript, knitr) skip it.
if (interactive()) {
  View(ngdata)
}


# ---- New York ----
# Keep only the rows of ngdata whose state column equals "NY".
NYdata <- ngdata[ngdata$state == "NY", ]

# First rows of the subset, then summaries of consumption and income.
# The `##` lines are output recorded from an earlier run.
head(NYdata)
##   rownames state statecode year consumption price eprice oprice lprice heating
## 1        1    NY        35 1967      313656  1.42   2.98   7.40   1.47    6262
## 2        2    NY        35 1968      319282  1.38   2.91   7.77   1.42    6125
## 3        3    NY        35 1969      331326  1.37   2.84   7.96   1.38    6040
## 4        4    NY        35 1970      346533  1.40   2.87   8.33   1.37    6085
## 5        5    NY        35 1971      352085  1.50   3.07   8.80   1.40    5907
## 6        6    NY        35 1972      363412  1.62   3.26   8.85   1.50    6248
##     income
## 1 10903.75
## 2 11370.02
## 3 11578.68
## 4 11586.77
## 5 11657.42
## 6 11860.80
summary(NYdata[["consumption"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  313027  326494  336712  336217  342819  364713
summary(NYdata[["income"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   10904   11674   12002   12731   13374   16425

# Line chart of New York natural gas consumption by year.
NY <- ggplot() +
  geom_line(data = NYdata, mapping = aes(x = year, y = consumption)) +
  labs(title = "New York Yearly Natural Gas Consumption from 1967 to 1989") +
  theme(legend.position = "none")
print(NY)

# Line chart of New York income by year.
NY2 <- ggplot() +
  geom_line(data = NYdata, mapping = aes(x = year, y = income)) +
  labs(title = "New York Yearly Income from 1967 to 1989") +
  theme(legend.position = "none")
print(NY2)

# Both New York charts side by side.
grid.arrange(NY, NY2, ncol = 2)

# ---- Florida ----
# Keep only the rows of ngdata whose state column equals "FL".
FLdata <- ngdata[ngdata$state == "FL", ]

# First rows of the subset, then summaries of consumption and income.
# The `##` lines are output recorded from an earlier run.
head(FLdata)
##    rownames state statecode year consumption price eprice oprice lprice heating
## 24       24    FL        10 1967        9430  2.26   2.16   6.86   2.21     569
## 25       25    FL        10 1968       11318  2.59   2.09   6.80   2.26     954
## 26       26    FL        10 1969       11636  2.48   2.03   7.04   2.59     937
## 27       27    FL        10 1970       14702  2.51   1.98   7.28   2.48     871
## 28       28    FL        10 1971       13242  2.54   2.02   8.04   2.51     596
## 29       29    FL        10 1972       12837  2.66   2.12   8.10   2.54     507
##     income
## 24 8143.32
## 25 8651.26
## 26 9117.02
## 27 9410.66
## 28 9548.78
## 29 9956.45
summary(FLdata[["consumption"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    9430   13388   14793   14737   15834   21619
summary(FLdata[["income"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    8143    9888   10739   10890   11833   13755

# Line chart of Florida natural gas consumption by year.
FL <- ggplot() +
  geom_line(data = FLdata, mapping = aes(x = year, y = consumption)) +
  labs(title = "Florida Yearly Natural Gas Consumption from 1967 to 1989") +
  theme(legend.position = "none")
print(FL)

# Line chart of Florida income by year.
FL2 <- ggplot() +
  geom_line(data = FLdata, mapping = aes(x = year, y = income)) +
  labs(title = "Floridas Yearly Income from 1967 to 1989") +
  theme(legend.position = "none")
print(FL2)

# Both Florida charts side by side.
grid.arrange(FL, FL2, ncol = 2)

# ---- Michigan ----
# Keep only the rows of ngdata whose state column equals "MI".
MIdata <- ngdata[ngdata$state == "MI", ]

# First rows of the subset, then summaries of consumption and income.
# The `##` lines are output recorded from an earlier run.
head(MIdata)
##    rownames state statecode year consumption price eprice oprice lprice heating
## 47       47    MI        23 1967      302472  0.98   2.32   6.58   0.98    7004
## 48       48    MI        23 1968      315694  0.98   2.27   6.76   0.98    6708
## 49       49    MI        23 1969      333264  0.98   2.24   6.99   0.98    6911
## 50       50    MI        23 1970      340033  1.01   2.27   7.16   0.98    6882
## 51       51    MI        23 1971      343773  1.06   2.32   7.57   1.01    6652
## 52       52    MI        23 1972      355266  1.10   2.42   7.63   1.06    7273
##      income
## 47  9600.39
## 48 10034.59
## 49 10266.57
## 50  9862.30
## 51 10082.84
## 52 10577.12
summary(MIdata[["consumption"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  299199  331752  340732  340549  351889  387279
summary(MIdata[["income"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    9600   10509   11365   11427   12058   13597

# Line chart of Michigan natural gas consumption by year.
MI <- ggplot() +
  geom_line(data = MIdata, mapping = aes(x = year, y = consumption)) +
  labs(title = "Michigan Yearly Natural Gas Consumption from 1967 to 1989") +
  theme(legend.position = "none")
print(MI)

# Line chart of Michigan income by year.
MI2 <- ggplot() +
  geom_line(data = MIdata, mapping = aes(x = year, y = income)) +
  labs(title = "Michigan Yearly income from 1967 to 1989") +
  theme(legend.position = "none")
print(MI2)

# Both Michigan charts side by side.
grid.arrange(MI, MI2, ncol = 2)

# ---- Texas ----
# Keep only the rows of ngdata whose state column equals "TX".
TXdata <- ngdata[ngdata$state == "TX", ]

# First rows of the subset, then summaries of consumption and income.
# The `##` lines are output recorded from an earlier run.
head(TXdata)
##    rownames state statecode year consumption price eprice oprice lprice heating
## 70       70    TX        44 1967      201407  0.87   2.19   5.01   0.85    1859
## 71       71    TX        44 1968      211763  0.87   2.15   5.24   0.87    2258
## 72       72    TX        44 1969      220728  0.90   2.09   5.43   0.87    2032
## 73       73    TX        44 1970      232189  0.92   2.05   5.71   0.90    2119
## 74       74    TX        44 1971      237387  0.99   2.04   6.17   0.92    1689
## 75       75    TX        44 1972      240662  1.01   2.07   6.23   0.99    2021
##     income
## 70 7930.66
## 71 8240.34
## 72 8517.22
## 73 8660.20
## 74 8655.35
## 75 8940.77
summary(TXdata[["consumption"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  194602  212220  224800  230215  236512  315857
summary(TXdata[["income"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    7931    9142   10823   10411   11702   12239

# Line chart of Texas natural gas consumption by year.
TX <- ggplot() +
  geom_line(data = TXdata, mapping = aes(x = year, y = consumption)) +
  labs(title = "Texas Yearly Natural Gas Consumption from 1967 to 1989") +
  theme(legend.position = "none")
print(TX)

# Line chart of Texas income by year.
TX2 <- ggplot() +
  geom_line(data = TXdata, mapping = aes(x = year, y = income)) +
  labs(title = "Texas Yearly Income from 1967 to 1989") +
  theme(legend.position = "none")
print(TX2)

# Both Texas charts side by side.
grid.arrange(TX, TX2, ncol = 2)

# ---- Utah ----
# Keep only the rows of ngdata whose state column equals "UT".
UTdata <- ngdata[ngdata$state == "UT", ]

# First rows of the subset, then summaries of consumption and income.
# The `##` lines are output recorded from an earlier run.
head(UTdata)
##    rownames state statecode year consumption price eprice oprice lprice heating
## 93       93    UT        45 1967       38935  0.68   2.25   6.74   0.69    6530
## 94       94    UT        45 1968       40779  0.74   2.23   6.95   0.68    6790
## 95       95    UT        45 1969       43948  0.83   2.20   7.20   0.74    6510
## 96       96    UT        45 1970       44637  0.85   2.17   7.46   0.83    6658
## 97       97    UT        45 1971       49849  0.87   2.23   7.92   0.85    6969
## 98       98    UT        45 1972       48855  0.90   2.24   7.92   0.87    6505
##     income
## 93 7465.34
## 94 7560.08
## 95 7661.95
## 96 7865.50
## 97 8021.98
## 98 8299.55
summary(UTdata[["consumption"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   35558   44293   48855   49967   56289   66417
summary(UTdata[["income"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    7465    8356    9001    8846    9348   10194

# Line chart of Utah natural gas consumption by year.
UT <- ggplot() +
  geom_line(data = UTdata, mapping = aes(x = year, y = consumption)) +
  labs(title = "Utah Yearly Natural Gas Consumption from 1967 to 1989") +
  theme(legend.position = "none")
print(UT)

# Line chart of Utah income by year.
UT2 <- ggplot() +
  geom_line(data = UTdata, mapping = aes(x = year, y = income)) +
  labs(title = "Utah Yearly Income from 1967 to 1989") +
  theme(legend.position = "none")
print(UT2)

# Both Utah charts side by side.
grid.arrange(UT, UT2, ncol = 2)

# ---- California ----
# Keep only the rows of ngdata whose state column equals "CA".
CAdata <- ngdata[ngdata$state == "CA", ]

# First rows of the subset, then summaries of consumption and income.
# The `##` lines are output recorded from an earlier run.
head(CAdata)
##     rownames state statecode year consumption price eprice oprice lprice
## 116      116    CA         5 1967      522122  0.93   2.14   6.69   0.93
## 117      117    CA         5 1968      517636  0.93   2.11   6.89   0.93
## 118      118    CA         5 1969      562127  0.93   2.07   7.14   0.93
## 119      119    CA         5 1970      552544  0.99   2.12   7.40   0.93
## 120      120    CA         5 1971      630998  1.03   2.19   7.81   0.99
## 121      121    CA         5 1972      637289  1.08   2.34   7.86   1.03
##     heating   income
## 116    2805 10749.59
## 117    2591 11088.94
## 118    2760 11354.00
## 119    2621 11328.17
## 120    3094 11216.07
## 121    2714 11527.43
summary(CAdata[["consumption"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  464307  508875  537194  545725  585371  637289
summary(CAdata[["income"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   10750   11623   12927   12854   13774   15533

# Line chart of California natural gas consumption by year.
CA <- ggplot() +
  geom_line(data = CAdata, mapping = aes(x = year, y = consumption)) +
  labs(title = "California Yearly Natural Gas Consumption from 1967 to 1989") +
  theme(legend.position = "none")
print(CA)

# Line chart of California income by year.
CA2 <- ggplot() +
  geom_line(data = CAdata, mapping = aes(x = year, y = income)) +
  labs(title = "California Yearly Income from 1967 to 1989") +
  theme(legend.position = "none")
print(CA2)

# Both California charts side by side.
grid.arrange(CA, CA2, ncol = 2)

# Put every state's chart on a common y-axis so the panels can be compared
# directly: one range for the consumption charts, one for the income charts.
consumption_limits <- c(0, 650000)
income_limits <- c(0, 17000)

NY <- NY + ylim(consumption_limits)
FL <- FL + ylim(consumption_limits)
MI <- MI + ylim(consumption_limits)
TX <- TX + ylim(consumption_limits)
UT <- UT + ylim(consumption_limits)
CA <- CA + ylim(consumption_limits)

NY2 <- NY2 + ylim(income_limits)
FL2 <- FL2 + ylim(income_limits)
MI2 <- MI2 + ylim(income_limits)
TX2 <- TX2 + ylim(income_limits)
UT2 <- UT2 + ylim(income_limits)
CA2 <- CA2 + ylim(income_limits)

# Six consumption panels in a single row.
grid.arrange(NY, FL, MI, TX, UT, CA, ncol = 6)

# Six income panels in a single row.
grid.arrange(NY2, FL2, MI2, TX2, UT2, CA2, ncol = 6)

#graphs all state data on one plot
# Colour assigned to each state, keyed by state code so the colour/label
# pairing does not depend on ordering.
state_palette <- c(
  NY = "red", FL = "blue", MI = "purple",
  TX = "yellow", UT = "green", CA = "orange"
)

# Stack the six per-state subsets so `state` can be mapped to colour. A
# colour given as a constant outside aes() produces no legend, and ggplot2
# then ignores labs(color = ) and scale_color_manual() with a warning.
six_states <- rbind(NYdata, FLdata, MIdata, TXdata, UTdata, CAdata)

# NOTE(review): the y-axis label says "in Hundred Thousands" but the values
# plotted are the raw `consumption` column - confirm the intended units.
ggplot(six_states, aes(x = year, y = consumption, color = state)) +
  geom_line() +
  geom_point() +
  labs(
    x = "year",
    y = "Natural Gas Consumption (in Hundred Thousands)",
    color = "legend"
  ) +
  scale_color_manual(values = state_palette, breaks = names(state_palette))

#graphs all state data on one plot but with legend
# Wide table: one row per year, one consumption column per state.
# Assumes every state subset holds exactly the years 1967-1989 in ascending
# order (tibble() errors if a column is not length 23 or 1) - TODO confirm.
hello <- tibble(
  Years = 1967:1989,
  NY = NYdata$consumption,
  FL = FLdata$consumption,
  MI = MIdata$consumption,
  TX = TXdata$consumption,
  UT = UTdata$consumption,
  CA = CAdata$consumption
)

# Colour per state, keyed by column name so the legend cannot drift out of
# sync with the lines.
consumption_colors <- c(
  NY = "red", FL = "blue", MI = "purple",
  TX = "yellow", UT = "green", CA = "orange"
)

# Reshape to long form (Years, State, value) so a single geom_line() draws
# one line per state and ggplot2 builds the legend from the State column.
# NOTE(review): the y-axis label mentions "$100,000" but `consumption` is
# plotted unscaled - confirm the intended units.
hello %>%
  pivot_longer(cols = -Years, names_to = "State", values_to = "value") %>%
  ggplot(aes(x = Years, y = value, color = State)) +
  geom_line() +
  labs(
    title = "States and their Yearly Natural Gas Consumption",
    x = "Year",
    y = "Natural Gas Consumption (in$100,000)"
  ) +
  scale_color_manual(
    values = consumption_colors,
    breaks = names(consumption_colors)
  )

#graphs the income column of each state by year
# Wide table: one row per year, one income column per state.
# Assumes every state subset holds exactly the years 1967-1989 in ascending
# order (tibble() errors if a column is not length 23 or 1) - TODO confirm.
hi <- tibble(
  Years = 1967:1989,
  NYi = NYdata$income,
  FLi = FLdata$income,
  MIi = MIdata$income,
  TXi = TXdata$income,
  UTi = UTdata$income,
  CAi = CAdata$income
)

# Colour per income column, keyed by column name; the legend shows the plain
# state codes via `labels`.
income_colors <- c(
  NYi = "red", FLi = "blue", MIi = "purple",
  TXi = "yellow", UTi = "green", CAi = "orange"
)

# Reshape to long form (Years, State, value) so a single geom_line() draws
# one line per state and ggplot2 builds the legend from the State column.
hi %>%
  pivot_longer(cols = -Years, names_to = "State", values_to = "value") %>%
  ggplot(aes(x = Years, y = value, color = State)) +
  geom_line() +
  labs(title = "States and their Yearly Income", x = "Year", y = "Income($)") +
  scale_color_manual(
    values = income_colors,
    breaks = names(income_colors),
    labels = c("NY", "FL", "MI", "TX", "UT", "CA")
  )

# Consumption summaries for all six states, repeated here for reference next
# to the boxplot. The `##` lines are output recorded from an earlier run.
summary(NYdata[["consumption"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  313027  326494  336712  336217  342819  364713
summary(FLdata[["consumption"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    9430   13388   14793   14737   15834   21619
summary(MIdata[["consumption"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  299199  331752  340732  340549  351889  387279
summary(TXdata[["consumption"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  194602  212220  224800  230215  236512  315857
summary(UTdata[["consumption"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   35558   44293   48855   49967   56289   66417
summary(CAdata[["consumption"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  464307  508875  537194  545725  585371  637289

# One box per state showing the spread of consumption values in ngdata.
p <- ggplot(ngdata, aes(x = state, y = consumption, fill = state)) +
  geom_boxplot() +
  theme_classic() +
  labs(title = "Compare Natural Gas Consumption in States via a boxplot")
print(p)

# Income summaries for all six states, repeated here for reference next to
# the boxplot. The `##` lines are output recorded from an earlier run.
summary(NYdata[["income"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   10904   11674   12002   12731   13374   16425
summary(FLdata[["income"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    8143    9888   10739   10890   11833   13755
summary(MIdata[["income"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    9600   10509   11365   11427   12058   13597
summary(TXdata[["income"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    7931    9142   10823   10411   11702   12239
summary(UTdata[["income"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    7465    8356    9001    8846    9348   10194
summary(CAdata[["income"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   10750   11623   12927   12854   13774   15533

# One box per state showing the spread of income values in ngdata.
i <- ggplot(ngdata, aes(x = state, y = income, fill = state)) +
  geom_boxplot() +
  theme_classic() +
  labs(title = "Compare Income in States via a boxplot")
print(i)

# Consumption and income boxplots next to each other.
grid.arrange(p, i, ncol = 2)

# Column chart of consumption per state. geom_col() stacks one segment per
# row of ngdata, so each bar's height is the sum of that state's consumption
# values; segments are shaded by their own consumption value.
total <- ggplot() +
  geom_col(
    data = ngdata,
    mapping = aes(x = state, y = consumption, fill = consumption)
  ) +
  labs(title = "Total Yearly Natural Gas Consumption of States from 1967 to 1989") +
  theme(legend.position = "none")
print(total)

#pie chart of each state's share of natural gas consumption
# Total consumption per state, summed over every row of each subset.
slices <- c(
  sum(NYdata$consumption), sum(FLdata$consumption), sum(MIdata$consumption),
  sum(TXdata$consumption), sum(UTdata$consumption), sum(CAdata$consumption)
)

# State codes in the same order as `slices`. Named `state_names` rather than
# `names` so base::names() is not masked by a variable.
state_names <- c("NY", "FL", "MI", "TX", "UT", "CA")

# Share of the grand total as a whole-number percentage; because each share
# is rounded independently the labels may not add up to exactly 100.
pct <- round(slices / sum(slices) * 100)

new_labels <- paste0(state_names, "-", pct, "%")

pie(
  slices,
  labels = new_labels,
  main = "Percentage of amount of Natural Gas Consumptions States Emitted throughout the Years",
  col = rainbow(6)
)

# Emit a form-feed character, which consoles such as RStudio's treat as
# "clear screen".
cat("\014")

# Shut down the currently active graphics device, discarding its plots.
dev.off(which = dev.cur())
## null device 
##           1
#done:)