LOADING DATA AND SEEING HEAD RECORDS
library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
library(RCurl)
## Loading required package: bitops
library(ggplot2)
data<- read.csv("C:\\Users\\jkks9\\Documents\\DATA 607\\v_taxi_daily-1.csv")
head(data)
x<-sqldf('SELECT yr,taxi,sum(rides) FROM data WHERE Yr between 2014 and 2018 GROUP BY yr,taxi')
x2<-sqldf('WITH TAXI AS(SELECT yr,taxi,sum(rides) as rides, DENSE_RANK() OVER(PARTITION BY Yr ORDER BY rides desc) as Rank1 FROM data WHERE Yr between 2014 and 2018 GROUP BY yr,taxi) select * FROM TAXI WHERE Rank1=1')
x2
ggplot(data=x2, aes(x=yr, y=rides, fill=taxi)) +
geom_bar(stat="identity")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
x3<-sqldf('WITH TAXI AS(SELECT yr,taxi,sum(rides) as rides FROM data WHERE Yr between 2014 and 2018 GROUP BY yr,taxi),TAXI2 AS(select *,SUM(rides) OVER(PARTITION BY yr) as TotalRidesYr FROM TAXI) SELECT * FROM TAXI2')
x3
x4<-mutate(x3,x3$rides/x3$TotalRidesYr)
x4
ggplot(x4, aes(fill=taxi, y=x3$rides/x3$TotalRidesYr, x=yr)) +
geom_bar(position="fill", stat="identity")
data2<- read.csv("C:\\Users\\jkks9\\Documents\\DATA 607\\v_taxi_daily-2.csv")
head(data2)
xx<-sqldf('SELECT yr,mth,AVG(rides) FROM data2 WHERE Yr between 2014 and 2018 GROUP BY yr,mth')
xx2<-sqldf('WITH TAXI AS(SELECT yr,mth,sum(rides) as rides,DENSE_RANK() OVER(PARTITION BY Yr ORDER BY rides desc) as rank1 FROM data2 WHERE Yr between 2014 and 2018 GROUP BY yr,mth) SELECT * FROM TAXI WHERE rank1=1 ORDER BY Yr')
xx2
ggplot(data=xx2, aes(x=yr, y=rides, fill=mth)) +
geom_bar(stat="identity")