library(DBI)
library(ggplot2)
con <- dbConnect(odbc::odbc(), "mssql")
dbGetQuery(con,'select top 3 "VendorID" from "Green_Taxi_Trip_Data" union all select top 5 "VendorID" from "Green_Taxi_Trip_Data1"')
## VendorID
## 1 2
## 2 2
## 3 2
## 4 2
## 5 2
## 6 1
## 7 2
## 8 2
df<-dbGetQuery(con,'WITH TAXI AS(SELECT DATEPART(year,"pickup_datetime") as Yr, COUNT("pickup_datetime") as Cnt, 1 as "TaxiType" FROM "FMV_Trip_Data" GROUP BY DATEPART(year,"pickup_datetime") union all SELECT DATEPART(year,"Pickup_DateTime") as Yr, COUNT("Pickup_DateTime") as Cnt, 1 as "TaxiType" FROM "FMV_Trip_Data2" GROUP BY DATEPART(year,"Pickup_DateTime") union all SELECT DATEPART(year,"Pickup_DateTime") as Yr, COUNT("Pickup_DateTime") as Cnt, 1 as "TaxiType" FROM "FMV_Trip_Data3" GROUP BY DATEPART(year,"Pickup_DateTime") union all SELECT DATEPART(year,"Pickup_Date") as Yr, COUNT("Pickup_Date") as Cnt, 1 as "TaxiType" FROM "FMV_Trip_Data4" GROUP BY DATEPART(year,"Pickup_Date") union all SELECT DATEPART(year,"lpep_pickup_datetime") as Yr, COUNT("lpep_pickup_datetime") as Cnt, 2 as "TaxiType" FROM "Green_Taxi_Trip_Data" GROUP BY DATEPART(year,"lpep_pickup_datetime") union all SELECT DATEPART(year,"lpep_pickup_datetime") as Yr, COUNT("lpep_pickup_datetime") as Cnt, 2 as "TaxiType" FROM "Green_Taxi_Trip_Data1" GROUP BY DATEPART(year,"lpep_pickup_datetime") union all SELECT DATEPART(year,"lpep_pickup_datetime") as Yr, COUNT("lpep_pickup_datetime") as Cnt, 2 as "TaxiType" FROM "Green_Taxi_Trip_Data2" GROUP BY DATEPART(year,"lpep_pickup_datetime") union all SELECT DATEPART(year,"tpep_pickup_datetime") as Yr, COUNT("tpep_pickup_datetime") as Cnt, 3 as "TaxiType" FROM "Yellow_Taxi_Trip_Data" GROUP BY DATEPART(year,"tpep_pickup_datetime") union all SELECT DATEPART(year,"tpep_pickup_datetime") as Yr, COUNT("tpep_pickup_datetime") as Cnt, 3 as "TaxiType" FROM "Yellow_Taxi_Trip_Data1" GROUP BY DATEPART(year,"tpep_pickup_datetime") union all SELECT DATEPART(year,"tpep_pickup_datetime") as Yr, COUNT("tpep_pickup_datetime") as Cnt, 3 as "TaxiType" FROM "Yellow_Taxi_Trip_Data2" GROUP BY DATEPART(year,"tpep_pickup_datetime")) SELECT * FROM TAXI WHERE Yr between 2016 and 2018')
df
## Yr Cnt TaxiType
## 1 2018 32473088 1
## 2 2017 2830659 1
## 3 2016 46735360 1
## 4 2018 67 2
## 5 2017 11740514 2
## 6 2018 8806832 2
## 7 2016 16385532 2
## 8 2018 366 3
## 9 2017 113496933 3
## 10 2018 112233174 3
## 11 2016 87425024 3
ggplot(data=df, aes(x=Yr, y=TaxiType, group=Yr, colour=Yr)) +
geom_line() +
geom_point()

dbGetQuery(con,'WITH TAXI AS(SELECT DATEPART(year,"pickup_datetime") as Yr, COUNT("pickup_datetime") as Cnt, 1 as "TaxiType" FROM "FMV_Trip_Data" GROUP BY DATEPART(year,"pickup_datetime") union all SELECT DATEPART(year,"Pickup_DateTime") as Yr, COUNT("Pickup_DateTime") as Cnt, 1 as "TaxiType" FROM "FMV_Trip_Data2" GROUP BY DATEPART(year,"Pickup_DateTime") union all SELECT DATEPART(year,"Pickup_DateTime") as Yr, COUNT("Pickup_DateTime") as Cnt, 1 as "TaxiType" FROM "FMV_Trip_Data3" GROUP BY DATEPART(year,"Pickup_DateTime") union all SELECT DATEPART(year,"Pickup_Date") as Yr, COUNT("Pickup_Date") as Cnt, 1 as "TaxiType" FROM "FMV_Trip_Data4" GROUP BY DATEPART(year,"Pickup_Date") union all SELECT DATEPART(year,"lpep_pickup_datetime") as Yr, COUNT("lpep_pickup_datetime") as Cnt, 2 as "TaxiType" FROM "Green_Taxi_Trip_Data" GROUP BY DATEPART(year,"lpep_pickup_datetime") union all SELECT DATEPART(year,"lpep_pickup_datetime") as Yr, COUNT("lpep_pickup_datetime") as Cnt, 2 as "TaxiType" FROM "Green_Taxi_Trip_Data1" GROUP BY DATEPART(year,"lpep_pickup_datetime") union all SELECT DATEPART(year,"lpep_pickup_datetime") as Yr, COUNT("lpep_pickup_datetime") as Cnt, 2 as "TaxiType" FROM "Green_Taxi_Trip_Data2" GROUP BY DATEPART(year,"lpep_pickup_datetime") union all SELECT DATEPART(year,"tpep_pickup_datetime") as Yr, COUNT("tpep_pickup_datetime") as Cnt, 3 as "TaxiType" FROM "Yellow_Taxi_Trip_Data" GROUP BY DATEPART(year,"tpep_pickup_datetime") union all SELECT DATEPART(year,"tpep_pickup_datetime") as Yr, COUNT("tpep_pickup_datetime") as Cnt, 3 as "TaxiType" FROM "Yellow_Taxi_Trip_Data1" GROUP BY DATEPART(year,"tpep_pickup_datetime") union all SELECT DATEPART(year,"tpep_pickup_datetime") as Yr, COUNT("tpep_pickup_datetime") as Cnt, 3 as "TaxiType" FROM "Yellow_Taxi_Trip_Data2" GROUP BY DATEPART(year,"tpep_pickup_datetime")) SELECT *, SUM (Cnt) OVER (PARTITION BY Yr) as "TotalsByYr", (Cnt)/(SUM (Cnt) OVER (PARTITION BY Yr)) as "TaxiTypeDist" FROM TAXI WHERE Yr between 2016 and 2018')
## Yr Cnt TaxiType TotalsByYr TaxiTypeDist
## 1 2016 46735360 1 150545916 0
## 2 2016 16385532 2 150545916 0
## 3 2016 87425024 3 150545916 0
## 4 2017 113496933 3 128068106 0
## 5 2017 11740514 2 128068106 0
## 6 2017 2830659 1 128068106 0
## 7 2018 32473088 1 153513527 0
## 8 2018 67 2 153513527 0
## 9 2018 8806832 2 153513527 0
## 10 2018 366 3 153513527 0
## 11 2018 112233174 3 153513527 0