library(DBI)
library(ggplot2)
con <- dbConnect(odbc::odbc(), "mssql")
dbGetQuery(con,'select top 3  "VendorID" from "Green_Taxi_Trip_Data" union all select top 5 "VendorID" from "Green_Taxi_Trip_Data1"')
##   VendorID
## 1        2
## 2        2
## 3        2
## 4        2
## 5        2
## 6        1
## 7        2
## 8        2
df<-dbGetQuery(con,'WITH TAXI AS(SELECT DATEPART(year,"pickup_datetime") as Yr, COUNT("pickup_datetime") as Cnt, 1 as "TaxiType" FROM "FMV_Trip_Data" GROUP BY DATEPART(year,"pickup_datetime") union all SELECT DATEPART(year,"Pickup_DateTime") as Yr, COUNT("Pickup_DateTime") as Cnt, 1 as "TaxiType" FROM "FMV_Trip_Data2" GROUP BY DATEPART(year,"Pickup_DateTime") union all SELECT DATEPART(year,"Pickup_DateTime") as Yr, COUNT("Pickup_DateTime") as Cnt, 1 as "TaxiType" FROM "FMV_Trip_Data3" GROUP BY DATEPART(year,"Pickup_DateTime") union all SELECT DATEPART(year,"Pickup_Date") as Yr, COUNT("Pickup_Date") as Cnt, 1 as "TaxiType" FROM "FMV_Trip_Data4" GROUP BY DATEPART(year,"Pickup_Date") union all SELECT DATEPART(year,"lpep_pickup_datetime") as Yr, COUNT("lpep_pickup_datetime") as Cnt, 2 as "TaxiType" FROM "Green_Taxi_Trip_Data" GROUP BY DATEPART(year,"lpep_pickup_datetime") union all SELECT DATEPART(year,"lpep_pickup_datetime") as Yr, COUNT("lpep_pickup_datetime") as Cnt, 2 as "TaxiType" FROM "Green_Taxi_Trip_Data1" GROUP BY DATEPART(year,"lpep_pickup_datetime") union all SELECT DATEPART(year,"lpep_pickup_datetime") as Yr, COUNT("lpep_pickup_datetime") as Cnt, 2 as "TaxiType" FROM "Green_Taxi_Trip_Data2" GROUP BY DATEPART(year,"lpep_pickup_datetime") union all SELECT DATEPART(year,"tpep_pickup_datetime") as Yr, COUNT("tpep_pickup_datetime") as Cnt, 3 as "TaxiType" FROM "Yellow_Taxi_Trip_Data" GROUP BY DATEPART(year,"tpep_pickup_datetime") union all SELECT DATEPART(year,"tpep_pickup_datetime") as Yr, COUNT("tpep_pickup_datetime") as Cnt, 3 as "TaxiType" FROM "Yellow_Taxi_Trip_Data1" GROUP BY DATEPART(year,"tpep_pickup_datetime") union all SELECT DATEPART(year,"tpep_pickup_datetime") as Yr, COUNT("tpep_pickup_datetime") as Cnt, 3 as "TaxiType" FROM "Yellow_Taxi_Trip_Data2" GROUP BY DATEPART(year,"tpep_pickup_datetime")) SELECT * FROM TAXI WHERE Yr between 2016 and 2018')

df
##      Yr       Cnt TaxiType
## 1  2018  32473088        1
## 2  2017   2830659        1
## 3  2016  46735360        1
## 4  2018        67        2
## 5  2017  11740514        2
## 6  2018   8806832        2
## 7  2016  16385532        2
## 8  2018       366        3
## 9  2017 113496933        3
## 10 2018 112233174        3
## 11 2016  87425024        3
ggplot(data=df, aes(x=Yr, y=TaxiType, group=Yr, colour=Yr)) +
    geom_line() +
    geom_point()

dbGetQuery(con,'WITH TAXI AS(SELECT DATEPART(year,"pickup_datetime") as Yr, COUNT("pickup_datetime") as Cnt, 1 as "TaxiType" FROM "FMV_Trip_Data" GROUP BY DATEPART(year,"pickup_datetime") union all SELECT DATEPART(year,"Pickup_DateTime") as Yr, COUNT("Pickup_DateTime") as Cnt, 1 as "TaxiType" FROM "FMV_Trip_Data2" GROUP BY DATEPART(year,"Pickup_DateTime") union all SELECT DATEPART(year,"Pickup_DateTime") as Yr, COUNT("Pickup_DateTime") as Cnt, 1 as "TaxiType" FROM "FMV_Trip_Data3" GROUP BY DATEPART(year,"Pickup_DateTime") union all SELECT DATEPART(year,"Pickup_Date") as Yr, COUNT("Pickup_Date") as Cnt, 1 as "TaxiType" FROM "FMV_Trip_Data4" GROUP BY DATEPART(year,"Pickup_Date") union all SELECT DATEPART(year,"lpep_pickup_datetime") as Yr, COUNT("lpep_pickup_datetime") as Cnt, 2 as "TaxiType" FROM "Green_Taxi_Trip_Data" GROUP BY DATEPART(year,"lpep_pickup_datetime") union all SELECT DATEPART(year,"lpep_pickup_datetime") as Yr, COUNT("lpep_pickup_datetime") as Cnt, 2 as "TaxiType" FROM "Green_Taxi_Trip_Data1" GROUP BY DATEPART(year,"lpep_pickup_datetime") union all SELECT DATEPART(year,"lpep_pickup_datetime") as Yr, COUNT("lpep_pickup_datetime") as Cnt, 2 as "TaxiType" FROM "Green_Taxi_Trip_Data2" GROUP BY DATEPART(year,"lpep_pickup_datetime") union all SELECT DATEPART(year,"tpep_pickup_datetime") as Yr, COUNT("tpep_pickup_datetime") as Cnt, 3 as "TaxiType" FROM "Yellow_Taxi_Trip_Data" GROUP BY DATEPART(year,"tpep_pickup_datetime") union all SELECT DATEPART(year,"tpep_pickup_datetime") as Yr, COUNT("tpep_pickup_datetime") as Cnt, 3 as "TaxiType" FROM "Yellow_Taxi_Trip_Data1" GROUP BY DATEPART(year,"tpep_pickup_datetime") union all SELECT DATEPART(year,"tpep_pickup_datetime") as Yr, COUNT("tpep_pickup_datetime") as Cnt, 3 as "TaxiType" FROM "Yellow_Taxi_Trip_Data2" GROUP BY DATEPART(year,"tpep_pickup_datetime")) SELECT *, SUM (Cnt) OVER (PARTITION BY Yr) as "TotalsByYr", (Cnt)/(SUM (Cnt) OVER (PARTITION BY Yr)) as  "TaxiTypeDist" FROM TAXI WHERE Yr between 2016 and 2018')
##      Yr       Cnt TaxiType TotalsByYr TaxiTypeDist
## 1  2016  46735360        1  150545916            0
## 2  2016  16385532        2  150545916            0
## 3  2016  87425024        3  150545916            0
## 4  2017 113496933        3  128068106            0
## 5  2017  11740514        2  128068106            0
## 6  2017   2830659        1  128068106            0
## 7  2018  32473088        1  153513527            0
## 8  2018        67        2  153513527            0
## 9  2018   8806832        2  153513527            0
## 10 2018       366        3  153513527            0
## 11 2018 112233174        3  153513527            0