1 Stem vs. Lemma

setwd("C:/Users/subas/Downloads/TX_01_2019_15NonNA")
library(data.table)
## Warning: package 'data.table' was built under R version 3.6.2
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
## Warning: package 'tidyr' was built under R version 3.6.2
a <- read.csv("TMC_Identification.csv")
route10= subset(a, road=="HEBRON PKY" &   direction== "EASTBOUND")
route10= subset(route10, tmc=="111P12766"| tmc=="111+12767"| tmc=="111P12767"|tmc=="111+12768"|tmc=="111P12768")
b1= route10[,c(1, 12,13)]
b1
##            tmc    miles road_order
## 103  111P12766 0.022729          1
## 106  111P12768 0.099674          5
## 108  111P12767 0.009814          3
## 6443 111+12768 1.044005          4
## 6445 111+12767 1.632978          2
aa2= fread("TX_01_2019_15NonNA.csv")
aa3= subset(aa2, tmc_code=="111P12766"| tmc_code=="111+12767"| tmc_code=="111P12767"|tmc_code=="111+12768"|tmc_code=="111P12768")


b2= b1[order(b1$road_order),]
b2 <- within(b2, acc_sum <- cumsum(miles))
colnames(b2)[1] <- "tmc_code"

aa4= left_join(aa3, b2, by="tmc_code")
## Warning: Column `tmc_code` joining character vector and factor, coercing into
## character vector
head(aa4)
##    tmc_code  measurement_tstamp speed average_speed reference_speed
## 1 111P12766 2019-01-01 00:30:00    47            19              50
## 2 111P12766 2019-01-01 07:00:00    32            24              50
## 3 111P12766 2019-01-01 10:00:00    28            22              50
## 4 111P12766 2019-01-01 11:00:00    43            24              50
## 5 111P12766 2019-01-01 12:15:00    40            23              50
## 6 111P12766 2019-01-01 13:45:00    17            18              50
##   travel_time_minutes data_density    miles road_order  acc_sum
## 1                0.03            A 0.022729          1 0.022729
## 2                0.04            A 0.022729          1 0.022729
## 3                0.05            A 0.022729          1 0.022729
## 4                0.03            A 0.022729          1 0.022729
## 5                0.03            A 0.022729          1 0.022729
## 6                0.08            A 0.022729          1 0.022729
library(lubridate)
## Warning: package 'lubridate' was built under R version 3.6.2
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:data.table':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday, week,
##     yday, year
## The following object is masked from 'package:base':
## 
##     date
aa4$Time <- format(as.POSIXct(strptime(aa4$measurement_tstamp, "%Y-%m-%d  %H:%M:%S",tz="")) ,format = "%H:%M")

aa5 = aa4 %>% group_by(acc_sum, Time) %>% summarize(spd=mean(speed, na.rm=TRUE))
dat3_1 <- aa5 %>% group_by(acc_sum, Time) %>% summarise(spd1=spd) %>%
  spread(Time, spd1)
dat3_1[is.na(dat3_1)]=0


aa6= data.matrix(dat3_1[,-c(1)])
colnames(aa6) <- NULL
aa= distinct(aa5, acc_sum)
bb= distinct(aa5[,c(2)], Time)



library(plotly)
## Warning: package 'plotly' was built under R version 3.6.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.6.3
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
plot_ly(z = ~aa6, type = "contour")
plot_ly(
  x = bb$Time,  y = aa$acc_sum,  
  z = aa6, 
  type = "contour" 
)
aa5 = aa4 %>% group_by(tmc_code, Time) %>% summarize(spd=mean(speed, na.rm=TRUE))
dat3_1 <- aa5 %>% group_by(tmc_code, Time) %>% summarise(spd1=spd) %>%
  spread(tmc_code, spd1)
dat3_1[is.na(dat3_1)]=0

aa= distinct(aa5, tmc_code)
bb= distinct(aa5[,c(2)], Time)


plot_ly(
  x = bb$Time,  y = aa$tmc_code,  
  z = aa6, 
  type = "contour" 
)
### not generating. need to run this code in R and open the plotly in html