Stem vs. Lemma
setwd("C:/Users/subas/Downloads/TX_01_2019_15NonNA")
library(data.table)
## Warning: package 'data.table' was built under R version 3.6.2
## Warning: package 'dplyr' was built under R version 3.6.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Warning: package 'tidyr' was built under R version 3.6.2
a <- read.csv("TMC_Identification.csv")
route10= subset(a, road=="HEBRON PKY" & direction== "EASTBOUND")
route10= subset(route10, tmc=="111P12766"| tmc=="111+12767"| tmc=="111P12767"|tmc=="111+12768"|tmc=="111P12768")
b1= route10[,c(1, 12,13)]
b1
## tmc miles road_order
## 103 111P12766 0.022729 1
## 106 111P12768 0.099674 5
## 108 111P12767 0.009814 3
## 6443 111+12768 1.044005 4
## 6445 111+12767 1.632978 2
aa2= fread("TX_01_2019_15NonNA.csv")
aa3= subset(aa2, tmc_code=="111P12766"| tmc_code=="111+12767"| tmc_code=="111P12767"|tmc_code=="111+12768"|tmc_code=="111P12768")
b2= b1[order(b1$road_order),]
b2 <- within(b2, acc_sum <- cumsum(miles))
colnames(b2)[1] <- "tmc_code"
aa4= left_join(aa3, b2, by="tmc_code")
## Warning: Column `tmc_code` joining character vector and factor, coercing into
## character vector
## tmc_code measurement_tstamp speed average_speed reference_speed
## 1 111P12766 2019-01-01 00:30:00 47 19 50
## 2 111P12766 2019-01-01 07:00:00 32 24 50
## 3 111P12766 2019-01-01 10:00:00 28 22 50
## 4 111P12766 2019-01-01 11:00:00 43 24 50
## 5 111P12766 2019-01-01 12:15:00 40 23 50
## 6 111P12766 2019-01-01 13:45:00 17 18 50
## travel_time_minutes data_density miles road_order acc_sum
## 1 0.03 A 0.022729 1 0.022729
## 2 0.04 A 0.022729 1 0.022729
## 3 0.05 A 0.022729 1 0.022729
## 4 0.03 A 0.022729 1 0.022729
## 5 0.03 A 0.022729 1 0.022729
## 6 0.08 A 0.022729 1 0.022729
## Warning: package 'lubridate' was built under R version 3.6.2
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:data.table':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
## The following object is masked from 'package:base':
##
## date
aa4$Time <- format(as.POSIXct(strptime(aa4$measurement_tstamp, "%Y-%m-%d %H:%M:%S",tz="")) ,format = "%H:%M")
aa5 = aa4 %>% group_by(acc_sum, Time) %>% summarize(spd=mean(speed, na.rm=TRUE))
dat3_1 <- aa5 %>% group_by(acc_sum, Time) %>% summarise(spd1=spd) %>%
spread(Time, spd1)
dat3_1[is.na(dat3_1)]=0
aa6= data.matrix(dat3_1[,-c(1)])
colnames(aa6) <- NULL
aa= distinct(aa5, acc_sum)
bb= distinct(aa5[,c(2)], Time)
library(plotly)
## Warning: package 'plotly' was built under R version 3.6.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.6.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
plot_ly(z = ~aa6, type = "contour")
plot_ly(
x = bb$Time, y = aa$acc_sum,
z = aa6,
type = "contour"
)
aa5 = aa4 %>% group_by(tmc_code, Time) %>% summarize(spd=mean(speed, na.rm=TRUE))
dat3_1 <- aa5 %>% group_by(tmc_code, Time) %>% summarise(spd1=spd) %>%
spread(tmc_code, spd1)
dat3_1[is.na(dat3_1)]=0
aa= distinct(aa5, tmc_code)
bb= distinct(aa5[,c(2)], Time)
plot_ly(
x = bb$Time, y = aa$tmc_code,
z = aa6,
type = "contour"
)
### not generating. need to run this code in R and open the plotly in html