# Load the round-2 EMA export and derive a day-level timestamp.
# The format string only consumes the month/day/year part of Notification.Time,
# so `date` carries no time-of-day information.
EMA2 <- read.csv(
  "~/Downloads/SHINE_Round2EMA_24May2021.csv",
  stringsAsFactors = FALSE
)
EMA2[["date"]] <- as.POSIXct(EMA2[["Notification.Time"]], format = "%m/%d/%y")
#test =EMA2 %>% select(SHINEID,Notification.Time, GPS.Latitude.Start, #GPS.Longitude.Start, GPS.Latitude.Finish, GPS.Longitude.Finish)
# NOTE(review): exploratory pipeline, disabled in full.
# Its head (`lat <- EMA2 %>%`) was already commented out, which left the
# remaining lines starting with a bare `group_by(date, SHINEID)` call and made
# the script fail here. The result was never assigned, and `init_lat` /
# `init_lng` are recomputed when `df1` is built below, so the whole statement
# is kept only for reference.
#
# lat <- EMA2 %>%
#   group_by(date, SHINEID) %>% # one group per participant per day
#   arrange(date) %>% # so that initial location comes first
#   # single date locations
#   mutate(sd_lats = paste(GPS.Latitude.Start, collapse = ",")) %>%
#   mutate(sd_lngs = paste(GPS.Longitude.Start, collapse = ",")) %>%
#   # initial locations
#   mutate(init_lat = first(GPS.Latitude.Start)) %>% # if first is NA, move on
#   mutate(init_lng = first(GPS.Longitude.Start))
# Keep only prompts with a usable starting latitude: drop missing values and
# the 0.00000 placeholder.
# Missingness is tested with is.na(); the previous `!= 'NA'` comparison only
# worked because comparing NA to a string yields NA, which filter() drops.
EMA2 <- EMA2 %>%
  filter(!is.na(GPS.Latitude.Start)) %>%
  filter(GPS.Latitude.Start != 0.00000)
# Number of participants left after filtering.
# Author's note: 274, "8 people removed"; recorded output: [1] 274
length(unique(EMA2$SHINEID))
# Attach each participant-day's initial location to every row of that day.
# Grouping by (date, SHINEID) gives one group per participant per calendar day.
df1 <- EMA2 %>%
  group_by(date, SHINEID) %>%
  # NOTE(review): `date` has day resolution only, so this sort does not order
  # prompts within a day; "first" below relies on the within-day row order of
  # the input file -- confirm the export is chronological.
  arrange(date) %>%
  mutate(
    init_lng = first(GPS.Longitude.Start),
    # Use the first latitude of the day, falling back to the second one when
    # the first is missing. The previous `first(x) != "NA"` test evaluated to
    # NA for a missing value, so the fallback branch could never be taken.
    # NOTE(review): init_lng has no equivalent fallback -- confirm whether
    # longitude can be missing when latitude is present.
    init_lat = coalesce(first(GPS.Latitude.Start), GPS.Latitude.Start[2])
  ) %>%
  # Unmodified first latitude of the day, kept alongside init_lat.
  mutate(abc = first(GPS.Latitude.Start))
#df1 %<>% select(SHINEID,Notification.Time, GPS.Latitude.Start, GPS.Longitude.Start, init_lng, init_lat)
#lat= lat %>% group_by(init_lat, SHINEID) %>% filter(n() >3)
###if initial NA move to next
# Per participant-day distances computed with distHaversine():
#   dist_consec    - distance between each prompt and the previous prompt in
#                    the same group (not required downstream, per the author)
#   dist_from_init - distance between each prompt and the day's initial location
lat1 <- mutate(
  group_by(df1, date, SHINEID),
  dist_consec = distHaversine(
    cbind(GPS.Longitude.Start, GPS.Latitude.Start),
    cbind(lag(GPS.Longitude.Start), lag(GPS.Latitude.Start))
  ),
  dist_from_init = distHaversine(
    cbind(GPS.Longitude.Start, GPS.Latitude.Start),
    cbind(init_lng, init_lat)
  )
)
# sd_dist: the largest distance from the initial location within each
# participant-day, repeated on every row of that day.
lat <- mutate(
  group_by(lat1, date, SHINEID),
  sd_dist = max(dist_from_init)
)
# Participant count. Author's note: 279; recorded output: [1] 274
length(unique(lat1[["SHINEID"]]))
#hist(lat$sd_dist)
#hist(log(lat$sd_dist))
# Print numbers in fixed rather than scientific notation.
options(scipen = 999)
# Rows with a known latitude and a known daily maximum distance.
# Missingness is tested with is.na() instead of comparing against the string
# 'NA', which only worked through coercion and NA propagation in filter().
df <- lat %>%
  filter(!is.na(GPS.Latitude.Start), !is.na(sd_dist))
#l = lat1 %>%
# arrange(sd_dist, date,SHINEID)
# Winsorised copy of sd_dist (trim = 0.20) to limit the influence of extreme
# distances; compare the two summaries below.
df$sd_dist_w <- psych::winsor(df$sd_dist, trim = 0.20, na.rm = TRUE)
psych::describe(df$sd_dist)
## Recorded output:
## vars n mean sd median trimmed mad min max range
## X1 1 25124 24730.56 247571.2 33.73 1550.6 44.29 0 12791036 12791036
## skew kurtosis se
## X1 28.52 1103.69 1561.91
psych::describe(df$sd_dist_w)
## Recorded output:
## vars n mean sd median trimmed mad min max range skew
## X1 1 25124 1221.11 1891.87 33.73 942.94 35.83 9.57 4658.45 4648.89 1.13
## kurtosis se
## X1 -0.57 11.94
# Parse the full prompt timestamp (date plus hour:minute), then sort rows by
# participant and, within participant, by that timestamp.
lat[["datetime"]] <- as.POSIXct(
  lat[["Notification.Time"]],
  format = "%m/%d/%y %H:%M"
)
lat <- lat[order(as.factor(lat[["SHINEID"]]), lat[["datetime"]]), ]
# Create the day-order lookup: one row per (SHINEID, date) pair, numbered
# 1, 2, ... within each participant in the row order of `lat`.
# Uses select() in place of select_(), which has been deprecated since
# dplyr 0.7.0 and emitted a lifecycle warning here.
day <- lat %>%
  select(SHINEID, date) %>%
  unique() %>%
  group_by(SHINEID) %>%
  dplyr::mutate(order_day = row_number()) %>%
  ungroup()
# Number each prompt within participant (order_ema), then attach the
# day-order variable so that prompts from the same day can be grouped together.
# The join keys are spelled out so the join cannot silently change if `lat`
# and `day` ever share additional column names.
lat <- lat %>%
  group_by(SHINEID) %>%
  dplyr::mutate(order_ema = row_number()) %>% # ema order variable
  ungroup() %>%
  left_join(day, by = c("SHINEID", "date")) %>%
  group_by(SHINEID)
# Build a day-level CovidNews variable: copy CovidNews, then carry the last
# observed value downward across missing rows within each participant-day.
# NOTE(review): the original comment described a morning-only "purpose" item
# and looks carried over from another analysis; presumably CovidNews is
# likewise reported once per day -- confirm.
lat <- lat %>%
  mutate(CovidNews_daily = CovidNews) %>%
  group_by(SHINEID, order_day) %>%
  tidyr::fill(CovidNews_daily, .direction = "down")
# Lagged copy of the day-level CovidNews value: within each participant, every
# row receives the value of the preceding row (NA for the first row).
lat <- dplyr::ungroup(
  dplyr::mutate(
    dplyr::group_by(lat, SHINEID),
    CovidNews_daily_lag = lag(CovidNews_daily)
  )
)
#check =lat%>%
# dplyr::select(SHINEID,Notification.Time, Session.Name, order_ema, CovidNews_daily_lag, CovidNews_daily)
## Not sure whether this will work:
## psych::describe(lat$CovidNews_daily)
## Recorded output:
##    vars     n mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 23748 0.79 0.97      1    0.64 1.48   0   7     7 1.33     1.82 0.01
# Per-participant means of daily mobility (sd_dist) and CovidNews, repeated on
# every row of the participant.
# The previous version passed `na.rm = T` as mutate() arguments, which copied
# the raw columns without averaging and created a stray logical column named
# `na.rm`; the means are now actually computed.
plot_d <- lat %>%
  group_by(SHINEID) %>%
  mutate(
    mean_mob = mean(sd_dist, na.rm = TRUE),
    mean_covid = mean(CovidNews, na.rm = TRUE)
  )
#plot_d =plot_d %>%
# ungroup() %>%
# summarise(mean_mob = mean(mean_mob, na.rm =T),
# mean_covid = mean(mean_covid, na.rm =T),
# sd_mob = mean(mean_mob, na.rm =T),
# sd_covid = mean(mean_covid, na.rm =T)
# )
#plot = lat %>%
#select(CovidNews,sd_dist, SHINEID)
#table_one <- tableby(SHINEID ~ ., data = plot)
#summary(table_one, title = "Gapminder Data")#get one person's drinking mean to use in plot (190)
# psych::describe(lat$CovidNews_daily)
# vars n mean sd median trimmed mad min max range skew kurtosis se
# X1 1 23748 0.79 0.97 1 0.64 1.48 0 7 7 1.33 1.82 0.01
#Intensive repeated measures data simultaneously contain
#between-person and within-person information in drinking
# Example time series for a single participant ("muri053"): day-level
# CovidNews by study day (green line and points), with that participant's own
# mean drawn as a dashed blue reference line.
p <- ggplot(
  data = lat[which(lat$SHINEID == "muri053"), ],
  # NOTE(review): `ID` is not created anywhere in this script; presumably it
  # is a column of the CSV export -- confirm.
  aes(x = order_day, group = ID)
) +
  guides(color = "none") + # to suppress guide
  geom_line(aes(y = CovidNews_daily), color = "green") +
  # One tick per study day; the previous vector listed 28 twice.
  scale_x_continuous(breaks = 1:28, name = "Day") +
  # Values outside 0-3 are dropped by ylim(), hence the recorded warnings.
  ylim(0, 3) +
  geom_hline(
    yintercept = mean(
      lat$CovidNews_daily[lat$SHINEID == "muri053"],
      na.rm = TRUE
    ),
    linetype = "dashed",
    color = "blue",
    size = 1
  ) +
  geom_point(aes(y = CovidNews_daily), color = "green") +
  ylab("Intensity") +
  annotate("text", x = 28, y = 3, label = "example ppt") +
  theme_bw()
p
## Recorded output:
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 5 rows containing missing values (geom_point).
# Count how many participants had their first study day on each calendar date.
# For every participant, take the earliest prompt of order_day == 1, then
# tally those rows by date.
check_day1 <- lat %>%
  group_by(SHINEID) %>%
  filter(order_day == 1) %>%
  # Sort on the parsed timestamp: Notification.Time is a "m/d/y H:M" string,
  # so sorting it directly is lexical rather than chronological.
  arrange(datetime) %>%
  filter(row_number() == 1) %>%
  select(SHINEID, date, Notification.Time) %>%
  ungroup() %>%
  count(date)
# Line plot of the number of participants starting on each date.
ggplot(check_day1, aes(x = date, y = n)) +
  geom_line(size = 1, group = 1) +
  ylab("# of individuals") +
  theme_bw()