Theme and font settings
theme_blue <- function(base_size = 11, base_family = "") {
theme_grey(base_size = base_size, base_family = base_family) %+replace%
theme(line = element_line(colour = "white", size = I(1.5), linetype = "solid",
lineend = "round", arrow = NULL, inherit.blank = FALSE),
rect = element_rect(fill = "#00010F", colour = "#00010F", size = 0, linetype = "solid", inherit.blank = FALSE),
text = element_text(family = "Helvetica", face = "plain", colour = "white", size = base_size,hjust = I(.5), vjust = I(.5), angle = 0, lineheight = base_size,color = NULL,margin = margin(I(.2),I(.2),I(.2),I(.2),unit="pt"),debug=F, inherit.blank = FALSE),
title = element_text(family = "AvantGarde", face = "plain", colour = "white", size = I(base_size),hjust = I(.5), vjust = I(1), angle = 0, lineheight = I(base_size),color = NULL,margin = margin(I(1),I(1),I(1),I(1),unit="pt"), inherit.blank = FALSE),
axis.line = element_line(colour = "#3A42B0", size = 1, linetype = "solid",
lineend = "round", inherit.blank = FALSE),
axis.text.x = element_text(size = base_size*0.6, color = "white", lineheight = 0.6,hjust = I(1), vjust = I(1),angle=60),
axis.text.y = element_text(size = base_size*0.6, color = "white", lineheight = 0.9,hjust = I(.5), vjust = I(1),angle=60),
axis.ticks = element_line(colour = "#3A42B0", size = 0.2),
axis.title = element_text(family = "Helvetica", face = "italic", colour = "white", size = I(base_size*1),hjust = I(.5), vjust = I(.5), lineheight = I(base_size*1), inherit.blank = FALSE),
axis.title.x = element_text(size = base_size, color = "white", margin = margin(0, .5, 0, 0)),
axis.title.y = element_text(size = base_size, color = "white", angle = 90, margin = margin(0, .5, 0, 0)),
axis.ticks.length = unit(0.3, "lines"),
# Specify legend options
legend.background = element_rect(color = NA, fill = "#060713"),
legend.key = element_rect(color = "white", fill = "black"),
legend.key.size = unit(1.2, "lines"),
legend.key.height = NULL,
legend.key.width = NULL,
legend.text = element_text(size = base_size*0.8, color = "white"),
legend.title = element_text(size = base_size*0.8, face = "bold", hjust = 0, color = "white"),
legend.position = "right",
legend.text.align = NULL,
legend.title.align = NULL,
legend.direction = "vertical",
legend.box = NULL,
# Specify panel options
panel.background = element_rect(fill = "#060713",color="#060713", inherit.blank = FALSE),
panel.border = element_rect(fill = NA, color = NA),
panel.grid.major = element_line(colour = "#34415B", size = I(.1), linetype = "solid",lineend = "butt", inherit.blank = FALSE),
panel.grid.minor = element_line(colour = "#1C2D52", size = I(.1), linetype = "dotted",lineend = "butt", inherit.blank = FALSE),
panel.spacing = margin(rep(.5,4),unit="pt"),
panel.spacing.x = unit(.5,"pt"),
panel.spacing.y = unit(.5,"pt"),
# Specify facetting options
strip.background = element_rect(fill = "#060713", color = "#060713"),
strip.text = element_text(family = "Helvetica", face = "plain", colour = "white", size = I(base_size*.8),hjust = .5,lineheight = I(base_size*1),margin = margin(rep(1,4), "pt"),inherit.blank = FALSE),
strip.text.x = element_text(color = "white"),
strip.text.y = element_text(color = "white",angle = -90),
# Specify plot options
plot.background = element_rect(fill = "#00010F",colour = "#00010F", linetype = "solid", inherit.blank = FALSE),
plot.title = element_text(family=base_family,size = base_size, color = "white", lineheight = I(1.2),vjust=1),
plot.subtitle = element_text( size = I(base_size*.8),hjust = I(.5),vjust=1, lineheight = I(1.2), inherit.blank = FALSE),
plot.margin = margin(.2,.2,.2,.2,unit="cm"),
strip.switch.pad.grid = unit(0, "cm")
)
}
update_geom_defaults("bar", list(fill = "#0686DB"))
theme_set(theme_blue(base_family = "AvantGarde"))
update_geom_defaults("text",list(colour="white",size=rel(2.5)))
windowsFonts(AvantGarde=windowsFont("TT AvantGarde-Book"))
windowsFonts(Helvetica=windowsFont("TT Helvetica"))
options(scipen=12)Color Schema
# http://colorschemedesigner.com/csd-3.5/
ColorKey <- c("Base", "Darker", "Darkest", "LightBold", "LightLight")
MonoDarkBlue <- c("#00021B", "#060713", "#00010F", "#3A42B0", "#686DB0")
CompGold <- c("#B48B18", "#1B1608", "#161000", "#B4943B", "#B4A06A")
AnalogicViolet <- c("#230159", "#0B0612", "#06000F", "#6839B0", "#8467B0")
AnalogicTurquoise <- c("#00395B", "#050D11", "#00090E", "#3983AF", "#6794AF")
Actcolors <- c(Meditation = "#6839B0", Productivity = "#357AA2", Distraction = "#FF7549",
RStudio = "#75AADB", Yoga = "#DEF83F", Plyometric = "#E03974")
ColorMatrix <- matrix(data = c(MonoDarkBlue, CompGold, AnalogicViolet, AnalogicTurquoise),
nrow = 4, ncol = 5, byrow = T, dimnames = list(c("MonoDarkBlue", "CompGold",
"AnalogicViolet", "AnalogicTurquoise"), ColorKey))
write_csv(as.data.frame(ColorMatrix), path = "ColorMatrix.csv")Load Data
Meditation <- read.csv("~/Northeastern/Git/ppua5302/Project 1/Data/insight_sessions_export.csv",
header = T, sep = ",")
library(lubridate)
names(Meditation) <- c("Time", "Duration", "Activity", "Preset")
Meditation$Time <- mdy_hms(Meditation$Time)
Meditation$Duration <- as.numeric(as.character(Meditation$Duration))
tagList(tags$h4("Types of Health Activity by Week number for the Year 2017"), tags$div(style = "width:100%",
DT::datatable(HlthLine <- Meditation %>% filter(year(Time) == "2017") %>% mutate(Weeknum = week(Time)) %>%
group_by(Weeknum, Activity) %>% summarise(Sum = sum(Duration)) %>% ungroup %>%
mutate(Hrs = {
Sum/60
}) %>% select(-Sum) %>% spread(key = Activity, value = Hrs) %>% replace_na(list(Meditation = 0,
Walking = 0, Yoga = 0, Breathing = 0)) %>% mutate(Meditation = {
Meditation + Breathing
}) %>% select(-c(Breathing, Healing)), width = "100%")))Filter Data and add to Sheet
GsheetUpdate <- Meditation %>% filter(Time > mdy("07-13-2017"))
names(GsheetUpdate) <- c("Started At", "Duration (minutes)", "Activity", "Preset")
library(googlesheets)
gs_auth()
MedTrack <- gs_url("https://docs.google.com/spreadsheets/d/19Pz41A0dq0DsJIBuFO0VOw0fXAmzxO7UOa7bHKcIgYQ/edit#gid=1807004634")
gs_edit_cells(MedTrack, ws = 2, input = GsheetUpdate, anchor = "A4095")D Fs of Health Activities
Med17 <- Meditation %>% filter(year(Time) == "2017" & Activity == "Meditation") %>%
mutate(Weeknum = week(Time)) %>% select(Weeknum, everything())
Yoga17 <- Meditation %>% filter(year(Time) == "2017" & Activity == "Yoga") %>% mutate(Weeknum = week(Time)) %>%
select(Weeknum, everything())
Plyo17 <- Meditation %>% filter(year(Time) == "2017" & Activity == "Walking") %>%
mutate(Weeknum = week(Time)) %>% select(Weeknum, everything())Import Rescuetime Data
RT <- read_csv(file = "~/Northeastern/Git/ppua5302/Project 1/Data/RescueTime_Report_All_Activities__by_week_2017-01-01.csv",
col_names = T)
RT$Date <- ymd(RT$Date)
class(RT$Productivity)## [1] "integer"
## [1] "Reference & Learning" "Communication & Scheduling"
## [3] "Social Networking" "Business"
## [5] "Utilities" "Design & Composition"
## [7] "Entertainment" "Software Development"
## [9] "Uncategorized" "News & Opinion"
## [11] "Shopping" "Miscellaneous"
RT$Overview <- factor(RT$Overview, levels = unique(RT$Overview), labels = c("Ref & Learn",
"Com & Sch", "Social Net", "Business", "Utilities", "Design", "Entertain", "Software Dev",
"Uncat.", "News & Op", "Shopping", "Misc"))
htmltools::tags$p("Sum of time and percent of total time in each category of productivity for 2017")Sum of time and percent of total time in each category of productivity for 2017
knitr::kable(RTSums <- RT %>% group_by(Productivity) %>% summarise(sum = {
sum(`Time Spent (seconds)`)/3600
}) %>% mutate(P = sum/sum(sum)))| Productivity | sum | P |
|---|---|---|
| -2 | 84.27556 | 0.0311079 |
| -1 | 489.65000 | 0.1807400 |
| 0 | 934.40833 | 0.3449095 |
| 1 | 527.00972 | 0.1945302 |
| 2 | 673.79722 | 0.2487125 |
htmltools::tags$p("Total % of time spent in productive activities during 2017:",
sum(RTSums$P[4:5]))Total % of time spent in productive activities during 2017: 0.443242717273937
Totals
RTOV <- RT %>% group_by(Overview, Category) %>% summarize(`Total Time` = sum(`Time Spent (seconds)`)/3600) %>%
arrange(desc(`Total Time`)) %>% filter(`Total Time` > 1)
TotalHrs <- sum(RTOV$`Total Time`[!is.na(RTOV$`Total Time`)])
tags$p("Total Hours of Digital Activity in 2017: ", TotalHrs)Total Hours of Digital Activity in 2017: 2701.25611111111
TtlProd <- RT %>% filter(Productivity > 0) %>% select(`Time Spent (seconds)`) %>%
colSums()
TtlDist <- RT %>% filter(Productivity < 0) %>% select(`Time Spent (seconds)`) %>%
colSums()
tags$p("Ratio of Productive:Distracting Digital Activity in 2017: ", (TtlProd/3600)/(TtlDist/3600))Ratio of Productive:Distracting Digital Activity in 2017: 2.09226951617806
htmltools::tagList(tags$p("Total Hours of Meditation in 2017:", sum(Med17 %>% group_by(Weeknum) %>%
summarize(TotalHrs = sum(Duration)/60) %>% .$TotalHrs)))Total Hours of Meditation in 2017: 424.233333333333
htmltools::tagList(tags$p("Total Hours of Yoga in 2017:", sum(Yoga17 %>% group_by(Weeknum) %>%
summarize(TotalHrs = sum(Duration)/60) %>% .$TotalHrs)))Total Hours of Yoga in 2017: 289.25
TotalR <- RT %>% mutate(R = str_detect(Activity, "rstudio")) %>% filter(R == T) %>%
select(-R) %>% select(`Time Spent (seconds)`) %>% colSums()
htmltools::tagList(tags$p("Total Hours of R Usage in 2017:", TotalR/3600))Total Hours of R Usage in 2017: 188.762222222222
tagList(tags$h4("Mean, Max, and Total Time in Hrs + Total # of Events per Productivity level for Each week of 2017"),
tags$div(DT::datatable((RT %>% filter(Date > "2017-01-01") %>% group_by(Date,
Productivity) %>% select(-`Number of People`) %>% summarise_if(.predicate = is.numeric,
.funs = c(MeanTime.s = mean, MaxTime.s = max, TotalTime.s = sum, TotalEntries = n_distinct)) %>%
ungroup() %>% mutate_at(.vars = vars(ends_with("s")), .funs = funs(./3600))),
width = "100%")))Graph of Health Activities
ggplot(data = HlthLine, mapping = aes(x = Weeknum)) + geom_line(mapping = aes(y = Meditation,
colour = "Meditation")) + geom_line(mapping = aes(y = Yoga, colour = "Yoga")) +
geom_line(mapping = aes(y = Walking, colour = "Plyometric")) + geom_point(mapping = aes(y = Meditation,
colour = "Meditation"), shape = 24) + geom_point(mapping = aes(y = Yoga, colour = "Yoga"),
shape = 25) + geom_point(mapping = aes(y = Walking, colour = "Plyometric")) +
scale_x_discrete(limits = seq(0, 54, 2)) + scale_y_continuous(minor_breaks = seq(0,
50, 1), breaks = seq(0, 50, 4)) + theme(legend.position = "right", legend.direction = "vertical") +
guides(colour = guide_legend("Activity")) + scale_colour_manual(name = "Lines",
values = Actcolors, breaks = c("Meditation", "Yoga", "Plyometric")) + labs(title = "Health Activities by Week",
subtitle = "Hours spent per Activity by Week Number\nActivities:Meditation,Yoga,Plyometric",
caption = "", x = "Week Number", y = "Hours")Graph 1 Timeseries
MedbyWeek <- Med17 %>% group_by(Weeknum) %>% summarize(Sum = round(sum(Duration)/60,
2))
ProbyWeek <- RT %>% filter(Productivity > 0) %>% mutate(Weeknum = week(Date)) %>%
group_by(Weeknum) %>% summarize(Sum = round(sum(`Time Spent (seconds)`)/3600,
2))
DisbyWeek <- RT %>% filter(Productivity < 0) %>% mutate(Weeknum = week(Date)) %>%
group_by(Weeknum) %>% summarize(Sum = round(sum(`Time Spent (seconds)`)/3600,
2))
RbyWeek <- RT %>% filter(Activity == "rstudio") %>% mutate(Weeknum = week(Date)) %>%
group_by(Weeknum) %>% summarize(Sum = round(sum(`Time Spent (seconds)`)/3600,
2))
# https://stackoverflow.com/questions/7549694/adding-regression-line-equation-and-r2-on-graph
lm_eqn = function(m) {
l <- list(a = format(coef(m)[1], digits = 2), b = format(abs(coef(m)[2]), digits = 2),
r2 = format(summary(m)$r.squared, digits = 3))
if (coef(m)[2] >= 0) {
eq <- substitute(italic(y) == a + b %.% italic(x) * "," ~ ~italic(r)^2 ~
"=" ~ r2, l)
} else {
eq <- substitute(italic(y) == a - b %.% italic(x) * "," ~ ~italic(r)^2 ~
"=" ~ r2, l)
}
as.character(as.expression(eq))
}
RStudioLM <- lm(Sum ~ Weeknum, RbyWeek)
LM <- RbyWeek %>% mutate(LM = predict(RStudioLM, newdata = RbyWeek)) %>% filter(LM >
0)
htmltools::tags$p("The rate at which RStudio Usage increases by week in hrs:", RStudioLM$coefficients["Weeknum"])The rate at which RStudio Usage increases by week in hrs: 0.500887190402477
ggplot(mapping = aes(x = Weeknum)) + geom_line(data = ProbyWeek, mapping = aes(y = Sum,
colour = "Productivity")) + geom_line(data = DisbyWeek, mapping = aes(y = Sum,
colour = "Distraction")) + geom_line(data = RbyWeek, mapping = aes(y = Sum, colour = "RStudio")) +
geom_line(data = LM, mapping = aes(y = LM, colour = "RStudio"), size = 0.5) +
geom_text(aes(x = 45, y = 1, label = lm_eqn(RStudioLM)), colour = "#75AADB",
size = 4, parse = TRUE) + geom_point(data = ProbyWeek, mapping = aes(y = Sum,
colour = "Productivity"), shape = 24, fill = "#357AA2") + geom_point(data = DisbyWeek,
mapping = aes(y = Sum, colour = "Distraction"), shape = 25, fill = "#FF7549") +
geom_point(data = RbyWeek, mapping = aes(y = Sum, colour = "RStudio"), colour = "#75AADB") +
scale_x_discrete(limits = seq(0, 54, 2)) + scale_y_continuous(minor_breaks = seq(0,
50, 1), breaks = seq(0, 50, 4)) + theme(legend.position = "bottom", legend.direction = "horizontal") +
guides(colour = guide_legend("Activity")) + scale_colour_manual(name = "Lines",
values = Actcolors, breaks = c("Productivity", "Distraction", "RStudio")) + labs(title = "Key Activities by Week",
subtitle = "Hours spent per Activity by Week Number\nActivities:Productivity,Distraction,RStudio",
caption = "", x = "Week Number", y = "Hours")# ggsave(last_plot(), filename = '2017TS.pdf', path='Images/Plots',device =
# 'pdf',width = 10, height = 5, units = 'in')Graph 1 Time by Category
RTCats <- RT %>% group_by(Overview, Category, Productivity) %>% summarize(`Total Time` = sum(`Time Spent (seconds)`)/3600) %>%
arrange(desc(Productivity)) %>% filter(`Total Time` > 1)
CatSums <- RTCats %>% group_by(Overview, Productivity) %>% summarize(Sum = sum(`Total Time`)) %>%
arrange(desc(Overview), desc(Productivity)) %>% mutate(ProSum = Sum * Productivity)
CatSums$Productivity <- factor(CatSums$Productivity, levels = c(2, 1, 0, -1, -2))
CatTotals <- RTCats %>% group_by(Overview) %>% summarize(Total = sum(`Total Time`)) %>%
arrange(desc(Total))
fillcolors <- c(`2` = "#357AA2", `1` = "#67A9D1", `0` = "#FEF0ED", `-1` = "#FF9777",
`-2` = "#FF7549")
# fix(CatSums)
ggplot() + geom_bar(data = CatSums, stat = "identity", mapping = aes(x = reorder(Overview,
ProSum), y = Sum, fill = Productivity)) + scale_fill_manual(values = fillcolors) +
geom_text(data = CatTotals, aes(Overview, Total, label = paste(round(Total/TotalHrs,
2) * 100, "%", sep = "")), position = position_dodge(0.9), vjust = 0.5, hjust = 0) +
labs(title = "Total Time per Activity Category", subtitle = "Rescuetime Primary Categories",
caption = "", x = "Category", y = "Total Time (HH:MM:SS)") + coord_flip() +
theme(axis.text.y = element_text(angle = 0, hjust = 1))ggsave(last_plot(), filename = "CategoryTotals.pdf", path = "Images/Plots", device = "pdf",
width = 5.0435, height = 3.5, units = "in")RT Top Activities by Cat
RTCatActs <- RT %>% group_by(Overview, Activity) %>% summarise(sum = {
sum(`Time Spent (seconds)`)/3600
}) %>% ungroup %>% group_by(Overview) %>% top_n(n = 3, wt = sum) %>% ungroup
RTCatActs <- inner_join(RTCatActs, RT %>% select(Activity, Productivity) %>% distinct,
by = "Activity")
RTCatActs$Productivity <- as.factor(RTCatActs$Productivity)
# RTCatActs <- editData::editData(RTCatActs) fix(RTCatActs)
actsums <- sum(RTCatActs$sum)
ggplot(data = RTCatActs, mapping = aes(x = reorder(Activity, sum), y = sum)) + geom_bar(stat = "identity",
mapping = aes(fill = Productivity)) + scale_fill_manual(values = fillcolors) +
facet_wrap(~Overview, scale = "free") + geom_label(mapping = aes(label = paste(round({
sum/actsums
}, 2) * 100, "%", sep = "")), color = "#00010F", label.padding = unit(0.15, "lines"),
size = rel(2)) + # geom_text(mapping=aes(label =
# paste(round({sum/actsums},2)*100,'%',sep='')),size=rel(1.2),color='white')+
theme(axis.text.x = element_text(vjust = 1, hjust = 0.8), axis.text.y = element_text(vjust = 0.1,
hjust = 0.9)) + coord_flip() + labs(title = "Top 3 Activities Per Category",
subtitle = "Total time by Activity with % Total Labels", caption = "Note: scale for each facet is set to free",
x = "Hrs", y = "Activity") + theme(plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5),
axis.title.x = element_text(hjust = 0.3), plot.caption = element_text(hjust = 0))# ggsave(last_plot(), filename = 'Top3.pdf', path='Images/Plots',device =
# 'pdf',width = 10, height = 4, units = 'in')
# ------------------- Sat Feb 10 23:23:28 2018 --------------------# TODO Make
# Productivity Discrete as.factor and add consistent color schema as other graph.Initialize API
library(httr)
RTURL <- parse_url("https://www.rescuetime.com/anapi/data")
RTep <- oauth_endpoint(authorize = NULL, access = "https://www.rescuetime.com/api/oauth/data",
base_url = "https://www.rescuetime.com/anapi/data")
oauth2.0_authorize_url()
RTURL$query$pv <- "interval"
RTURL$query$rb <- "2017-01-01"
RTURL$query$re <- "2017-12-31"
RTURL$query$rs <- "hour"
RTURL$query$rk <- "productivity"
RTURL$query$format <- "csv"
RTHrs <- GET(build_url(RTURL))Read and Prepare Data from Rescuetime API
RTHrs <- read.csv("~/Northeastern/Git/ppua5302/Project 1/Data/RTHourly.csv")
RTHrs <- RTHrs[, c(1, 2, 4)]
colnames(RTHrs) <- c("Date", "Time", "Productivity")
RTHrTotal <- RTHrs %>% mutate(Hour = as.numeric(str_extract(Date, "\\d{2}(?=\\:\\d{2}\\:\\d{2})"))) %>%
group_by(Hour, Productivity) %>% summarize(Total = sum(Time)/3600)
RTHrTotal$Productivity <- factor(RTHrTotal$Productivity, levels = c(2, 1, 0, -1,
-2))
RTHrPD <- RTHrTotal %>% filter(Productivity != "0") %>% mutate(Actual = Total * sign(as.numeric(as.character(Productivity))))
RTHrPD <- plyr::ddply(RTHrPD, .variables = "Total", .fun = transform, percent = Total/sum(RTHrPD$Total) *
100)
RTHrPD <- plyr::ddply(RTHrPD, .variables = "Total", .fun = transform, pos = (cumsum(Actual) +
0.5 * abs(Actual)))Density by Hour of Day Graph
# Stack bars with percentages
# https://stackoverflow.com/questions/22231124/how-to-draw-stacked-bars-in-ggplot2-that-show-percentages-based-on-group
fillcolors <- c(`2` = "#357AA2", `1` = "#67A9D1", `0` = "#FEF0ED", `-1` = "#FF9777",
`-2` = "#FF7549")
ggplot(data = RTHrPD, mapping = aes(x = Hour)) + geom_bar(stat = "identity", mapping = aes(y = Actual,
fill = Productivity)) + scale_fill_manual(values = fillcolors) + geom_text(stat = "identity",
aes(label = round(percent, 0), y = Actual), position = position_stack(vjust = 1)) +
labs(title = "Total Productivity for Year 2017 by Hour of Day", subtitle = "Percentage of Total by Hour in Labels",
caption = "", x = "Hour of Day", y = "Total Time (Hrs)") + scale_x_continuous(breaks = seq(1,
23))# ggsave(last_plot(), filename = 'Hourly.pdf', path='Images/Plots',device =
# 'pdf',width = 5.0435, height = 3.5, units = 'in')Productivity by Day of Week
RTDay <- RTHrs %>% mutate(Day = wday(ymd(str_extract(Date, "\\d{4}\\-\\d{2}\\-\\d{2}")),
label = T)) %>% group_by(Day, Productivity) %>% summarize(Total = sum(Time)/3600) %>%
filter(Productivity != 0) %>% ungroup() %>% mutate(Actual = Total * sign(as.numeric(as.character(Productivity))))
RTDay <- plyr::ddply(RTDay, .variables = "Day", transform, percent = Total/sum(Total) *
100)
RTDay$Productivity <- factor(RTDay$Productivity, levels = c(2, 1, 0, -1, -2))Graph of Productivity by Day of Week
ggplot(data = RTDay, mapping = aes(x = Day)) + geom_bar(stat = "identity", mapping = aes(y = Actual,
fill = Productivity)) + scale_fill_manual(values = fillcolors) + geom_text(stat = "identity",
aes(label = round(percent, 0), y = Actual), position = position_stack(vjust = 1)) +
labs(title = "Total Productivity for Year 2017 by Day of Week", subtitle = "Percentage of Total by Day in Labels",
caption = "", x = "Day of Week", y = "Total Time (Hrs)")# ggsave(last_plot(), filename = 'Daily.pdf', path='Images/Plots',device =
# 'pdf',width = 5.0435, height = 3.5, units = 'in')Linear Correlation Meditation and Productivity
MedPro <- left_join(ProbyWeek, MedbyWeek, by = "Weeknum")
MedPro[, c(2, 3)] <- apply(MedPro[, c(2, 3)], 2, FUN = "scale")
tags$h4("Is there a correlation between Meditation and Productivity in 2017?")##
## Call:
## lm(formula = Sum.x ~ Sum.y, data = MedPro)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.4204 -0.4679 -0.0238 0.4647 3.5432
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.212e-16 1.360e-01 0.000 1.000
## Sum.y 1.948e-01 1.373e-01 1.418 0.162
##
## Residual standard error: 0.9904 on 51 degrees of freedom
## Multiple R-squared: 0.03795, Adjusted R-squared: 0.01909
## F-statistic: 2.012 on 1 and 51 DF, p-value: 0.1621
There does not appear to be a correlation
Initialize Parallel
library(parallel)
cl <- makeCluster(detectCores(), type = "PSOCK")
clusterEvalQ(cl, {
library(rvest)
library(stringr)
library(lubridate)
})
clusterExport(cl, "Test")
clusterEvalQ(cl, {
library(parallel)
FBAct <- read_html("~/Northeastern/Git/ppua5302/Project 1/Data/FB Activity Feed/(3) Stephen Synchronicity.html")
Test <- FBAct %>% html_nodes(xpath = "tr._51mx")
})
Dates <- system.time(sapply(Test, function(x) {
x %>% html_nodes(css = "td._51m-.vTop._5ep5 > div > div > span") %>% html_text() %>%
mdy_hm()
}))
stopCluster(cl)Scrape Facebook Activity
FBAct <- read_html("~/Northeastern/Git/ppua5302/Project 1/Data/FB Activity Feed/(3) Stephen Synchronicity.html")
Items <- FBAct %>% html_nodes(xpath = "//tr[contains(@class,'_51mx')]")
Time <- FBAct %>% html_nodes(css = "td._51m-.vTop._5ep5 > div > div > span") %>%
html_text() %>% mdy_hm()
Test <- FBAct %>% html_nodes(css = "tr._51mx")
Dates <- lapply(Items, function(x) {
x %>% html_nodes(css = "td._51m-.vTop._5ep5 > div > div > span") %>% html_text() %>%
mdy_hm()
})
Action <- lapply(Items, function(x) {
x %>% html_nodes(css = "td._51m-.vTop._5ep5 > div > div > div") %>% html_text()
})
Content <- lapply(Items, function(x) {
x %>% html_nodes(css = "td._51m-.vTop._5ep6") %>% html_text()
})
Activity <- data.frame(Dates = map_call(Dates, "c"), Action = {
Action %>% unlist()
}, Content = {
Content %>% unlist()
})
# write_csv(Activity,path='~/Northeastern/Git/ppua5302/Project 1/Data/FB Activity
# Feed/Activity.csv')Clean FB Activity
Activity <- read.csv(file = "~/Northeastern/Git/ppua5302/Project 1/Data/FB Activity Feed/Activity.csv",
stringsAsFactors = F)
Activity <- Activity %>% filter(year("2018-01-01") > year(Dates) & year(Dates) >
year("2016-01-01"))
Activity$Weeknum <- sapply(Activity$Dates, FUN = "week", simplify = T)
Activity$SAction <- sapply(Activity$Action, FUN = function(x) {
str_match(x, "^Stephen\\sSynchronicity\\s(\\w+)") %>% .[, 2]
})
ActTotals <- Activity %>% group_by(SAction) %>% summarize(Sum = n()) %>% arrange(desc(Sum)) %>%
filter(!is.na(SAction))
ActTotals <- ActTotals %>% mutate(P = paste(round({
Sum/sum(Sum) * 100
}, 2), "%", sep = ""))
tagList(tags$p("Types of FB activities, the sum of total occurences, and the percent of total activities that sum represents:"))Types of FB activities, the sum of total occurences, and the percent of total activities that sum represents:
| SAction | Sum | P |
|---|---|---|
| likes | 2046 | 36.98% |
| liked | 897 | 16.21% |
| reacted | 662 | 11.97% |
| commented | 510 | 9.22% |
| saved | 437 | 7.9% |
| shared | 290 | 5.24% |
| replied | 282 | 5.1% |
| added | 106 | 1.92% |
| posted | 65 | 1.17% |
| became | 49 | 0.89% |
| wrote | 49 | 0.89% |
| is | 42 | 0.76% |
| updated | 35 | 0.63% |
| reached | 26 | 0.47% |
| was | 18 | 0.33% |
| followed | 11 | 0.2% |
| recommended | 4 | 0.07% |
| created | 2 | 0.04% |
| poked | 1 | 0.02% |
Graph Activity
ggplot(data = ActTotals, mapping = aes(x = reorder(SAction, order(Sum, decreasing = T)),
y = Sum)) + geom_bar(stat = "identity", fill = "#FF7549") + geom_text(aes(label = Sum),
position = position_dodge(0.9), vjust = 0) + labs(title = "Facebook Activity",
subtitle = "Totals by type of Action", caption = "", x = "Action", y = "Count")