Theme and font settings
theme_blue <- function(base_size = 11, base_family = "") {
theme_grey(base_size = base_size, base_family = base_family) %+replace%
theme(line = element_line(colour = "white", size = I(1.5), linetype = "solid",
lineend = "round", arrow = NULL, inherit.blank = FALSE),
rect = element_rect(fill = "#00010F", colour = "#00010F", size = 0, linetype = "solid", inherit.blank = FALSE),
text = element_text(family = "Helvetica", face = "plain", colour = "white", size = base_size,hjust = I(.5), vjust = I(.5), angle = 0, lineheight = base_size,color = NULL,margin = margin(I(.2),I(.2),I(.2),I(.2),unit="pt"),debug=F, inherit.blank = FALSE),
title = element_text(family = "AvantGarde", face = "plain", colour = "white", size = I(base_size),hjust = I(.5), vjust = I(1), angle = 0, lineheight = I(base_size),color = NULL,margin = margin(I(1),I(1),I(1),I(1),unit="pt"), inherit.blank = FALSE),
axis.line = element_line(colour = "#3A42B0", size = 1, linetype = "solid",
lineend = "round", inherit.blank = FALSE),
axis.text.x = element_text(size = base_size*0.6, color = "white", lineheight = 0.6,hjust = I(1), vjust = I(1),angle=60),
axis.text.y = element_text(size = base_size*0.6, color = "white", lineheight = 0.9,hjust = I(.5), vjust = I(1),angle=60),
axis.ticks = element_line(colour = "#3A42B0", size = 0.2),
axis.title = element_text(family = "Helvetica", face = "italic", colour = "white", size = I(base_size*1),hjust = I(.5), vjust = I(.5), lineheight = I(base_size*1), inherit.blank = FALSE),
axis.title.x = element_text(size = base_size, color = "white", margin = margin(0, .5, 0, 0)),
axis.title.y = element_text(size = base_size, color = "white", angle = 90, margin = margin(0, .5, 0, 0)),
axis.ticks.length = unit(0.3, "lines"),
# Specify legend options
legend.background = element_rect(color = NA, fill = "#060713"),
legend.key = element_rect(color = "white", fill = "black"),
legend.key.size = unit(1.2, "lines"),
legend.key.height = NULL,
legend.key.width = NULL,
legend.text = element_text(size = base_size*0.8, color = "white"),
legend.title = element_text(size = base_size*0.8, face = "bold", hjust = 0, color = "white"),
legend.position = "right",
legend.text.align = NULL,
legend.title.align = NULL,
legend.direction = "vertical",
legend.box = NULL,
# Specify panel options
panel.background = element_rect(fill = "#060713",color="#060713", inherit.blank = FALSE),
panel.border = element_rect(fill = NA, color = NA),
panel.grid.major = element_line(colour = "#34415B", size = I(.1), linetype = "solid",lineend = "butt", inherit.blank = FALSE),
panel.grid.minor = element_line(colour = "#1C2D52", size = I(.1), linetype = "dotted",lineend = "butt", inherit.blank = FALSE),
panel.spacing = margin(rep(.5,4),unit="pt"),
panel.spacing.x = unit(.5,"pt"),
panel.spacing.y = unit(.5,"pt"),
# Specify facetting options
strip.background = element_rect(fill = "#060713", color = "#060713"),
strip.text = element_text(family = "Helvetica", face = "plain", colour = "white", size = I(base_size*.8),hjust = .5,lineheight = I(base_size*1),margin = margin(rep(1,4), "pt"),inherit.blank = FALSE),
strip.text.x = element_text(color = "white"),
strip.text.y = element_text(color = "white",angle = -90),
# Specify plot options
plot.background = element_rect(fill = "#00010F",colour = "#00010F", linetype = "solid", inherit.blank = FALSE),
plot.title = element_text(family=base_family,size = base_size, color = "white", lineheight = I(1.2),vjust=1),
plot.subtitle = element_text( size = I(base_size*.8),hjust = I(.5),vjust=1, lineheight = I(1.2), inherit.blank = FALSE),
plot.margin = margin(.2,.2,.2,.2,unit="cm"),
strip.switch.pad.grid = unit(0, "cm")
)
}
update_geom_defaults("bar", list(fill = "#0686DB"))
theme_set(theme_blue(base_family = "AvantGarde"))
update_geom_defaults("text",list(colour="white",size=rel(2.5)))
windowsFonts(AvantGarde=windowsFont("TT AvantGarde-Book"))
windowsFonts(Helvetica=windowsFont("TT Helvetica"))
options(scipen=12)
Color Schema
# http://colorschemedesigner.com/csd-3.5/
ColorKey <- c("Base", "Darker", "Darkest", "LightBold", "LightLight")
MonoDarkBlue <- c("#00021B", "#060713", "#00010F", "#3A42B0", "#686DB0")
CompGold <- c("#B48B18", "#1B1608", "#161000", "#B4943B", "#B4A06A")
AnalogicViolet <- c("#230159", "#0B0612", "#06000F", "#6839B0", "#8467B0")
AnalogicTurquoise <- c("#00395B", "#050D11", "#00090E", "#3983AF", "#6794AF")
Actcolors <- c(Meditation = "#6839B0", Productivity = "#357AA2", Distraction = "#FF7549",
RStudio = "#75AADB", Yoga = "#DEF83F", Plyometric = "#E03974")
ColorMatrix <- matrix(data = c(MonoDarkBlue, CompGold, AnalogicViolet, AnalogicTurquoise),
nrow = 4, ncol = 5, byrow = T, dimnames = list(c("MonoDarkBlue", "CompGold",
"AnalogicViolet", "AnalogicTurquoise"), ColorKey))
write_csv(as.data.frame(ColorMatrix), path = "ColorMatrix.csv")
Load Data
Meditation <- read.csv("~/Northeastern/Git/ppua5302/Project 1/Data/insight_sessions_export.csv",
header = T, sep = ",")
library(lubridate)
names(Meditation) <- c("Time", "Duration", "Activity", "Preset")
Meditation$Time <- mdy_hms(Meditation$Time)
Meditation$Duration <- as.numeric(as.character(Meditation$Duration))
tagList(tags$h4("Types of Health Activity by Week number for the Year 2017"), tags$div(style = "width:100%",
DT::datatable(HlthLine <- Meditation %>% filter(year(Time) == "2017") %>% mutate(Weeknum = week(Time)) %>%
group_by(Weeknum, Activity) %>% summarise(Sum = sum(Duration)) %>% ungroup %>%
mutate(Hrs = {
Sum/60
}) %>% select(-Sum) %>% spread(key = Activity, value = Hrs) %>% replace_na(list(Meditation = 0,
Walking = 0, Yoga = 0, Breathing = 0)) %>% mutate(Meditation = {
Meditation + Breathing
}) %>% select(-c(Breathing, Healing)), width = "100%")))
Filter Data and add to Sheet
GsheetUpdate <- Meditation %>% filter(Time > mdy("07-13-2017"))
names(GsheetUpdate) <- c("Started At", "Duration (minutes)", "Activity", "Preset")
library(googlesheets)
gs_auth()
MedTrack <- gs_url("https://docs.google.com/spreadsheets/d/19Pz41A0dq0DsJIBuFO0VOw0fXAmzxO7UOa7bHKcIgYQ/edit#gid=1807004634")
gs_edit_cells(MedTrack, ws = 2, input = GsheetUpdate, anchor = "A4095")
D Fs of Health Activities
Med17 <- Meditation %>% filter(year(Time) == "2017" & Activity == "Meditation") %>%
mutate(Weeknum = week(Time)) %>% select(Weeknum, everything())
Yoga17 <- Meditation %>% filter(year(Time) == "2017" & Activity == "Yoga") %>% mutate(Weeknum = week(Time)) %>%
select(Weeknum, everything())
Plyo17 <- Meditation %>% filter(year(Time) == "2017" & Activity == "Walking") %>%
mutate(Weeknum = week(Time)) %>% select(Weeknum, everything())
Import Rescuetime Data
RT <- read_csv(file = "~/Northeastern/Git/ppua5302/Project 1/Data/RescueTime_Report_All_Activities__by_week_2017-01-01.csv",
col_names = T)
RT$Date <- ymd(RT$Date)
class(RT$Productivity)
## [1] "integer"
## [1] "Reference & Learning" "Communication & Scheduling"
## [3] "Social Networking" "Business"
## [5] "Utilities" "Design & Composition"
## [7] "Entertainment" "Software Development"
## [9] "Uncategorized" "News & Opinion"
## [11] "Shopping" "Miscellaneous"
RT$Overview <- factor(RT$Overview, levels = unique(RT$Overview), labels = c("Ref & Learn",
"Com & Sch", "Social Net", "Business", "Utilities", "Design", "Entertain", "Software Dev",
"Uncat.", "News & Op", "Shopping", "Misc"))
htmltools::tags$p("Sum of time and percent of total time in each category of productivity for 2017")
Sum of time and percent of total time in each category of productivity for 2017
knitr::kable(RTSums <- RT %>% group_by(Productivity) %>% summarise(sum = {
sum(`Time Spent (seconds)`)/3600
}) %>% mutate(P = sum/sum(sum)))
Productivity | sum | P |
---|---|---|
-2 | 84.27556 | 0.0311079 |
-1 | 489.65000 | 0.1807400 |
0 | 934.40833 | 0.3449095 |
1 | 527.00972 | 0.1945302 |
2 | 673.79722 | 0.2487125 |
htmltools::tags$p("Total % of time spent in productive activities during 2017:",
sum(RTSums$P[4:5]))
Total % of time spent in productive activities during 2017: 0.443242717273937
Totals
RTOV <- RT %>% group_by(Overview, Category) %>% summarize(`Total Time` = sum(`Time Spent (seconds)`)/3600) %>%
arrange(desc(`Total Time`)) %>% filter(`Total Time` > 1)
TotalHrs <- sum(RTOV$`Total Time`[!is.na(RTOV$`Total Time`)])
tags$p("Total Hours of Digital Activity in 2017: ", TotalHrs)
Total Hours of Digital Activity in 2017: 2701.25611111111
TtlProd <- RT %>% filter(Productivity > 0) %>% select(`Time Spent (seconds)`) %>%
colSums()
TtlDist <- RT %>% filter(Productivity < 0) %>% select(`Time Spent (seconds)`) %>%
colSums()
tags$p("Ratio of Productive:Distracting Digital Activity in 2017: ", (TtlProd/3600)/(TtlDist/3600))
Ratio of Productive:Distracting Digital Activity in 2017: 2.09226951617806
htmltools::tagList(tags$p("Total Hours of Meditation in 2017:", sum(Med17 %>% group_by(Weeknum) %>%
summarize(TotalHrs = sum(Duration)/60) %>% .$TotalHrs)))
Total Hours of Meditation in 2017: 424.233333333333
htmltools::tagList(tags$p("Total Hours of Yoga in 2017:", sum(Yoga17 %>% group_by(Weeknum) %>%
summarize(TotalHrs = sum(Duration)/60) %>% .$TotalHrs)))
Total Hours of Yoga in 2017: 289.25
TotalR <- RT %>% mutate(R = str_detect(Activity, "rstudio")) %>% filter(R == T) %>%
select(-R) %>% select(`Time Spent (seconds)`) %>% colSums()
htmltools::tagList(tags$p("Total Hours of R Usage in 2017:", TotalR/3600))
Total Hours of R Usage in 2017: 188.762222222222
tagList(tags$h4("Mean, Max, and Total Time in Hrs + Total # of Events per Productivity level for Each week of 2017"),
tags$div(DT::datatable((RT %>% filter(Date > "2017-01-01") %>% group_by(Date,
Productivity) %>% select(-`Number of People`) %>% summarise_if(.predicate = is.numeric,
.funs = c(MeanTime.s = mean, MaxTime.s = max, TotalTime.s = sum, TotalEntries = n_distinct)) %>%
ungroup() %>% mutate_at(.vars = vars(ends_with("s")), .funs = funs(./3600))),
width = "100%")))
Graph of Health Activities
ggplot(data = HlthLine, mapping = aes(x = Weeknum)) + geom_line(mapping = aes(y = Meditation,
colour = "Meditation")) + geom_line(mapping = aes(y = Yoga, colour = "Yoga")) +
geom_line(mapping = aes(y = Walking, colour = "Plyometric")) + geom_point(mapping = aes(y = Meditation,
colour = "Meditation"), shape = 24) + geom_point(mapping = aes(y = Yoga, colour = "Yoga"),
shape = 25) + geom_point(mapping = aes(y = Walking, colour = "Plyometric")) +
scale_x_discrete(limits = seq(0, 54, 2)) + scale_y_continuous(minor_breaks = seq(0,
50, 1), breaks = seq(0, 50, 4)) + theme(legend.position = "right", legend.direction = "vertical") +
guides(colour = guide_legend("Activity")) + scale_colour_manual(name = "Lines",
values = Actcolors, breaks = c("Meditation", "Yoga", "Plyometric")) + labs(title = "Health Activities by Week",
subtitle = "Hours spent per Activity by Week Number\nActivities:Meditation,Yoga,Plyometric",
caption = "", x = "Week Number", y = "Hours")
Graph 1 Timeseries
MedbyWeek <- Med17 %>% group_by(Weeknum) %>% summarize(Sum = round(sum(Duration)/60,
2))
ProbyWeek <- RT %>% filter(Productivity > 0) %>% mutate(Weeknum = week(Date)) %>%
group_by(Weeknum) %>% summarize(Sum = round(sum(`Time Spent (seconds)`)/3600,
2))
DisbyWeek <- RT %>% filter(Productivity < 0) %>% mutate(Weeknum = week(Date)) %>%
group_by(Weeknum) %>% summarize(Sum = round(sum(`Time Spent (seconds)`)/3600,
2))
RbyWeek <- RT %>% filter(Activity == "rstudio") %>% mutate(Weeknum = week(Date)) %>%
group_by(Weeknum) %>% summarize(Sum = round(sum(`Time Spent (seconds)`)/3600,
2))
# https://stackoverflow.com/questions/7549694/adding-regression-line-equation-and-r2-on-graph
lm_eqn = function(m) {
l <- list(a = format(coef(m)[1], digits = 2), b = format(abs(coef(m)[2]), digits = 2),
r2 = format(summary(m)$r.squared, digits = 3))
if (coef(m)[2] >= 0) {
eq <- substitute(italic(y) == a + b %.% italic(x) * "," ~ ~italic(r)^2 ~
"=" ~ r2, l)
} else {
eq <- substitute(italic(y) == a - b %.% italic(x) * "," ~ ~italic(r)^2 ~
"=" ~ r2, l)
}
as.character(as.expression(eq))
}
RStudioLM <- lm(Sum ~ Weeknum, RbyWeek)
LM <- RbyWeek %>% mutate(LM = predict(RStudioLM, newdata = RbyWeek)) %>% filter(LM >
0)
htmltools::tags$p("The rate at which RStudio Usage increases by week in hrs:", RStudioLM$coefficients["Weeknum"])
The rate at which RStudio Usage increases by week in hrs: 0.500887190402477
ggplot(mapping = aes(x = Weeknum)) + geom_line(data = ProbyWeek, mapping = aes(y = Sum,
colour = "Productivity")) + geom_line(data = DisbyWeek, mapping = aes(y = Sum,
colour = "Distraction")) + geom_line(data = RbyWeek, mapping = aes(y = Sum, colour = "RStudio")) +
geom_line(data = LM, mapping = aes(y = LM, colour = "RStudio"), size = 0.5) +
geom_text(aes(x = 45, y = 1, label = lm_eqn(RStudioLM)), colour = "#75AADB",
size = 4, parse = TRUE) + geom_point(data = ProbyWeek, mapping = aes(y = Sum,
colour = "Productivity"), shape = 24, fill = "#357AA2") + geom_point(data = DisbyWeek,
mapping = aes(y = Sum, colour = "Distraction"), shape = 25, fill = "#FF7549") +
geom_point(data = RbyWeek, mapping = aes(y = Sum, colour = "RStudio"), colour = "#75AADB") +
scale_x_discrete(limits = seq(0, 54, 2)) + scale_y_continuous(minor_breaks = seq(0,
50, 1), breaks = seq(0, 50, 4)) + theme(legend.position = "bottom", legend.direction = "horizontal") +
guides(colour = guide_legend("Activity")) + scale_colour_manual(name = "Lines",
values = Actcolors, breaks = c("Productivity", "Distraction", "RStudio")) + labs(title = "Key Activities by Week",
subtitle = "Hours spent per Activity by Week Number\nActivities:Productivity,Distraction,RStudio",
caption = "", x = "Week Number", y = "Hours")
# ggsave(last_plot(), filename = '2017TS.pdf', path='Images/Plots',device =
# 'pdf',width = 10, height = 5, units = 'in')
Graph 1 Time by Category
RTCats <- RT %>% group_by(Overview, Category, Productivity) %>% summarize(`Total Time` = sum(`Time Spent (seconds)`)/3600) %>%
arrange(desc(Productivity)) %>% filter(`Total Time` > 1)
CatSums <- RTCats %>% group_by(Overview, Productivity) %>% summarize(Sum = sum(`Total Time`)) %>%
arrange(desc(Overview), desc(Productivity)) %>% mutate(ProSum = Sum * Productivity)
CatSums$Productivity <- factor(CatSums$Productivity, levels = c(2, 1, 0, -1, -2))
CatTotals <- RTCats %>% group_by(Overview) %>% summarize(Total = sum(`Total Time`)) %>%
arrange(desc(Total))
fillcolors <- c(`2` = "#357AA2", `1` = "#67A9D1", `0` = "#FEF0ED", `-1` = "#FF9777",
`-2` = "#FF7549")
# fix(CatSums)
ggplot() + geom_bar(data = CatSums, stat = "identity", mapping = aes(x = reorder(Overview,
ProSum), y = Sum, fill = Productivity)) + scale_fill_manual(values = fillcolors) +
geom_text(data = CatTotals, aes(Overview, Total, label = paste(round(Total/TotalHrs,
2) * 100, "%", sep = "")), position = position_dodge(0.9), vjust = 0.5, hjust = 0) +
labs(title = "Total Time per Activity Category", subtitle = "Rescuetime Primary Categories",
caption = "", x = "Category", y = "Total Time (HH:MM:SS)") + coord_flip() +
theme(axis.text.y = element_text(angle = 0, hjust = 1))
ggsave(last_plot(), filename = "CategoryTotals.pdf", path = "Images/Plots", device = "pdf",
width = 5.0435, height = 3.5, units = "in")
RT Top Activities by Cat
RTCatActs <- RT %>% group_by(Overview, Activity) %>% summarise(sum = {
sum(`Time Spent (seconds)`)/3600
}) %>% ungroup %>% group_by(Overview) %>% top_n(n = 3, wt = sum) %>% ungroup
RTCatActs <- inner_join(RTCatActs, RT %>% select(Activity, Productivity) %>% distinct,
by = "Activity")
RTCatActs$Productivity <- as.factor(RTCatActs$Productivity)
# RTCatActs <- editData::editData(RTCatActs) fix(RTCatActs)
actsums <- sum(RTCatActs$sum)
ggplot(data = RTCatActs, mapping = aes(x = reorder(Activity, sum), y = sum)) + geom_bar(stat = "identity",
mapping = aes(fill = Productivity)) + scale_fill_manual(values = fillcolors) +
facet_wrap(~Overview, scale = "free") + geom_label(mapping = aes(label = paste(round({
sum/actsums
}, 2) * 100, "%", sep = "")), color = "#00010F", label.padding = unit(0.15, "lines"),
size = rel(2)) + # geom_text(mapping=aes(label =
# paste(round({sum/actsums},2)*100,'%',sep='')),size=rel(1.2),color='white')+
theme(axis.text.x = element_text(vjust = 1, hjust = 0.8), axis.text.y = element_text(vjust = 0.1,
hjust = 0.9)) + coord_flip() + labs(title = "Top 3 Activities Per Category",
subtitle = "Total time by Activity with % Total Labels", caption = "Note: scale for each facet is set to free",
x = "Hrs", y = "Activity") + theme(plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5),
axis.title.x = element_text(hjust = 0.3), plot.caption = element_text(hjust = 0))
# ggsave(last_plot(), filename = 'Top3.pdf', path='Images/Plots',device =
# 'pdf',width = 10, height = 4, units = 'in')
# ------------------- Sat Feb 10 23:23:28 2018 --------------------# TODO Make
# Productivity Discrete as.factor and add consistent color schema as other graph.
Initialize API
library(httr)
RTURL <- parse_url("https://www.rescuetime.com/anapi/data")
RTep <- oauth_endpoint(authorize = NULL, access = "https://www.rescuetime.com/api/oauth/data",
base_url = "https://www.rescuetime.com/anapi/data")
oauth2.0_authorize_url()
RTURL$query$pv <- "interval"
RTURL$query$rb <- "2017-01-01"
RTURL$query$re <- "2017-12-31"
RTURL$query$rs <- "hour"
RTURL$query$rk <- "productivity"
RTURL$query$format <- "csv"
RTHrs <- GET(build_url(RTURL))
Read and Prepare Data from Rescuetime API
RTHrs <- read.csv("~/Northeastern/Git/ppua5302/Project 1/Data/RTHourly.csv")
RTHrs <- RTHrs[, c(1, 2, 4)]
colnames(RTHrs) <- c("Date", "Time", "Productivity")
RTHrTotal <- RTHrs %>% mutate(Hour = as.numeric(str_extract(Date, "\\d{2}(?=\\:\\d{2}\\:\\d{2})"))) %>%
group_by(Hour, Productivity) %>% summarize(Total = sum(Time)/3600)
RTHrTotal$Productivity <- factor(RTHrTotal$Productivity, levels = c(2, 1, 0, -1,
-2))
RTHrPD <- RTHrTotal %>% filter(Productivity != "0") %>% mutate(Actual = Total * sign(as.numeric(as.character(Productivity))))
RTHrPD <- plyr::ddply(RTHrPD, .variables = "Total", .fun = transform, percent = Total/sum(RTHrPD$Total) *
100)
RTHrPD <- plyr::ddply(RTHrPD, .variables = "Total", .fun = transform, pos = (cumsum(Actual) +
0.5 * abs(Actual)))
Density by Hour of Day Graph
# Stack bars with percentages
# https://stackoverflow.com/questions/22231124/how-to-draw-stacked-bars-in-ggplot2-that-show-percentages-based-on-group
fillcolors <- c(`2` = "#357AA2", `1` = "#67A9D1", `0` = "#FEF0ED", `-1` = "#FF9777",
`-2` = "#FF7549")
ggplot(data = RTHrPD, mapping = aes(x = Hour)) + geom_bar(stat = "identity", mapping = aes(y = Actual,
fill = Productivity)) + scale_fill_manual(values = fillcolors) + geom_text(stat = "identity",
aes(label = round(percent, 0), y = Actual), position = position_stack(vjust = 1)) +
labs(title = "Total Productivity for Year 2017 by Hour of Day", subtitle = "Percentage of Total by Hour in Labels",
caption = "", x = "Hour of Day", y = "Total Time (Hrs)") + scale_x_continuous(breaks = seq(1,
23))
# ggsave(last_plot(), filename = 'Hourly.pdf', path='Images/Plots',device =
# 'pdf',width = 5.0435, height = 3.5, units = 'in')
Productivity by Day of Week
RTDay <- RTHrs %>% mutate(Day = wday(ymd(str_extract(Date, "\\d{4}\\-\\d{2}\\-\\d{2}")),
label = T)) %>% group_by(Day, Productivity) %>% summarize(Total = sum(Time)/3600) %>%
filter(Productivity != 0) %>% ungroup() %>% mutate(Actual = Total * sign(as.numeric(as.character(Productivity))))
RTDay <- plyr::ddply(RTDay, .variables = "Day", transform, percent = Total/sum(Total) *
100)
RTDay$Productivity <- factor(RTDay$Productivity, levels = c(2, 1, 0, -1, -2))
Graph of Productivity by Day of Week
ggplot(data = RTDay, mapping = aes(x = Day)) + geom_bar(stat = "identity", mapping = aes(y = Actual,
fill = Productivity)) + scale_fill_manual(values = fillcolors) + geom_text(stat = "identity",
aes(label = round(percent, 0), y = Actual), position = position_stack(vjust = 1)) +
labs(title = "Total Productivity for Year 2017 by Day of Week", subtitle = "Percentage of Total by Day in Labels",
caption = "", x = "Day of Week", y = "Total Time (Hrs)")
# ggsave(last_plot(), filename = 'Daily.pdf', path='Images/Plots',device =
# 'pdf',width = 5.0435, height = 3.5, units = 'in')
Linear Correlation Meditation and Productivity
MedPro <- left_join(ProbyWeek, MedbyWeek, by = "Weeknum")
MedPro[, c(2, 3)] <- apply(MedPro[, c(2, 3)], 2, FUN = "scale")
tags$h4("Is there a correlation between Meditation and Productivity in 2017?")
##
## Call:
## lm(formula = Sum.x ~ Sum.y, data = MedPro)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.4204 -0.4679 -0.0238 0.4647 3.5432
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.212e-16 1.360e-01 0.000 1.000
## Sum.y 1.948e-01 1.373e-01 1.418 0.162
##
## Residual standard error: 0.9904 on 51 degrees of freedom
## Multiple R-squared: 0.03795, Adjusted R-squared: 0.01909
## F-statistic: 2.012 on 1 and 51 DF, p-value: 0.1621
There does not appear to be a correlation
Initialize Parallel
library(parallel)
cl <- makeCluster(detectCores(), type = "PSOCK")
clusterEvalQ(cl, {
library(rvest)
library(stringr)
library(lubridate)
})
clusterExport(cl, "Test")
clusterEvalQ(cl, {
library(parallel)
FBAct <- read_html("~/Northeastern/Git/ppua5302/Project 1/Data/FB Activity Feed/(3) Stephen Synchronicity.html")
Test <- FBAct %>% html_nodes(xpath = "tr._51mx")
})
Dates <- system.time(sapply(Test, function(x) {
x %>% html_nodes(css = "td._51m-.vTop._5ep5 > div > div > span") %>% html_text() %>%
mdy_hm()
}))
stopCluster(cl)
Scrape Facebook Activity
FBAct <- read_html("~/Northeastern/Git/ppua5302/Project 1/Data/FB Activity Feed/(3) Stephen Synchronicity.html")
Items <- FBAct %>% html_nodes(xpath = "//tr[contains(@class,'_51mx')]")
Time <- FBAct %>% html_nodes(css = "td._51m-.vTop._5ep5 > div > div > span") %>%
html_text() %>% mdy_hm()
Test <- FBAct %>% html_nodes(css = "tr._51mx")
Dates <- lapply(Items, function(x) {
x %>% html_nodes(css = "td._51m-.vTop._5ep5 > div > div > span") %>% html_text() %>%
mdy_hm()
})
Action <- lapply(Items, function(x) {
x %>% html_nodes(css = "td._51m-.vTop._5ep5 > div > div > div") %>% html_text()
})
Content <- lapply(Items, function(x) {
x %>% html_nodes(css = "td._51m-.vTop._5ep6") %>% html_text()
})
Activity <- data.frame(Dates = map_call(Dates, "c"), Action = {
Action %>% unlist()
}, Content = {
Content %>% unlist()
})
# write_csv(Activity,path='~/Northeastern/Git/ppua5302/Project 1/Data/FB Activity
# Feed/Activity.csv')
Clean FB Activity
Activity <- read.csv(file = "~/Northeastern/Git/ppua5302/Project 1/Data/FB Activity Feed/Activity.csv",
stringsAsFactors = F)
Activity <- Activity %>% filter(year("2018-01-01") > year(Dates) & year(Dates) >
year("2016-01-01"))
Activity$Weeknum <- sapply(Activity$Dates, FUN = "week", simplify = T)
Activity$SAction <- sapply(Activity$Action, FUN = function(x) {
str_match(x, "^Stephen\\sSynchronicity\\s(\\w+)") %>% .[, 2]
})
ActTotals <- Activity %>% group_by(SAction) %>% summarize(Sum = n()) %>% arrange(desc(Sum)) %>%
filter(!is.na(SAction))
ActTotals <- ActTotals %>% mutate(P = paste(round({
Sum/sum(Sum) * 100
}, 2), "%", sep = ""))
tagList(tags$p("Types of FB activities, the sum of total occurences, and the percent of total activities that sum represents:"))
Types of FB activities, the sum of total occurences, and the percent of total activities that sum represents:
SAction | Sum | P |
---|---|---|
likes | 2046 | 36.98% |
liked | 897 | 16.21% |
reacted | 662 | 11.97% |
commented | 510 | 9.22% |
saved | 437 | 7.9% |
shared | 290 | 5.24% |
replied | 282 | 5.1% |
added | 106 | 1.92% |
posted | 65 | 1.17% |
became | 49 | 0.89% |
wrote | 49 | 0.89% |
is | 42 | 0.76% |
updated | 35 | 0.63% |
reached | 26 | 0.47% |
was | 18 | 0.33% |
followed | 11 | 0.2% |
recommended | 4 | 0.07% |
created | 2 | 0.04% |
poked | 1 | 0.02% |
Graph Activity
ggplot(data = ActTotals, mapping = aes(x = reorder(SAction, order(Sum, decreasing = T)),
y = Sum)) + geom_bar(stat = "identity", fill = "#FF7549") + geom_text(aes(label = Sum),
position = position_dodge(0.9), vjust = 0) + labs(title = "Facebook Activity",
subtitle = "Totals by type of Action", caption = "", x = "Action", y = "Count")