# Load necessary libraries
library(ggplot2)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readxl) # For reading Excel files
# Read the exported message log (Excel workbook) into a tibble
data <- read_excel(path = "/cloud/project/1701692325-wifeandman.xlsx")
# Inspect the column types and a sample of the values in each column.
# The recorded output below shows every column was read as character, and that
# some rows carry only a free-text note in `Date` with NA everywhere else.
str(object = data)
## tibble [191 × 9] (S3: tbl_df/tbl/data.frame)
## $ Date : chr [1:191] "she ended it" "12/02/2023" "after sex imam" "11/27/2023" ...
## $ Time : chr [1:191] NA "12:24" NA "09:57" ...
## $ DispBeginAmPm: chr [1:191] NA "PM" NA "PM" ...
## $ To : chr [1:191] NA "gerald" NA "Wife" ...
## $ From : chr [1:191] NA "Wife" NA "gerald" ...
## $ Direction : chr [1:191] NA "Sent" NA "Received" ...
## $ msgType : chr [1:191] NA "Text" NA "Text" ...
## $ Charge : chr [1:191] NA "0.0" NA "0.0" ...
## $ TypeOfMsg : chr [1:191] NA "Domestic" NA "Domestic" ...
# Preview the first six rows
head(x = data, n = 6L)
## # A tibble: 6 × 9
## Date Time DispBeginAmPm To From Direction msgType Charge TypeOfMsg
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 she ended … <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 2 12/02/2023 12:24 PM gera… Wife Sent Text 0.0 Domestic
## 3 after sex … <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 4 11/27/2023 09:57 PM Wife gera… Received Text 0.0 Domestic
## 5 11/27/2023 09:56 PM Wife gera… Received Text 0.0 Domestic
## 6 11/27/2023 09:56 PM Wife gera… Received Text 0.0 Domestic
# Data Cleaning
# Keep only rows that describe an actual message. Rows that hold just a
# free-text note in `Date` have NA in the remaining columns and are dropped.
data_cleaned <- data %>%
  filter(
    !is.na(Date), !is.na(Time), !is.na(To), !is.na(From),
    !is.na(Direction), !is.na(msgType)
  ) %>%
  mutate(
    # Combine Date, Time and the AM/PM marker into one timestamp.
    # The sheet carries no timezone, so the wall-clock value is stored as UTC:
    # parsing then does not depend on the session timezone, and a later
    # as.Date(DateTime) (which defaults to UTC) returns the same calendar day
    # that appears in the sheet.
    DateTime = as.POSIXct(
      paste(Date, Time, DispBeginAmPm),
      format = "%m/%d/%Y %I:%M %p",
      tz = "UTC"
    ),
    # 1/0 indicators for text and picture messages (substring match on msgType)
    IsText = as.integer(grepl("Text", msgType, fixed = TRUE)),
    IsPicture = as.integer(grepl("Picture", msgType, fixed = TRUE))
  )
# Surface rows whose timestamp could not be parsed (e.g. a missing AM/PM
# marker) instead of letting them pass through silently as NA.
if (anyNA(data_cleaned$DateTime)) {
  warning(
    sum(is.na(data_cleaned$DateTime)),
    " row(s) have a Date/Time/AM-PM value that could not be parsed",
    call. = FALSE
  )
}
# Analysis and Visualization
# 1. Daily Text and Picture Messages
# Aggregating the count of texts and pictures by calendar date.
# The date is taken from format(), which renders DateTime in the timezone it
# was parsed in. as.Date() on a POSIXct defaults to UTC and could otherwise
# move late-evening messages onto the following day.
daily_counts <- data_cleaned %>%
  group_by(Date = as.Date(format(DateTime, "%Y-%m-%d"))) %>%
  summarise(
    Texts = sum(IsText),
    Pictures = sum(IsPicture),
    .groups = "drop"
  )
# Plotting daily message trends
# This plot shows how many text and picture messages were exchanged each day.
# Peaks mark days with more messages than usual.
ggplot(daily_counts, aes(x = Date)) +
  geom_line(aes(y = Texts, colour = "Text Messages")) +
  geom_line(aes(y = Pictures, colour = "Picture Messages")) +
  labs(
    title = "Daily Text and Picture Messages",
    x = "Date",
    y = "Number of Messages"
  ) +
  # Named values pin each legend label to its colour explicitly rather than
  # relying on positional matching.
  scale_colour_manual(
    "",
    breaks = c("Text Messages", "Picture Messages"),
    values = c("Text Messages" = "blue", "Picture Messages" = "orange")
  )

# 2. Direction of Messages
# Counting messages for each sender/recipient pair.
# count() returns an ungrouped tibble, so no grouping is left behind and the
# summarise() `.groups` message is not emitted.
direction_counts <- data_cleaned %>%
  count(From, To, name = "Count")
# Plotting the direction of messages
# This bar plot shows how many messages each sender sent to each recipient.
# It counts individual messages; it does not identify who started a
# conversation.
ggplot(direction_counts, aes(x = From, y = Count, fill = To)) +
  geom_col(position = position_dodge()) +
  labs(
    title = "Direction of Messages",
    x = "Sender",
    y = "Number of Messages"
  ) +
  scale_fill_brewer(palette = "Set1")
