# Load necessary libraries
library(ggplot2)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readxl)  # For reading Excel files
# Read the raw message log from the Excel workbook
data <- read_excel(path = "/cloud/project/1701692325-wifeandman.xlsx")

# Inspect the column names and types of the imported table
str(object = data)
## tibble [191 × 9] (S3: tbl_df/tbl/data.frame)
##  $ Date         : chr [1:191] "she ended it" "12/02/2023" "after sex imam" "11/27/2023" ...
##  $ Time         : chr [1:191] NA "12:24" NA "09:57" ...
##  $ DispBeginAmPm: chr [1:191] NA "PM" NA "PM" ...
##  $ To           : chr [1:191] NA "gerald" NA "Wife" ...
##  $ From         : chr [1:191] NA "Wife" NA "gerald" ...
##  $ Direction    : chr [1:191] NA "Sent" NA "Received" ...
##  $ msgType      : chr [1:191] NA "Text" NA "Text" ...
##  $ Charge       : chr [1:191] NA "0.0" NA "0.0" ...
##  $ TypeOfMsg    : chr [1:191] NA "Domestic" NA "Domestic" ...
# Preview the first six rows of the imported table
head(data, n = 6L)
## # A tibble: 6 × 9
##   Date        Time  DispBeginAmPm To    From  Direction msgType Charge TypeOfMsg
##   <chr>       <chr> <chr>         <chr> <chr> <chr>     <chr>   <chr>  <chr>    
## 1 she ended … <NA>  <NA>          <NA>  <NA>  <NA>      <NA>    <NA>   <NA>     
## 2 12/02/2023  12:24 PM            gera… Wife  Sent      Text    0.0    Domestic 
## 3 after sex … <NA>  <NA>          <NA>  <NA>  <NA>      <NA>    <NA>   <NA>     
## 4 11/27/2023  09:57 PM            Wife  gera… Received  Text    0.0    Domestic 
## 5 11/27/2023  09:56 PM            Wife  gera… Received  Text    0.0    Domestic 
## 6 11/27/2023  09:56 PM            Wife  gera… Received  Text    0.0    Domestic
# Data Cleaning ----
# Keep only rows that carry a complete message record. Rows with NA in any of
# the columns tested below (such as the free-text note rows, which only fill
# the Date column) are dropped.
data_cleaned <- data %>%
  filter(
    !is.na(Date), !is.na(Time), !is.na(To), !is.na(From),
    !is.na(Direction), !is.na(msgType)
  )

# Combine Date, Time and the AM/PM marker into a single POSIXct timestamp.
# The time zone is pinned to UTC so the timestamp keeps exactly the calendar
# date and clock time written in the sheet: as.Date() on a POSIXct defaults to
# UTC, so a timestamp parsed in a non-UTC session zone could move onto a
# neighbouring day when it is later reduced to a date.
# NOTE(review): "%p" is matched against the session locale's AM/PM strings;
# confirm the script runs in a locale that uses "AM"/"PM".
data_cleaned$DateTime <- as.POSIXct(
  paste(data_cleaned$Date, data_cleaned$Time, data_cleaned$DispBeginAmPm),
  format = "%m/%d/%Y %I:%M %p",
  tz = "UTC"
)

# Surface rows whose timestamp could not be parsed instead of letting them
# flow silently into the analysis as NA.
if (anyNA(data_cleaned$DateTime)) {
  warning(
    sum(is.na(data_cleaned$DateTime)),
    " row(s) have a Date/Time/AM-PM value that could not be parsed; ",
    "DateTime is NA for them.",
    call. = FALSE
  )
}

# Flag text and picture messages (1 = msgType contains the keyword, else 0)
data_cleaned$IsText <- as.numeric(grepl("Text", data_cleaned$msgType))
data_cleaned$IsPicture <- as.numeric(grepl("Picture", data_cleaned$msgType))
# Analysis and Visualization ----

# 1. Daily Text and Picture Messages
# Count text and picture messages per calendar day. The day is taken from the
# timestamp as rendered in its own time zone (format() honours the tzone
# attribute and otherwise uses the session zone). Calling as.Date() directly
# on a POSIXct converts through UTC by default, which can move late-evening
# or early-morning messages onto a neighbouring date.
daily_counts <- data_cleaned %>%
  group_by(Date = as.Date(format(DateTime, "%Y-%m-%d"))) %>%
  summarise(Texts = sum(IsText), Pictures = sum(IsPicture))

# Plot the daily message counts, one line per message type.
# Peaks mark days with more messages than usual. Days without any message
# have no row in daily_counts, so the lines connect the neighbouring days
# rather than dropping to zero.
# The colours are given as a named vector so each label is tied to its colour
# regardless of how the installed ggplot2 version orders unnamed values.
ggplot(daily_counts, aes(x = Date)) +
  geom_line(aes(y = Texts, colour = "Text Messages")) +
  geom_line(aes(y = Pictures, colour = "Picture Messages")) +
  labs(
    title = "Daily Text and Picture Messages",
    x = "Date",
    y = "Number of Messages"
  ) +
  scale_colour_manual(
    "",
    breaks = c("Text Messages", "Picture Messages"),
    values = c("Text Messages" = "blue", "Picture Messages" = "orange")
  )

# 2. Direction of Messages
# Count the messages for every sender/recipient pair. count() returns an
# ungrouped tibble, so no grouping leaks into later steps and the
# "summarise() has grouped output" message is not emitted.
direction_counts <- data_cleaned %>%
  count(From, To, name = "Count")

# Plot the number of messages per sender, split by recipient.
# Each bar is the number of messages the person on the x-axis sent to the
# recipient shown by the fill colour. This reflects message volume only; it
# does not show who started a conversation.
ggplot(direction_counts, aes(x = From, y = Count, fill = To)) +
  geom_col(position = position_dodge()) +
  labs(
    title = "Direction of Messages",
    x = "Sender",
    y = "Number of Messages"
  ) +
  scale_fill_brewer(palette = "Set1")