Domain 1: Time Series Analysis

1. Popular Time Series Analysis Packages in R

Here are some of the most popular packages in R for time series analysis:

# Install packages if not already installed
install.packages(c("forecast", "tseries", "zoo", "xts", "tidyverse", 
                   "lubridate", "prophet", "seasonal", "fable", 
                   "stats", "TSstudio", "timetk"))

# Load required packages
library(forecast)    # For time series forecasting methods
library(tseries)     # For time series analysis functions
library(zoo)         # For handling irregular time series
library(xts)         # For extensible time series
library(tidyverse)   # For data manipulation and visualization
library(lubridate)   # For date handling
library(prophet)     # For forecasting with additive models
library(seasonal)    # For seasonal adjustment
library(stats)       # For basic time series functions
library(TSstudio)    # For time series visualization
library(timetk)      # For time series toolkit

2. Functions for Construction and Plotting Time Series

Let’s demonstrate functions for constructing and plotting time series using the AirPassengers dataset, which contains monthly airline passenger numbers from 1949 to 1960.

# Load the dataset
data("AirPassengers")

# Basic information about the time series
class(AirPassengers)

## [1] "ts"

start(AirPassengers)

## [1] 1949    1

end(AirPassengers)

## [1] 1960   12

frequency(AirPassengers)

## [1] 12

summary(AirPassengers)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   104.0   180.0   265.5   280.3   360.5   622.0

# Create a time series object from scratch
set.seed(123)
my_ts <- ts(rnorm(60), start = c(2020, 1), frequency = 12)
head(my_ts)

##              Jan         Feb         Mar         Apr         May         Jun
## 2020 -0.56047565 -0.23017749  1.55870831  0.07050839  0.12928774  1.71506499

# Using zoo/xts for irregular time series
dates <- seq(as.Date("2020-01-01"), by = "month", length.out = 24)
values <- rnorm(24, mean = 100, sd = 15)
z_series <- zoo(values, order.by = dates)
x_series <- as.xts(z_series)

head(z_series)

## 2020-01-01 2020-02-01 2020-03-01 2020-04-01 2020-05-01 2020-06-01 
##  105.69459   92.46515   95.00189   84.72137   83.92313  104.55293

# Base R plotting
plot(AirPassengers, main = "Monthly Airline Passengers (1949-1960)",
     xlab = "Year", ylab = "Passengers (thousands)")

# Using ggplot2 for time series visualization
autoplot(AirPassengers) + 
  ggtitle("Monthly Airline Passengers (1949-1960)") +
  xlab("Year") + 
  ylab("Passengers (thousands)") +
  theme_minimal()

# Seasonal plot
ggseasonplot(AirPassengers, year.labels = TRUE, year.labels.left = TRUE) +
  ggtitle("Seasonal Plot: Monthly Airline Passengers") +
  theme_minimal()

# Time series decomposition plot
AirPassengers %>%
  decompose() %>%
  autoplot() +
  ggtitle("Decomposition of Airline Passengers Time Series")

3. Functions for Decomposing Time Series

Time series decomposition separates a time series into its components: trend, seasonal, cyclical, and irregular.

# Classical decomposition using stats package
ap_decomp <- decompose(AirPassengers)
plot(ap_decomp)

# STL decomposition (Seasonal and Trend decomposition using Loess)
ap_stl <- stl(AirPassengers, s.window = "periodic")
plot(ap_stl)

# Seasonal adjustment using seasonal package
ap_seas <- seas(AirPassengers)
plot(ap_seas)

summary(ap_seas)

## 
## Call:
## seas(x = AirPassengers)
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## Weekday           -0.0029497  0.0005232  -5.638 1.72e-08 ***
## Easter[1]          0.0177674  0.0071580   2.482   0.0131 *  
## AO1951.May         0.1001558  0.0204387   4.900 9.57e-07 ***
## MA-Nonseasonal-01  0.1156204  0.0858588   1.347   0.1781    
## MA-Seasonal-12     0.4973600  0.0774677   6.420 1.36e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## SEATS adj.  ARIMA: (0 1 1)(0 1 1)  Obs.: 144  Transform: log
## AICc: 947.3, BIC: 963.9  QS (no seasonality in final):    0  
## Box-Ljung (no autocorr.): 26.65   Shapiro (normality): 0.9908

# X-11 decomposition
ap_x11 <- seasonal::seas(AirPassengers, x11 = "")
plot(ap_x11)

# Using forecast package for decomposition
ap_tbats <- tbats(AirPassengers)
components <- tbats.components(ap_tbats)
plot(components)

4. Base Functions for Forecasting Time Series Models

Let’s explore different forecasting models for the AirPassengers dataset.

# Split data into training and test sets
train <- window(AirPassengers, end = c(1959, 12))
test <- window(AirPassengers, start = c(1960, 1))

# 1. ARIMA forecasting
arima_model <- auto.arima(train)
summary(arima_model)

## Series: train 
## ARIMA(1,1,0)(0,1,0)[12] 
## 
## Coefficients:
##           ar1
##       -0.2431
## s.e.   0.0894
## 
## sigma^2 = 109.8:  log likelihood = -447.95
## AIC=899.9   AICc=900.01   BIC=905.46
## 
## Training set error measures:
##                    ME     RMSE      MAE       MPE     MAPE      MASE       ACF1
## Training set 0.579486 9.907267 7.483159 0.1187348 2.880429 0.2457523 0.01227544

arima_forecast <- forecast(arima_model, h = 12)
plot(arima_forecast)

accuracy(arima_forecast, test)

##                      ME      RMSE       MAE        MPE     MAPE      MASE
## Training set   0.579486  9.907267  7.483159  0.1187348 2.880429 0.2457523
## Test set     -16.986385 23.931703 18.527682 -3.9334909 4.182395 0.6084625
##                    ACF1 Theil's U
## Training set 0.01227544        NA
## Test set     0.04802038 0.5336134

# 2. Exponential Smoothing (ETS)
ets_model <- ets(train)
summary(ets_model)

## ETS(M,Ad,M) 
## 
## Call:
## ets(y = train)
## 
##   Smoothing parameters:
##     alpha = 0.758 
##     beta  = 0.0213 
##     gamma = 1e-04 
##     phi   = 0.98 
## 
##   Initial states:
##     l = 120.7483 
##     b = 1.7632 
##     s = 0.897 0.798 0.919 1.0587 1.2156 1.2251
##            1.1075 0.9782 0.9804 1.0207 0.8926 0.9073
## 
##   sigma:  0.0378
## 
##      AIC     AICc      BIC 
## 1244.458 1250.511 1296.348 
## 
## Training set error measures:
##                    ME     RMSE      MAE       MPE     MAPE      MASE      ACF1
## Training set 1.511141 9.353686 6.980909 0.4422335 2.761156 0.2292581 0.0457192

ets_forecast <- forecast(ets_model, h = 12)
plot(ets_forecast)

accuracy(ets_forecast, test)

##                     ME      RMSE       MAE       MPE     MAPE      MASE
## Training set  1.511141  9.353686  6.980909 0.4422335 2.761156 0.2292581
## Test set     12.084843 27.398040 22.804502 2.0517668 4.655647 0.7489163
##                   ACF1 Theil's U
## Training set 0.0457192        NA
## Test set     0.4844065 0.5419859

# 3. Prophet model
# Convert to dataframe for prophet
ap_df <- data.frame(
  ds = seq(as.Date("1949-01-01"), by = "month", length.out = length(AirPassengers)),
  y = as.numeric(AirPassengers)
)

# Split data
train_df <- ap_df[1:132, ]
test_df <- ap_df[133:144, ]

# Create and fit model
prophet_model <- prophet(train_df)
future <- make_future_dataframe(prophet_model, periods = 12, freq = "month")
prophet_forecast <- predict(prophet_model, future)

# Plot forecast
plot(prophet_model, prophet_forecast) +
  ggtitle("Prophet Forecast for Airline Passengers")

# Component plot
prophet_plot_components(prophet_model, prophet_forecast)

5. Identifying Correlation and Variance

# Autocorrelation function (ACF)
acf(AirPassengers, main = "Autocorrelation of Airline Passengers")

# Partial autocorrelation function (PACF)
pacf(AirPassengers, main = "Partial Autocorrelation of Airline Passengers")

# Cross-correlation between original series and differenced series
diff_ap <- diff(AirPassengers)
ccf(AirPassengers, diff_ap, main = "Cross-correlation between Original and Differenced Series")

# Variance analysis
var(AirPassengers)

## [1] 14391.92

sd(AirPassengers)

## [1] 119.9663

# Box-Cox transformation to stabilize variance
lambda <- BoxCox.lambda(AirPassengers)
ap_boxcox <- BoxCox(AirPassengers, lambda)
par(mfrow = c(2, 1))
plot(AirPassengers, main = "Original Series")
plot(ap_boxcox, main = paste("Box-Cox Transformed Series (lambda =", round(lambda, 3), ")"))

par(mfrow = c(1, 1))

# Ljung-Box test for autocorrelation
Box.test(AirPassengers, lag = 24, type = "Ljung-Box")

## 
##  Box-Ljung test
## 
## data:  AirPassengers
## X-squared = 1606.1, df = 24, p-value < 2.2e-16

Domain 2: Social Network Analysis and Mining

1. Popular Social Network Analysis Packages in R

Here are some of the most popular packages in R for social network analysis:

# Install packages if not already installed
install.packages(c("igraph", "network", "sna", "networkD3", "statnet", 
                   "ergm", "visNetwork", "graphlayouts", "tidygraph", 
                   "ggraph", "linkcomm"))

# Load required packages
library(igraph)      # Primary SNA package
library(network)     # Network object class
library(sna)         # Social network analysis
library(networkD3)   # Interactive network visualizations
library(tidygraph)   # Tidy framework for graphs
library(ggraph)      # Grammar of graphics for graphs
library(linkcomm)    # Link communities

2. Functions for Network Graph Construction and Plot

For this section, we’ll use the Zachary’s Karate Club dataset, a well-known social network showing friendships between 34 members of a karate club.

# Load Zachary's Karate Club dataset
data(karate, package = "igraphdata")

# Basic information about the network
class(karate)

## [1] "igraph"

vcount(karate) # Number of vertices

## [1] 34

ecount(karate) # Number of edges

## [1] 78

is_directed(karate) # Is the network directed?

## [1] FALSE

# Create a network from scratch
# Define edges
edges <- matrix(c(1,2, 1,3, 2,3, 3,4, 4,5, 4,6, 5,6), ncol = 2, byrow = TRUE)
g <- graph_from_edgelist(edges, directed = FALSE)

# Add vertex attributes
V(g)$name <- letters[1:6]
V(g)$size <- c(10, 5, 8, 12, 7, 9)

# Add edge attributes
E(g)$weight <- c(1, 2.5, 1, 1.5, 2, 1, 0.5)

# Print summary
summary(g)

## IGRAPH eb4e57a UNW- 6 7 -- 
## + attr: name (v/c), size (v/n), weight (e/n)

# Basic plot
plot(karate, main = "Zachary's Karate Club Network")

# Using ggraph for network visualization
ggraph(karate, layout = "fr") +
  geom_edge_link(alpha = 0.2) +
  geom_node_point(aes(size = igraph::degree(karate), color = as.factor(membership(cluster_louvain(karate))))) +
  geom_node_text(aes(label = name), repel = TRUE) +
  theme_graph() +
  labs(title = "Zachary's Karate Club Network",
       subtitle = "Node size by degree, color by community")

# Interactive network visualization with networkD3
karate_d3 <- igraph_to_networkD3(karate, group = membership(cluster_louvain(karate)))
forceNetwork(Links = karate_d3$links, Nodes = karate_d3$nodes,
             Source = "source", Target = "target",
             NodeID = "name", Group = "group",
             opacity = 0.8, fontSize = 12,
             zoom = TRUE, opacityNoHover = 0.1,
             linkDistance = 100, charge = -100)

3. Functions for Neighbor Nodes

# Get all neighbors of a node
neighbors(karate, 1)

## + 16/34 vertices, named, from 4b458a1:
##  [1] Actor 2  Actor 3  Actor 4  Actor 5  Actor 6  Actor 7  Actor 8  Actor 9 
##  [9] Actor 11 Actor 12 Actor 13 Actor 14 Actor 18 Actor 20 Actor 22 Actor 32

# Get neighbor node IDs
neighbor_ids <- neighbors(karate, 1)
print(neighbor_ids)

## + 16/34 vertices, named, from 4b458a1:
##  [1] Actor 2  Actor 3  Actor 4  Actor 5  Actor 6  Actor 7  Actor 8  Actor 9 
##  [9] Actor 11 Actor 12 Actor 13 Actor 14 Actor 18 Actor 20 Actor 22 Actor 32

# Get neighbor names
neighbor_names <- V(karate)[neighbors(karate, 1)]$name
print(neighbor_names)

##  [1] "Actor 2"  "Actor 3"  "Actor 4"  "Actor 5"  "Actor 6"  "Actor 7" 
##  [7] "Actor 8"  "Actor 9"  "Actor 11" "Actor 12" "Actor 13" "Actor 14"
## [13] "Actor 18" "Actor 20" "Actor 22" "Actor 32"

# First order neighborhood (direct neighbors)
ego_1 <- ego(karate, order = 1, nodes = 1)
plot(ego_1[[1]], main = "First-order neighborhood of node 1")

# Second order neighborhood (neighbors of neighbors)
ego_2 <- ego(karate, order = 2, nodes = 1)
plot(ego_2[[1]], main = "Second-order neighborhood of node 1")

# Neighborhood size
neighborhood.size(karate, 1)

##  [1] 17 10 11  7  4  5  5  5  6  3  4  2  3  6  3  3  3  3  3  4  3  3  3  6  4
## [26]  4  3  5  4  5  5  7 13 18

# Neighborhood density
neighbor_nodes <- c(1, neighbors(karate, 1))
egonet <- induced_subgraph(karate, neighbor_nodes)
graph.density(egonet)

## [1] 0.25

4. Functions for Clusters

# Identify connected components
igraph::components(karate)

## $membership
##    Mr Hi  Actor 2  Actor 3  Actor 4  Actor 5  Actor 6  Actor 7  Actor 8 
##        1        1        1        1        1        1        1        1 
##  Actor 9 Actor 10 Actor 11 Actor 12 Actor 13 Actor 14 Actor 15 Actor 16 
##        1        1        1        1        1        1        1        1 
## Actor 17 Actor 18 Actor 19 Actor 20 Actor 21 Actor 22 Actor 23 Actor 24 
##        1        1        1        1        1        1        1        1 
## Actor 25 Actor 26 Actor 27 Actor 28 Actor 29 Actor 30 Actor 31 Actor 32 
##        1        1        1        1        1        1        1        1 
## Actor 33   John A 
##        1        1 
## 
## $csize
## [1] 34
## 
## $no
## [1] 1

# Find communities using different methods
# 1. Fast greedy modularity optimization
fg_comm <- cluster_fast_greedy(karate)
print(fg_comm)

## IGRAPH clustering fast greedy, groups: 3, mod: 0.43
## + groups:
##   $`1`
##    [1] "Actor 9"  "Actor 10" "Actor 15" "Actor 16" "Actor 19" "Actor 21"
##    [7] "Actor 23" "Actor 24" "Actor 25" "Actor 26" "Actor 27" "Actor 28"
##   [13] "Actor 29" "Actor 30" "Actor 31" "Actor 32" "Actor 33" "John A"  
##   
##   $`2`
##    [1] "Mr Hi"    "Actor 2"  "Actor 3"  "Actor 4"  "Actor 8"  "Actor 12"
##    [7] "Actor 13" "Actor 14" "Actor 18" "Actor 20" "Actor 22"
##   
##   $`3`
##   + ... omitted several groups/vertices

plot(fg_comm, karate, main = "Fast Greedy Communities")

# 2. Louvain method
louvain_comm <- cluster_louvain(karate)
print(louvain_comm)

## IGRAPH clustering multi level, groups: 4, mod: 0.43
## + groups:
##   $`1`
##    [1] "Mr Hi"    "Actor 5"  "Actor 6"  "Actor 7"  "Actor 11" "Actor 12"
##    [7] "Actor 17" "Actor 18" "Actor 20" "Actor 22"
##   
##   $`2`
##   [1] "Actor 2"  "Actor 3"  "Actor 4"  "Actor 8"  "Actor 13" "Actor 14"
##   
##   $`3`
##    [1] "Actor 9"  "Actor 10" "Actor 15" "Actor 16" "Actor 19" "Actor 21"
##    [7] "Actor 23" "Actor 27" "Actor 30" "Actor 31" "Actor 33" "John A"  
##   + ... omitted several groups/vertices

plot(louvain_comm, karate, main = "Louvain Communities")

# 3. Label propagation
lp_comm <- cluster_label_prop(karate)
print(lp_comm)

## IGRAPH clustering label propagation, groups: 4, mod: 0.42
## + groups:
##   $`1`
##    [1] "Mr Hi"    "Actor 2"  "Actor 3"  "Actor 4"  "Actor 5"  "Actor 8" 
##    [7] "Actor 11" "Actor 12" "Actor 13" "Actor 14" "Actor 18" "Actor 20"
##   [13] "Actor 22"
##   
##   $`2`
##   [1] "Actor 6"  "Actor 7"  "Actor 17"
##   
##   $`3`
##    [1] "Actor 9"  "Actor 10" "Actor 15" "Actor 16" "Actor 19" "Actor 21"
##   + ... omitted several groups/vertices

plot(lp_comm, karate, main = "Label Propagation Communities")

# Compare modularity of different methods
modularity(fg_comm)

## [1] 0.4345215

modularity(louvain_comm)

## [1] 0.4276719

modularity(lp_comm)

## [1] 0.4231649

5. Functions for Cliques

# Find all cliques
all_cliques <- cliques(karate)
length(all_cliques)

## [1] 170

# Find largest cliques
largest_cliques <- largest_cliques(karate)
print(largest_cliques)

## [[1]]
## + 5/34 vertices, named, from 4b458a1:
## [1] Actor 2 Mr Hi   Actor 4 Actor 3 Actor 8
## 
## [[2]]
## + 5/34 vertices, named, from 4b458a1:
## [1] Actor 2  Mr Hi    Actor 4  Actor 3  Actor 14

# Visualize a largest clique
largest_clique <- largest_cliques[[1]]
clique_graph <- induced_subgraph(karate, largest_clique)
plot(clique_graph, main = "Largest Clique in Karate Club Network")

# Count cliques of different sizes
clique_sizes <- sapply(all_cliques, length)
table(clique_sizes)

## clique_sizes
##  1  2  3  4  5 
## 34 78 45 11  2

# Clique number (size of the largest clique)
clique_num(karate)

## [1] 5

# Clique percolation communities
perc_communities <- cluster_edge_betweenness(karate)
plot(perc_communities, karate, main = "Clique Percolation Communities")

6. Functions for Community

# Detect communities using edge betweenness
eb_comm <- cluster_edge_betweenness(karate)
print(eb_comm)

## IGRAPH clustering edge betweenness, groups: 6, mod: 0.35
## + groups:
##   $`1`
##   [1] "Mr Hi"    "Actor 2"  "Actor 4"  "Actor 8"  "Actor 12" "Actor 13"
##   [7] "Actor 18" "Actor 20" "Actor 22"
##   
##   $`2`
##   [1] "Actor 3"  "Actor 10" "Actor 14" "Actor 29"
##   
##   $`3`
##   [1] "Actor 5"  "Actor 6"  "Actor 7"  "Actor 11" "Actor 17"
##   
##   + ... omitted several groups/vertices

plot(eb_comm, karate, main = "Edge Betweenness Communities")

# Community membership
membership(eb_comm)

##    Mr Hi  Actor 2  Actor 3  Actor 4  Actor 5  Actor 6  Actor 7  Actor 8 
##        1        1        2        1        3        3        3        1 
##  Actor 9 Actor 10 Actor 11 Actor 12 Actor 13 Actor 14 Actor 15 Actor 16 
##        4        2        3        1        1        2        4        4 
## Actor 17 Actor 18 Actor 19 Actor 20 Actor 21 Actor 22 Actor 23 Actor 24 
##        3        1        4        1        4        1        4        5 
## Actor 25 Actor 26 Actor 27 Actor 28 Actor 29 Actor 30 Actor 31 Actor 32 
##        5        5        6        5        2        6        4        4 
## Actor 33   John A 
##        4        4

# Community sizes
sizes(eb_comm)

## Community sizes
##  1  2  3  4  5  6 
##  9  4  5 10  4  2

# Communities as subgraphs
communities(eb_comm)

## $`1`
## [1] "Mr Hi"    "Actor 2"  "Actor 4"  "Actor 8"  "Actor 12" "Actor 13" "Actor 18"
## [8] "Actor 20" "Actor 22"
## 
## $`2`
## [1] "Actor 3"  "Actor 10" "Actor 14" "Actor 29"
## 
## $`3`
## [1] "Actor 5"  "Actor 6"  "Actor 7"  "Actor 11" "Actor 17"
## 
## $`4`
##  [1] "Actor 9"  "Actor 15" "Actor 16" "Actor 19" "Actor 21" "Actor 23"
##  [7] "Actor 31" "Actor 32" "Actor 33" "John A"  
## 
## $`5`
## [1] "Actor 24" "Actor 25" "Actor 26" "Actor 28"
## 
## $`6`
## [1] "Actor 27" "Actor 30"

# Community modularity
modularity(eb_comm)

## [1] 0.345299

# Compare communities with the known factions
faction <- V(karate)$Faction
compare(membership(eb_comm), faction, method = "adjusted.rand")

## [1] 0.3581858

compare(membership(eb_comm), faction, method = "nmi")

## [1] 0.5178731

# Visualize community structure
V(karate)$community <- membership(eb_comm)
plot(karate, 
     vertex.color = V(karate)$community,
     vertex.size = 10,
     vertex.label = V(karate)$name,
     main = "Karate Club Network with Communities")

7. Functions for Blocks

# Load only necessary packages
library(igraph)

# Stochastic block model
set.seed(123)  # For reproducibility
sbm <- sample_sbm(40, pref.matrix = matrix(c(0.5, 0.1, 0.1, 0.5), nrow = 2),
                  block.sizes = c(20, 20))

# Visualize the block model
plot(sbm, 
     vertex.color = rep(c("skyblue", "salmon"), each = 20),
     vertex.size = 5,
     vertex.label = NA,
     main = "Stochastic Block Model")

# Create a simplified version of the karate network
karate_simple <- simplify(karate, remove.multiple = TRUE, remove.loops = TRUE)
adj_matrix <- as_adjacency_matrix(karate_simple)
adj_mat <- as.matrix(adj_matrix)

# Use alternative community detection methods instead of blockmodeling
# 1. Fast greedy community detection
fg_blocks <- cluster_fast_greedy(karate)
print(fg_blocks)

## IGRAPH clustering fast greedy, groups: 3, mod: 0.43
## + groups:
##   $`1`
##    [1] "Actor 9"  "Actor 10" "Actor 15" "Actor 16" "Actor 19" "Actor 21"
##    [7] "Actor 23" "Actor 24" "Actor 25" "Actor 26" "Actor 27" "Actor 28"
##   [13] "Actor 29" "Actor 30" "Actor 31" "Actor 32" "Actor 33" "John A"  
##   
##   $`2`
##    [1] "Mr Hi"    "Actor 2"  "Actor 3"  "Actor 4"  "Actor 8"  "Actor 12"
##    [7] "Actor 13" "Actor 14" "Actor 18" "Actor 20" "Actor 22"
##   
##   $`3`
##   + ... omitted several groups/vertices

plot(fg_blocks, karate, main = "Fast Greedy Community Blocks")

# 2. Louvain community detection
louvain_blocks <- cluster_louvain(karate)
print(louvain_blocks)

## IGRAPH clustering multi level, groups: 3, mod: 0.43
## + groups:
##   $`1`
##    [1] "Mr Hi"    "Actor 2"  "Actor 3"  "Actor 4"  "Actor 8"  "Actor 12"
##    [7] "Actor 13" "Actor 14" "Actor 18" "Actor 20" "Actor 22"
##   
##   $`2`
##   [1] "Actor 5"  "Actor 6"  "Actor 7"  "Actor 11" "Actor 17"
##   
##   $`3`
##    [1] "Actor 9"  "Actor 10" "Actor 15" "Actor 16" "Actor 19" "Actor 21"
##    [7] "Actor 23" "Actor 24" "Actor 25" "Actor 26" "Actor 27" "Actor 28"
##   + ... omitted several groups/vertices

plot(louvain_blocks, karate, main = "Louvain Community Blocks")

# 3. Edge betweenness community detection
eb_blocks <- cluster_edge_betweenness(karate)
print(eb_blocks)

## IGRAPH clustering edge betweenness, groups: 6, mod: 0.35
## + groups:
##   $`1`
##   [1] "Mr Hi"    "Actor 2"  "Actor 4"  "Actor 8"  "Actor 12" "Actor 13"
##   [7] "Actor 18" "Actor 20" "Actor 22"
##   
##   $`2`
##   [1] "Actor 3"  "Actor 10" "Actor 14" "Actor 29"
##   
##   $`3`
##   [1] "Actor 5"  "Actor 6"  "Actor 7"  "Actor 11" "Actor 17"
##   
##   + ... omitted several groups/vertices

plot(eb_blocks, karate, main = "Edge Betweenness Blocks")

# 4. Label propagation
lp_blocks <- cluster_label_prop(karate)
print(lp_blocks)

## IGRAPH clustering label propagation, groups: 3, mod: 0.43
## + groups:
##   $`1`
##    [1] "Mr Hi"    "Actor 2"  "Actor 3"  "Actor 4"  "Actor 8"  "Actor 12"
##    [7] "Actor 13" "Actor 14" "Actor 18" "Actor 20" "Actor 22"
##   
##   $`2`
##   [1] "Actor 5"  "Actor 6"  "Actor 7"  "Actor 11" "Actor 17"
##   
##   $`3`
##    [1] "Actor 9"  "Actor 10" "Actor 15" "Actor 16" "Actor 19" "Actor 21"
##    [7] "Actor 23" "Actor 24" "Actor 25" "Actor 26" "Actor 27" "Actor 28"
##   + ... omitted several groups/vertices

plot(lp_blocks, karate, main = "Label Propagation Blocks")

# 5. Walktrap community detection
wt_blocks <- cluster_walktrap(karate)
print(wt_blocks)

## IGRAPH clustering walktrap, groups: 4, mod: 0.44
## + groups:
##   $`1`
##    [1] "Actor 9"  "Actor 10" "Actor 15" "Actor 16" "Actor 19" "Actor 21"
##    [7] "Actor 23" "Actor 27" "Actor 29" "Actor 30" "Actor 31" "Actor 33"
##   [13] "John A"  
##   
##   $`2`
##    [1] "Mr Hi"    "Actor 2"  "Actor 3"  "Actor 4"  "Actor 8"  "Actor 12"
##    [7] "Actor 13" "Actor 14" "Actor 18" "Actor 20" "Actor 22"
##   
##   $`3`
##   + ... omitted several groups/vertices

plot(wt_blocks, karate, main = "Walktrap Community Blocks")

# Use hierarchical clustering based on structural equivalence
# First calculate a distance matrix
dist_matrix <- dist(adj_mat)
hc <- hclust(dist_matrix, method = "ward.D2")
blocks <- cutree(hc, k = 4)

# Plot with block colors from hierarchical clustering
plot(karate, 
     vertex.color = blocks,
     vertex.size = 8,
     vertex.label = V(karate)$name,
     main = "Hierarchical Clustering Blocks")

# Create a visualization of connections between different blocks
# This is a simplified version of what blockmodeling tries to accomplish
block_matrix <- matrix(0, nrow = 4, ncol = 4)
rownames(block_matrix) <- paste("Block", 1:4)
colnames(block_matrix) <- paste("Block", 1:4)

# Count connections between blocks
for (i in 1:vcount(karate)) {
  for (j in 1:vcount(karate)) {
    if (are_adjacent(karate, i, j)) {
      block_i <- blocks[i]
      block_j <- blocks[j]
      block_matrix[block_i, block_j] <- block_matrix[block_i, block_j] + 1
    }
  }
}

# Print the block connection matrix
print("Block Connection Matrix:")

## [1] "Block Connection Matrix:"

print(block_matrix)

##         Block 1 Block 2 Block 3 Block 4
## Block 1       2      20       3       0
## Block 2      20      24       4       3
## Block 3       3       4      20      24
## Block 4       0       3      24       2

# Heatmap of the adjacency matrix to visualize block structure
# Reorder the adjacency matrix based on block assignments
order_by_blocks <- order(blocks)
reordered_adj <- adj_mat[order_by_blocks, order_by_blocks]

# Create a heatmap
heatmap(reordered_adj, Rowv = NA, Colv = NA, 
        main = "Adjacency Matrix Ordered by Blocks",
        col = c("white", "steelblue"))

8. Functions for Edges

# Basic edge information
E(karate)

## + 78/78 edges from 4b458a1 (vertex names):
##  [1] Mr Hi  --Actor 2  Mr Hi  --Actor 3  Mr Hi  --Actor 4  Mr Hi  --Actor 5 
##  [5] Mr Hi  --Actor 6  Mr Hi  --Actor 7  Mr Hi  --Actor 8  Mr Hi  --Actor 9 
##  [9] Mr Hi  --Actor 11 Mr Hi  --Actor 12 Mr Hi  --Actor 13 Mr Hi  --Actor 14
## [13] Mr Hi  --Actor 18 Mr Hi  --Actor 20 Mr Hi  --Actor 22 Mr Hi  --Actor 32
## [17] Actor 2--Actor 3  Actor 2--Actor 4  Actor 2--Actor 8  Actor 2--Actor 14
## [21] Actor 2--Actor 18 Actor 2--Actor 20 Actor 2--Actor 22 Actor 2--Actor 31
## [25] Actor 3--Actor 4  Actor 3--Actor 8  Actor 3--Actor 9  Actor 3--Actor 10
## [29] Actor 3--Actor 14 Actor 3--Actor 28 Actor 3--Actor 29 Actor 3--Actor 33
## [33] Actor 4--Actor 8  Actor 4--Actor 13 Actor 4--Actor 14 Actor 5--Actor 7 
## [37] Actor 5--Actor 11 Actor 6--Actor 7  Actor 6--Actor 11 Actor 6--Actor 17
## + ... omitted several edges

head(E(karate))

## + 6/78 edges from 4b458a1 (vertex names):
## [1] Mr Hi--Actor 2 Mr Hi--Actor 3 Mr Hi--Actor 4 Mr Hi--Actor 5 Mr Hi--Actor 6
## [6] Mr Hi--Actor 7

# Edge attributes
E(karate)$weight <- runif(ecount(karate), 0.5, 3)
head(E(karate)$weight)

## [1] 1.7616576 2.2995942 0.8238434 1.0967612 1.0425515 0.6678842

# Edge density
edge_density(karate)

## [1] 0.1390374

# Weighted edge density
sum(E(karate)$weight) / (vcount(karate) * (vcount(karate) - 1))

## [1] 0.1263641

# Find edges with highest weights
top_edges <- E(karate)[order(E(karate)$weight, decreasing = TRUE)[1:10]]
print(top_edges)

## + 10/78 edges from 4b458a1 (vertex names):
##  [1] Mr Hi   --Actor 12 Actor 28--John A   Actor 2 --Actor 18 Mr Hi   --Actor 18
##  [5] Actor 10--John A   Actor 24--Actor 28 Actor 23--Actor 33 Actor 16--Actor 33
##  [9] Actor 20--John A   Actor 27--John A

# Plot with edge weights
plot(karate,
     edge.width = E(karate)$weight,
     vertex.size = 10,
     vertex.label = V(karate)$name,
     main = "Karate Club Network with Weighted Edges")

9. Functions for Subgraphs

# Create a subgraph by selecting specific vertices
sub_vertices <- c(1, 2, 3, 4, 5, 6, 7, 8)
sub_g <- induced_subgraph(karate, sub_vertices)
plot(sub_g, main = "Subgraph of Karate Club Network")

# Create a subgraph based on attribute values
# Add a fictional attribute to vertices
V(karate)$role <- sample(c("leader", "follower", "neutral"), vcount(karate), replace = TRUE)
leaders_g <- induced_subgraph(karate, V(karate)[role == "leader"])
plot(leaders_g, main = "Leaders Subgraph")

# Create an ego network (subgraph centered on a vertex)
ego_g <- make_ego_graph(karate, order = 1, nodes = 1)[[1]]
plot(ego_g, main = "Ego Network of Node 1")

# Create a subgraph based on edge weights
# First, add weights to edges if not already present
if (!"weight" %in% edge_attr_names(karate)) {
  E(karate)$weight <- runif(ecount(karate), 0.5, 3)
}
heavy_edges <- E(karate)[weight > 2]
heavy_g <- subgraph.edges(karate, heavy_edges)
plot(heavy_g, main = "Subgraph with High-Weight Edges")

# Extract the largest connected component
components_g <- decompose(karate)
largest_comp <- components_g[[which.max(sapply(components_g, vcount))]]
plot(largest_comp, main = "Largest Connected Component")

# Create a subgraph based on community detection
comm <- cluster_louvain(karate)
comm_1 <- induced_subgraph(karate, which(membership(comm) == 1))
plot(comm_1, main = "Community 1 Subgraph")

# Create a random subgraph
random_vertices <- sample(vcount(karate), 15)
random_g <- induced_subgraph(karate, random_vertices)
plot(random_g, main = "Random Subgraph of Karate Club Network")

Summary and Conclusions

In this assignment, we’ve explored two important domains in data analysis: time series analysis and social network analysis.

For time series analysis, we covered: - Key packages such as forecast, tseries, zoo, and xts - Functions for constructing and plotting time series data - Methods for decomposing time series into trend, seasonal, and residual components - Various forecasting techniques including ARIMA, ETS, and Prophet - Correlation analysis and variance examination

For social network analysis, we: - Explored powerful packages like igraph, network, and sna - Demonstrated network construction and visualization - Analyzed neighbor relationships and connectivity - Identified clusters, cliques, and communities - Explored block modeling techniques - Examined edge properties and their importance - Created and analyzed various types of subgraphs

These techniques provide powerful tools for uncovering patterns in sequential data and network structures, applicable across many domains including finance, social sciences, business, and more.

Digital Assignment - I: Time Series Analysis and Social Network Analysis

Venkat Balaji S 22BCE2093

March 13, 2025