Reading in Files

# Read the input CSVs. Only the PID, dorm and DID columns of the housing
# roster are kept.
corr <- read.csv(file = "~/Downloads/subjectwise_correlations-2.csv")
getAlong <- read.csv(file = "~/Downloads/getAlong.csv")
trait <- read.csv(file = "~/Downloads/df.trait_fa21.csv")
PID_21 <- read.csv(
  file = "/Volumes/GoogleDrive/Shared drives/Box SSNL Folder/SSNL Social Networks Study/2021-2022/Rosters/Housing Rosters/PIDinfo_long_2021-2022_autoentry_Special_Characters_Changed_wHousing.csv"
)
PID_21 <- select(PID_21, PID, dorm, DID)

Gender

Gender Data Wrangling

# Attach each pair member's gender to the correlation table and derive the
# same-gender flags used by the plots and models that follow.

# `sub_left` / `sub_right` become `PID` / `PID_partner` so that both members
# of a pair can be joined against the trait table by name.
corr <- corr %>%
  rename(PID = sub_left, PID_partner = sub_right)

# Keep one row per PID so the joins below can never duplicate a pair.
trait <- trait %>%
  select(PID, gender) %>%
  distinct(PID, .keep_all = TRUE)

# First join: gender of the `PID` member.
corr <- corr %>%
  left_join(trait, by = "PID")

# Suffix the lookup columns (`PID_partner`, `gender_partner`) so the second
# join fills in the partner's gender.
colnames(trait) <- paste(colnames(trait), "partner", sep = "_")

corr <- corr %>%
  left_join(trait, by = "PID_partner")

# `same_gender` is NA whenever either gender is missing. The specific label
# must be NA in exactly the same rows: a plain nested ifelse() would turn
# `FALSE & NA` into FALSE and silently label such pairs "Mixed".
corr <- corr %>%
  mutate(
    same_gender = ifelse(gender == gender_partner, "yes", "no"),
    same_gender_specific = case_when(
      is.na(gender) | is.na(gender_partner) ~ NA_character_,
      gender == "Man" & gender_partner == "Man" ~ "Both Male",
      gender == "Woman" & gender_partner == "Woman" ~ "Both Female",
      TRUE ~ "Mixed"
    )
  )

Gender Visualisation

# Treat the same-gender flag as a categorical variable.
corr[["same_gender"]] <- as.factor(corr[["same_gender"]])

# Raw pairwise correlations, split by whether the pair shares a gender.
p <- ggplot(data = corr, mapping = aes(x = same_gender, y = correlation)) +
  geom_point(size = 0.005)
print(p)

# Linear model of correlation on the same-gender flag.
model <- lm(correlation ~ 1 + same_gender, data = corr)
summary(model)
## 
## Call:
## lm(formula = correlation ~ 1 + same_gender, data = corr)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.217873 -0.046385  0.003269  0.045958  0.208951 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    0.551080   0.002454 224.527   <2e-16 ***
## same_genderyes 0.006254   0.003613   1.731   0.0836 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06933 on 1480 degrees of freedom
##   (1068 observations deleted due to missingness)
## Multiple R-squared:  0.002021,   Adjusted R-squared:  0.001347 
## F-statistic: 2.997 on 1 and 1480 DF,  p-value: 0.08362
# Re-display the plot currently stored in `p`.
print(p)

Looking at the specific genders of the pairs

# Treat the pair-type label as a categorical variable.
corr[["same_gender_specific"]] <- as.factor(corr[["same_gender_specific"]])

# Raw pairwise correlations, split by pair type.
p <- ggplot(
  data = corr,
  mapping = aes(x = same_gender_specific, y = correlation)
) +
  geom_point(size = 0.005)
print(p)

# Linear model of correlation on the pair-type label.
model <- lm(correlation ~ 1 + same_gender_specific, data = corr)
summary(model)
## 
## Call:
## lm(formula = correlation ~ 1 + same_gender_specific, data = corr)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.250711 -0.046335  0.003478  0.045460  0.208742 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    0.5571255  0.0037724 147.684   <2e-16 ***
## same_gender_specificBoth Male  0.0004174  0.0053350   0.078   0.9377    
## same_gender_specificMixed     -0.0076782  0.0044891  -1.710   0.0874 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06976 on 1503 degrees of freedom
##   (1044 observations deleted due to missingness)
## Multiple R-squared:  0.003169,   Adjusted R-squared:  0.001842 
## F-statistic: 2.389 on 2 and 1503 DF,  p-value: 0.09209
# Re-display the plot currently stored in `p`.
print(p)

Dorms

Dorms Data Wrangling

# Attach each pair member's dorm to the correlation table and derive the
# same-dorm flags used by the plots and models that follow.

# Keep one roster row per PID so the joins below cannot duplicate pairs.
PID_21 <- PID_21 %>%
  distinct(PID, .keep_all = TRUE)

# First join: dorm / DID of the `PID` member.
corr <- corr %>%
  left_join(PID_21, by = "PID")

# Suffix the roster columns (`PID_partner`, `dorm_partner`, `DID_partner`) so
# the second join fills in the partner's housing information.
colnames(PID_21) <- paste(colnames(PID_21), "partner", sep = "_")

corr <- corr %>%
  left_join(PID_21, by = "PID_partner")

# `same_dorm` is NA whenever either dorm is missing; the specific label is
# kept NA in the same rows (a nested ifelse() would turn `FALSE & NA` into
# FALSE and label such pairs "Mixed"). Pairs sharing one of the four named
# dorms get "Both <dorm>"; every other complete pair is "Mixed".
corr <- corr %>%
  mutate(
    same_dorm = ifelse(dorm == dorm_partner, "yes", "no"),
    same_dorm_specific = case_when(
      is.na(dorm) | is.na(dorm_partner) ~ NA_character_,
      dorm == "Castano" & dorm_partner == "Castano" ~ "Both Castano",
      dorm == "Schiff" & dorm_partner == "Schiff" ~ "Both Schiff",
      dorm == "Rinconada" & dorm_partner == "Rinconada" ~ "Both Rinconada",
      dorm == "Larkin" & dorm_partner == "Larkin" ~ "Both Larkin",
      TRUE ~ "Mixed"
    )
  )

Same Dorm

# Treat the same-dorm flag as a categorical variable.
corr$same_dorm <- as.factor(corr$same_dorm)

# Raw pairwise correlations, split by whether the pair shares a dorm.
p <- ggplot(corr, aes(x = same_dorm, y = correlation)) +
  geom_point(size = 0.005)
p

# Jittered points with a bootstrapped mean and confidence interval per group.
# The p-value label is added with annotate() rather than geom_text(aes(...)):
# constants inside aes() are drawn once per data row and "black" would be
# mapped as an extra level of the colour scale instead of being used as a
# literal colour.
p <- ggplot(corr, aes(x = same_dorm, y = correlation, color = same_dorm)) +
  geom_point(
    alpha = 0.08,
    position = position_jitter(width = 0.1, height = 0),
    size = 1
  ) +
  stat_summary(
    fun.data = "mean_cl_boot",
    geom = "pointrange",
    color = "black",
    fill = "black",
    shape = 21,
    size = .3
  ) +
  theme_bw() +
  # Hard-coded p-value of `same_dorm` from the model fitted below; update it
  # if the data change.
  annotate("text", x = 1.5, y = 0.7, label = "p = 0.618", color = "black") +
  theme(legend.position = "none")
p

# Linear model of correlation on the same-dorm flag.
model <- lm(correlation ~ 1 + same_dorm, corr)
summary(model)
## 
## Call:
## lm(formula = correlation ~ 1 + same_dorm, data = corr)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.279897 -0.043825  0.009217  0.054239  0.226396 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.539889   0.001814 297.577   <2e-16 ***
## same_dormyes -0.001835   0.003679  -0.499    0.618    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0797 on 2548 degrees of freedom
## Multiple R-squared:  9.764e-05,  Adjusted R-squared:  -0.0002948 
## F-statistic: 0.2488 on 1 and 2548 DF,  p-value: 0.6179

Same Specific Dorm

# Treat the dorm pair-type label as a categorical variable.
corr[["same_dorm_specific"]] <- as.factor(corr[["same_dorm_specific"]])

# Raw pairwise correlations, split by dorm pair type.
p <- ggplot(
  data = corr,
  mapping = aes(x = same_dorm_specific, y = correlation)
) +
  geom_point(size = 0.005)
print(p)

# Linear model of correlation on the dorm pair-type label.
model <- lm(correlation ~ 1 + same_dorm_specific, data = corr)
summary(model)
## 
## Call:
## lm(formula = correlation ~ 1 + same_dorm_specific, data = corr)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.279897 -0.043716  0.008426  0.054383  0.226396 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       0.529935   0.005893  89.925  < 2e-16 ***
## same_dorm_specificBoth Larkin    -0.007143   0.008674  -0.823  0.41030    
## same_dorm_specificBoth Rinconada  0.020622   0.011069   1.863  0.06256 .  
## same_dorm_specificBoth Schiff     0.022206   0.008051   2.758  0.00586 ** 
## same_dorm_specificMixed           0.009954   0.006165   1.615  0.10649    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0795 on 2545 degrees of freedom
## Multiple R-squared:  0.006353,   Adjusted R-squared:  0.004791 
## F-statistic: 4.068 on 4 and 2545 DF,  p-value: 0.00273
# Re-display the plot currently stored in `p`.
print(p)

Social Network Proximity (Under Construction)

# Read the edge list that the graph is built from.
edges <- read.csv(file = "~/Downloads/CloseFrds.csv")

Creating the igraph object

# Vertex list: every distinct ID found in the first two columns of `edges`.
# graph_from_data_frame() reads those two columns as the edge endpoints and
# treats any further columns as edge attributes, so only they may contribute
# vertices.
nodes <- data.frame(PID = unlist(edges[, 1:2], use.names = FALSE)) %>%
  distinct(PID)

# Directed graph, then converted to a tidygraph object.
g <- graph_from_data_frame(edges, directed = TRUE, vertices = nodes)
g <- as_tbl_graph(g)

# Finding the shortest path between nodes
# distances() is the current name of the deprecated shortest.paths().
# NOTE(review): the default `mode = "all"` ignores edge direction even though
# the graph is directed -- confirm this is intended.
distMatrix <- as.data.frame(distances(g, v = V(g), to = V(g)))
#distMatrix