Reading in Files
# Read the CSV inputs from their local / shared-drive locations.
corr <- read.csv(file = "~/Downloads/subjectwise_correlations-2.csv")
getAlong <- read.csv(file = "~/Downloads/getAlong.csv")
trait <- read.csv(file = "~/Downloads/df.trait_fa21.csv")

# From the 2021-2022 roster file only the PID, dorm and DID columns are kept.
PID_21 <- select(
  read.csv(file = "/Volumes/GoogleDrive/Shared drives/Box SSNL Folder/SSNL Social Networks Study/2021-2022/Rosters/Housing Rosters/PIDinfo_long_2021-2022_autoentry_Special_Characters_Changed_wHousing.csv"),
  PID, dorm, DID
)
Gender
Gender Data Wrangling
# Gender wrangling: attach the gender of both members of every pair in `corr`
# and derive two pair-level labels from them.

# Name the two members of each pair PID / PID_partner so both can be joined
# against the trait table.
corr <- corr %>%
  rename(PID = sub_left, PID_partner = sub_right)

# Keep one gender row per participant. A repeated PID in `trait` would
# otherwise duplicate pair rows in the joins below (the housing roster is
# de-duplicated the same way before it is joined).
trait <- trait %>%
  select(PID, gender) %>%
  distinct(PID, .keep_all = TRUE)

# Gender of the first member of the pair.
corr <- corr %>%
  left_join(trait, by = "PID")

# Suffix the lookup columns (-> PID_partner, gender_partner) so the same table
# supplies the partner's gender. Note that `trait` keeps the suffixed column
# names after this point.
colnames(trait) <- paste(colnames(trait), "partner", sep = "_")
corr <- corr %>%
  left_join(trait, by = "PID_partner")

corr <- corr %>%
  mutate(
    # "yes"/"no"; NA when either gender is missing.
    same_gender = ifelse(gender == gender_partner, "yes", "no"),
    # A pair with a missing gender is NA here as well. Without the explicit
    # first rule, a pair whose known member is neither "Man" nor "Woman" fell
    # through to "Mixed" even though the partner's gender was unknown, so this
    # column and `same_gender` disagreed about which rows are missing.
    same_gender_specific = case_when(
      is.na(gender) | is.na(gender_partner) ~ NA_character_,
      gender == "Man" & gender_partner == "Man" ~ "Both Male",
      gender == "Woman" & gender_partner == "Woman" ~ "Both Female",
      TRUE ~ "Mixed"
    )
  )
Gender Visualisation
# Treat the same-gender flag as a categorical variable.
corr[["same_gender"]] <- as.factor(corr[["same_gender"]])

# Scatter of the pair correlation against the same-gender flag.
p <- ggplot(data = corr, mapping = aes(x = same_gender, y = correlation)) +
  geom_point(size = 0.005)
print(p)

# Linear model of correlation on the same-gender flag.
model <- lm(formula = correlation ~ 1 + same_gender, data = corr)
summary(model)
##
## Call:
## lm(formula = correlation ~ 1 + same_gender, data = corr)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.217873 -0.046385 0.003269 0.045958 0.208951
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.551080 0.002454 224.527 <2e-16 ***
## same_genderyes 0.006254 0.003613 1.731 0.0836 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06933 on 1480 degrees of freedom
## (1068 observations deleted due to missingness)
## Multiple R-squared: 0.002021, Adjusted R-squared: 0.001347
## F-statistic: 2.997 on 1 and 1480 DF, p-value: 0.08362
# Print `p` again; it still holds the same_gender scatter plot built before the model.
p

Looking at the specific genders of the pairs
# Treat the pair-composition label as a categorical variable.
corr[["same_gender_specific"]] <- as.factor(corr[["same_gender_specific"]])

# Scatter of the pair correlation against the pair-composition label.
p <- ggplot(
  data = corr,
  mapping = aes(x = same_gender_specific, y = correlation)
) +
  geom_point(size = 0.005)
print(p)

# Linear model of correlation on the pair-composition label.
model <- lm(formula = correlation ~ 1 + same_gender_specific, data = corr)
summary(model)
##
## Call:
## lm(formula = correlation ~ 1 + same_gender_specific, data = corr)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.250711 -0.046335 0.003478 0.045460 0.208742
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.5571255 0.0037724 147.684 <2e-16 ***
## same_gender_specificBoth Male 0.0004174 0.0053350 0.078 0.9377
## same_gender_specificMixed -0.0076782 0.0044891 -1.710 0.0874 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06976 on 1503 degrees of freedom
## (1044 observations deleted due to missingness)
## Multiple R-squared: 0.003169, Adjusted R-squared: 0.001842
## F-statistic: 2.389 on 2 and 1503 DF, p-value: 0.09209
# Print `p` again; it still holds the same_gender_specific scatter plot built before the model.
p

Dorms Data Wrangling
# Dorm wrangling: attach the dorm of both members of every pair in `corr`
# and derive two pair-level labels from them.

# One roster row per participant, so the joins below cannot duplicate pairs.
PID_21 <- PID_21 %>%
  distinct(PID, .keep_all = TRUE)

# Dorm (and DID) of the first member of the pair.
corr <- corr %>%
  left_join(PID_21, by = "PID")

# Suffix the roster columns (-> PID_partner, dorm_partner, DID_partner) so the
# same table supplies the partner's dorm. `PID_21` keeps the suffixed column
# names after this point.
colnames(PID_21) <- paste(colnames(PID_21), "partner", sep = "_")
corr <- corr %>%
  left_join(PID_21, by = "PID_partner")

corr <- corr %>%
  mutate(
    # "yes"/"no"; NA when either dorm is missing.
    same_dorm = ifelse(dorm == dorm_partner, "yes", "no"),
    # "Both <dorm>" for the four named dorms, "Mixed" for every other pair.
    # The explicit first rule keeps a pair with an unknown dorm as NA; the
    # previous nested ifelse() could label such a pair "Mixed" when the known
    # dorm was not one of the four named ones.
    same_dorm_specific = case_when(
      is.na(dorm) | is.na(dorm_partner) ~ NA_character_,
      dorm == dorm_partner &
        dorm %in% c("Castano", "Schiff", "Rinconada", "Larkin") ~
        paste("Both", dorm),
      TRUE ~ "Mixed"
    )
  )
Same Dorm
# Treat the same-dorm flag as a categorical variable.
corr$same_dorm <- as.factor(corr$same_dorm)

# Quick scatter of the pair correlation against the same-dorm flag.
p <- ggplot(corr, aes(x = same_dorm, y = correlation)) +
  geom_point(size = 0.005)
p

# Fit the model before building the annotated plot so the label reports the
# model's own p-value instead of a number typed in by hand.
model <- lm(correlation ~ 1 + same_dorm, corr)
same_dorm_p <- coef(summary(model))["same_dormyes", "Pr(>|t|)"]

# Jittered points per group with a bootstrapped mean and confidence interval
# on top, annotated with the p-value of the same_dorm coefficient.
p <- ggplot(corr, aes(x = same_dorm, y = correlation, color = same_dorm)) +
  geom_point(
    alpha = 0.08,
    position = position_jitter(width = 0.1, height = 0),
    size = 1
  ) +
  stat_summary(
    fun.data = "mean_cl_boot",
    geom = "pointrange",
    color = "black",
    fill = "black",
    shape = 21,
    size = .3
  ) +
  theme_bw() +
  # annotate() draws the label once with a literal colour. The previous
  # geom_text(aes(..., color = "black")) mapped "black" as a data value (adding
  # a level to the colour scale rather than colouring the text black) and drew
  # the label once per row of `corr`.
  annotate(
    "text",
    x = 1.5,
    y = 0.7,
    label = sprintf("p = %.3f", same_dorm_p),
    colour = "black"
  ) +
  theme(legend.position = "none")
p

summary(model)
##
## Call:
## lm(formula = correlation ~ 1 + same_dorm, data = corr)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.279897 -0.043825 0.009217 0.054239 0.226396
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.539889 0.001814 297.577 <2e-16 ***
## same_dormyes -0.001835 0.003679 -0.499 0.618
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0797 on 2548 degrees of freedom
## Multiple R-squared: 9.764e-05, Adjusted R-squared: -0.0002948
## F-statistic: 0.2488 on 1 and 2548 DF, p-value: 0.6179
Same Specific Dorm
# Treat the dorm-composition label as a categorical variable.
corr[["same_dorm_specific"]] <- as.factor(corr[["same_dorm_specific"]])

# Scatter of the pair correlation against the dorm-composition label.
p <- ggplot(
  data = corr,
  mapping = aes(x = same_dorm_specific, y = correlation)
) +
  geom_point(size = 0.005)
print(p)

# Linear model of correlation on the dorm-composition label.
model <- lm(formula = correlation ~ 1 + same_dorm_specific, data = corr)
summary(model)
##
## Call:
## lm(formula = correlation ~ 1 + same_dorm_specific, data = corr)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.279897 -0.043716 0.008426 0.054383 0.226396
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.529935 0.005893 89.925 < 2e-16 ***
## same_dorm_specificBoth Larkin -0.007143 0.008674 -0.823 0.41030
## same_dorm_specificBoth Rinconada 0.020622 0.011069 1.863 0.06256 .
## same_dorm_specificBoth Schiff 0.022206 0.008051 2.758 0.00586 **
## same_dorm_specificMixed 0.009954 0.006165 1.615 0.10649
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0795 on 2545 degrees of freedom
## Multiple R-squared: 0.006353, Adjusted R-squared: 0.004791
## F-statistic: 4.068 on 4 and 2545 DF, p-value: 0.00273
# Print `p` again; it still holds the same_dorm_specific scatter plot built before the model.
p

Creating the igraph object
# Build the network from the edge list and compute all pairwise path lengths.

# Vertex table: every distinct value that appears anywhere in `edges`.
# NOTE(review): unlist() flattens every column of `edges`, so this assumes
# `edges` holds only the two endpoint columns -- confirm where it is built.
nodes <- data.frame(PID = unlist(edges, use.names = FALSE)) %>%
  distinct(PID, .keep_all = FALSE)

g <- graph_from_data_frame(edges, directed = TRUE, vertices = nodes)
g <- as_tbl_graph(g)

# Finding the shortest path between nodes
# distances() is the current igraph name for the deprecated shortest.paths().
# mode = "all" is spelled out because it was the implicit default before.
# NOTE(review): "all" ignores edge direction although the graph is built as
# directed -- confirm that undirected path lengths are what is wanted.
distMatrix <- as.data.frame(distances(g, v = V(g), to = V(g), mode = "all"))
#distMatrix
Social Network Proximity (Under Construction)