A friend of mine was struggling to finish a paper about the relationship between being goth and various traits, particularly mental illness, unconventional sexual behaviour, and homosexuality. She requested a correlation matrix, a factor analysis of composite variables, a set of linear regression tables, and a chart of the rates at which people chose each option on the gothic-identification question on OKCupid.
Source of data is the OKCupid dataset.
Data cleaning and converting variables to numeric:
setwd('~')
Warning: The working directory was changed to C:/Users/micha/OneDrive/Documents inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
setwd('rfolder/goths')
okcdataset <- read_parquet(file="parsed_data_public.parquet")
savedszxet <- okcdataset
okcdataset <- savedszxet
okcdataset$brit <- NA
okcdataset$brit[!is.na(okcdataset$d_country)] <- 0
okcdataset$brit[nchar(as.character(okcdataset$d_country)) == 2] <- 1
okcdataset$brit[okcdataset$d_country=='Ireland'] <- 1
okcdataset$brit[okcdataset$d_country=='Australia'] <- 1
okcdataset$brit[okcdataset$d_country=='New Zealand'] <- 1
okcdataset$brit[okcdataset$d_country=='UK'] <- 1
# Collapse the free-form orientation labels in d_orientation into four
# categories: Straight (left as is), Bisexual, Gay and Other.
okcdataset$sexuality <- okcdataset$d_orientation
# Every label that implies attraction to more than one gender becomes "Bisexual".
okcdataset$sexuality[okcdataset$sexuality %in% c(
  "Pansexual",
  "Straight, Gay",
  "Heteroflexible",
  "Homoflexible",
  "Bisexual, Pansexual",
  "Straight, Bisexual",
  "Straight, Bisexual, Heteroflexible",
  "Straight, Pansexual",
  "Heteroflexible, Bisexual",
  "Pansexual, Bisexual",
  "Homoflexible, Heteroflexible, Pansexual, Bisexual"
)] <- "Bisexual"
okcdataset$sexuality[okcdataset$sexuality %in% "Lesbian"] <- "Gay"
# Anything still outside the three main categories is "Other"; missing
# values stay missing.
okcdataset$sexuality[
  !is.na(okcdataset$sexuality) &
    !(okcdataset$sexuality %in% c("Bisexual", "Straight", "Gay"))
] <- "Other"
# Copy d_ethnicity into race, then relabel every non-missing value that is
# not one of the single-category answers as 'Selected over 1 race'.
okcdataset$race <- okcdataset$d_ethnicity
okcdataset$race[
  !is.na(okcdataset$d_ethnicity) &
    !(okcdataset$d_ethnicity %in% c(
      'Asian', 'Black', 'Indian', 'White', 'Hispanic / Latin',
      'Middle Eastern', 'Pacific Islander', 'Other'
    ))
] <- 'Selected over 1 race'
okcdataset$sex <- NA
okcdataset$sex[okcdataset$gender=='Man'] <- 'Male'
okcdataset$sex[okcdataset$gender=='Woman'] <- 'Female'
okcdataset$sex <- relevel(factor(okcdataset$sex), ref = "Female")
okcdataset$sexuality <- relevel(factor(okcdataset$sexuality), ref = "Straight")
okcdataset$race <- relevel(factor(okcdataset$race), ref = "White")
okcdataset$d_job[is.na(okcdataset$d_job)] <- 'Missing'
okcdataset$d_job <- relevel(factor(okcdataset$d_job), ref = "Missing")
okcdataset$d_country <- relevel(factor(okcdataset$d_country), ref = "UK")
print(okcdataset %>% group_by(d_job) %>% summarise(n = n()), n=100)
okcdataset$anime <- NA
okcdataset$anime[okcdataset$q185=='Yes'] <- 1
okcdataset$anime[okcdataset$q185=='No'] <- 0
okcdataset$dancetopless <- NA
okcdataset$dancetopless[okcdataset$q1379=='No'] <- 0
okcdataset$dancetopless[okcdataset$q1379=='Yes'] <- 1
okcdataset$numincome <- NA
okcdataset$numincome[okcdataset$d_income=='Less than $20,000'] <- 10000
okcdataset$numincome[okcdataset$d_income=='$20,000-$30,000'] <- 25000
okcdataset$numincome[okcdataset$d_income=='$50,000-$60,000'] <- 55000
okcdataset$numincome[okcdataset$d_income=='$30,000-$40,000'] <- 35000
okcdataset$numincome[okcdataset$d_income=='$40,000-$50,000'] <- 45000
okcdataset$numincome[okcdataset$d_income=='$80,000-$100,000'] <- 90000
okcdataset$numincome[okcdataset$d_income=='$60,000-$70,000'] <- 65000
okcdataset$numincome[okcdataset$d_income=='More than $1,000,000'] <- NA
okcdataset$numincome[okcdataset$d_income=='$100,000-$150,000'] <- 125000
okcdataset$numincome[okcdataset$d_income=='$70,000-$80,000'] <- 75000
okcdataset$numincome[okcdataset$d_income=='$250,000-$500,000'] <- 375000
okcdataset$numincome[okcdataset$d_income=='$150,000-$250,000'] <- 200000
okcdataset$numincome[okcdataset$d_income=='$500,000-$1,000,000'] <- 750000
###############################
okcdataset$seen_therapist <- NA
okcdataset$seen_therapist[okcdataset$q50=='Yes'] <- 1
okcdataset$seen_therapist[okcdataset$q50=='No'] <- 0
okcdataset$much_depression <- NA
okcdataset$much_depression[okcdataset$q1552=="Almost never, I'm happy!"] <- 0
okcdataset$much_depression[okcdataset$q1552=="Sometimes, when it's a bad day"] <- 1
okcdataset$much_depression[okcdataset$q1552=="Yeah, despair is my life"] <- 2
# Emotional range from q6021:
#   0 = "I don't feel much of either"
#   1 = only one extreme (very happy OR very depressed)
#   2 = the remaining answer option
okcdataset$emotional_diversity <- NA
okcdataset$emotional_diversity[okcdataset$q6021=="I don't feel much of either"] <- 0
okcdataset$emotional_diversity[okcdataset$q6021=="I get extremely happy but rarely depressed"] <- 1
okcdataset$emotional_diversity[okcdataset$q6021=="I get extremely depressed and I'm rarely happy"] <- 1
# The previous version matched "I don't feel much of either" a second time
# here, which overwrote its 0 with 2 and left the fourth answer uncoded.
# Code every other non-missing answer as 2 instead.
# NOTE(review): presumably the only other option is the "both extremes"
# answer; confirm with unique(okcdataset$q6021).
okcdataset$emotional_diversity[
  !is.na(okcdataset$q6021) &
    !(okcdataset$q6021 %in% c(
      "I don't feel much of either",
      "I get extremely happy but rarely depressed",
      "I get extremely depressed and I'm rarely happy"
    ))
] <- 2
okcdataset$happy_with_life <- NA
okcdataset$happy_with_life[okcdataset$q4018=="Yes"] <- 1
okcdataset$happy_with_life[okcdataset$q4018=="No"] <- 0
unique(okcdataset$q48278)
[1] NA "No." "Yes."
okcdataset$open_to_poly <- NA
okcdataset$open_to_poly[okcdataset$q33107=="I am commited to total monogamy"] <- 0
okcdataset$open_to_poly[okcdataset$q33107=="I could be convinced by the right people"] <- 1
okcdataset$open_to_poly[okcdataset$q33107=="Yes, I like that type of polygamy."] <- 2
okcdataset$open_to_poly[okcdataset$q33107=="I have open relationships only"] <- 3
okcdataset$open_to_dating_poly <- NA
okcdataset$open_to_dating_poly[okcdataset$q48278=="Yes."] <- 1
okcdataset$open_to_dating_poly[okcdataset$q48278=="No."] <- 0
okcdataset$pref_poly <- NA
okcdataset$pref_poly[okcdataset$q37772=="Monogamy"] <- 0
okcdataset$pref_poly[okcdataset$q37772=="Playing the field"] <- 1
okcdataset$pref_poly[okcdataset$q37772=="An open relationship"] <- 2
okcdataset$pref_poly[okcdataset$q37772=="Polyamory"] <- 3
unique(okcdataset$q15414)
[1] NA
[2] "No, I have not used them but I would try them."
[3] "No, I have not used them and I would not try them."
[4] "Yes, I have used psychedelic drugs."
okcdataset$marijuana <- NA
okcdataset$marijuana[okcdataset$q79=="Never."] <- 0
okcdataset$marijuana[okcdataset$q79=="I smoked in the past, but no longer."] <- 1
okcdataset$marijuana[okcdataset$q79=="I smoke occasionally."] <- 2
okcdataset$marijuana[okcdataset$q79=="I smoke regularly."] <- 3
okcdataset$drug_use <- NA
okcdataset$drug_use[okcdataset$q80=="I never do drugs."] <- 0
okcdataset$drug_use[okcdataset$q80=="I've done drugs in the past, but no longer."] <- 1
okcdataset$drug_use[okcdataset$q80=="I do drugs occasionally."] <- 2
okcdataset$drug_use[okcdataset$q80=="I do drugs regularly."] <- 3
okcdataset$psychedelics_use <- NA
okcdataset$psychedelics_use[okcdataset$q15414=="No, I have not used them and I would not try them."] <- 0
okcdataset$psychedelics_use[okcdataset$q15414=="No, I have not used them but I would try them."] <- 1
okcdataset$psychedelics_use[okcdataset$q15414=="Yes, I have used psychedelic drugs."] <- 2
unique(okcdataset$q88)
[1] NA "I'm mostly organized" "I'm average" "I'm very messy"
[5] "I'm compulsively neat"
okcdataset$brushing_teeth <- NA
okcdataset$brushing_teeth[okcdataset$q12970=="Rarely / never"] <- 0
okcdataset$brushing_teeth[okcdataset$q12970=="Only on days I feel like it"] <- 1
okcdataset$brushing_teeth[okcdataset$q12970=="Once a day"] <- 2
okcdataset$brushing_teeth[okcdataset$q12970=="Twice or more a day"] <- 3
okcdataset$showering <- NA
okcdataset$showering[okcdataset$q1062=="Once a week or less."] <- 0
okcdataset$showering[okcdataset$q1062=="A couple times a week."] <- 1
okcdataset$showering[okcdataset$q1062=="Usually daily. I skip some."] <- 2
okcdataset$showering[okcdataset$q1062=="At least once a day."] <- 3
okcdataset$mess_cleaning <- NA
okcdataset$mess_cleaning[okcdataset$q35409=="Not cleanup at all."] <- 0
okcdataset$mess_cleaning[okcdataset$q35409=="Postpone the cleanup for later."] <- 1
okcdataset$mess_cleaning[okcdataset$q35409=="Clean up immediately after eating."] <- 2
okcdataset$mess_cleaning[okcdataset$q35409=="Clean up immediately."] <- 3
okcdataset$bathroom_cleanliness <- NA
okcdataset$bathroom_cleanliness[okcdataset$q41099=="Disgusting! even I cringe when I have to use it!"] <- 0
okcdataset$bathroom_cleanliness[okcdataset$q41099=="Not awful, but it could use a good cleaning."] <- 1
okcdataset$bathroom_cleanliness[okcdataset$q41099=="Clean-ish but could use a quick touch-up."] <- 2
okcdataset$bathroom_cleanliness[okcdataset$q41099=="Spotless, shiny, germ-free, clean clean clean."] <- 3
okcdataset$messiness <- NA
okcdataset$messiness[okcdataset$q88=="I'm very messy"] <- 3
okcdataset$messiness[okcdataset$q88=="I'm average"] <- 2
okcdataset$messiness[okcdataset$q88=="I'm mostly organized"] <- 1
okcdataset$messiness[okcdataset$q88=="I'm compulsively neat"] <- 0
unique(okcdataset$q23696)
[1] "Never" NA "Sometimes"
[4] "Used to, but not any more" "Yes, frequently"
okcdataset$angry_video_games <- NA
okcdataset$angry_video_games[okcdataset$q43304=="Never"] <- 0
okcdataset$angry_video_games[okcdataset$q43304=="Sometimes"] <- 1
okcdataset$angry_video_games[okcdataset$q43304=="Usually"] <- 2
okcdataset$general_angry <- NA
okcdataset$general_angry[okcdataset$q1052=="Rarely"] <- 0
okcdataset$general_angry[okcdataset$q1052=="Sometimes"] <- 1
okcdataset$general_angry[okcdataset$q1052=="Very often"] <- 2
okcdataset$quietly_angry <- NA
okcdataset$quietly_angry[okcdataset$q6689=="Yes"] <- 1
okcdataset$quietly_angry[okcdataset$q6689=="No"] <- 0
okcdataset$throwing_objects <- NA
okcdataset$throwing_objects[okcdataset$q23696=="Never"] <- 0
okcdataset$throwing_objects[okcdataset$q23696=="Used to, but not any more"] <- 1
okcdataset$throwing_objects[okcdataset$q23696=="Sometimes"] <- 2
okcdataset$throwing_objects[okcdataset$q23696=="Yes, frequently"] <- 3
unique(okcdataset$q133)
[1] NA "No, not really" "Average" "Yes, very"
okcdataset$six_pack_importance <- NA
okcdataset$six_pack_importance[okcdataset$q84223=="Hide them with fat, please."] <- 0
okcdataset$six_pack_importance[okcdataset$q84223=="Not at all important."] <- 1
okcdataset$six_pack_importance[okcdataset$q84223=="Somewhat important."] <- 2
okcdataset$six_pack_importance[okcdataset$q84223=="Very important."] <- 3
okcdataset$physical_shape <- NA
okcdataset$physical_shape[okcdataset$q126=="Shitty"] <- 0
okcdataset$physical_shape[okcdataset$q126=="Average"] <- 1
okcdataset$physical_shape[okcdataset$q126=="Great"] <- 2
okcdataset$physical_strength <- NA
okcdataset$physical_strength[okcdataset$q133=="No, not really"] <- 0
okcdataset$physical_strength[okcdataset$q133=="Average"] <- 1
okcdataset$physical_strength[okcdataset$q133=="Yes, very"] <- 2
okcdataset$good_posture <- NA
okcdataset$good_posture[okcdataset$q23311=="No"] <- 0
okcdataset$good_posture[okcdataset$q23311=="Sometimes, when the situation calls for it"] <- 1
okcdataset$good_posture[okcdataset$q23311=="Yes"] <- 2
unique(okcdataset$q18894)
[1] NA "No" "Occasionally" "Yes"
okcdataset$sleep_time <- NA
okcdataset$sleep_time[okcdataset$q17342=="By 9pm"] <- 0
okcdataset$sleep_time[okcdataset$q17342=="By 11pm"] <- 1
okcdataset$sleep_time[okcdataset$q17342=="By 1am"] <- 2
okcdataset$sleep_time[okcdataset$q17342=="Later"] <- 3
okcdataset$sleep_trouble <- NA
okcdataset$sleep_trouble[okcdataset$q79134=="Never."] <- 0
okcdataset$sleep_trouble[okcdataset$q79134=="Rarely."] <- 1
okcdataset$sleep_trouble[okcdataset$q79134=="Frequently."] <- 2
okcdataset$sleep_trouble[okcdataset$q79134=="Always."] <- 3
okcdataset$sleeping_during_the_day <- NA
okcdataset$sleeping_during_the_day[okcdataset$q18644=="No"] <- 0
okcdataset$sleeping_during_the_day[okcdataset$q18644=="Yes"] <- 1
# Insomnia from q18894, whose levels are "No" / "Occasionally" / "Yes".
# The previous version read q18644, the column already used for
# sleeping_during_the_day, so both variables were built from the same
# question and "Occasionally" had nothing to match.
okcdataset$insomniac <- NA
okcdataset$insomniac[okcdataset$q18894=="No"] <- 0
okcdataset$insomniac[okcdataset$q18894=="Occasionally"] <- 1
okcdataset$insomniac[okcdataset$q18894=="Yes"] <- 2
unique(okcdataset$q84023)
[1] NA "Yes." "No."
okcdataset$life_without_alcohol <- NA
okcdataset$life_without_alcohol[okcdataset$q84023=="No."] <- 0
okcdataset$life_without_alcohol[okcdataset$q84023=="Yes."] <- 1
okcdataset$alcohol_use <- NA
okcdataset$alcohol_use[okcdataset$q77=="Never"] <- 0
okcdataset$alcohol_use[okcdataset$q77=="Rarely"] <- 1
okcdataset$alcohol_use[okcdataset$q77=="Sometimes"] <- 2
okcdataset$alcohol_use[okcdataset$q77=="Very often"] <- 3
okcdataset$alcohol_inhibit_sexual_tensions <- NA
okcdataset$alcohol_inhibit_sexual_tensions[okcdataset$q12540=="No"] <- 0
okcdataset$alcohol_inhibit_sexual_tensions[okcdataset$q12540=="Yes"] <- 1
okcdataset$blackout_from_alcohol <- NA
okcdataset$blackout_from_alcohol[okcdataset$q417=="No"] <- 0
okcdataset$blackout_from_alcohol[okcdataset$q417=="Yes"] <- 1
unique(okcdataset$q55744)
[1] "This would not be an important factor to me." NA
[3] "No." "Yes."
okcdataset$burning_own_flag_illegality <- NA
okcdataset$burning_own_flag_illegality[okcdataset$q175=="No"] <- 0
okcdataset$burning_own_flag_illegality[okcdataset$q175=="Yes"] <- 1
# q168 is compared against logical constants rather than "Yes"/"No" text.
# Use TRUE/FALSE: T and F are ordinary variables that can be reassigned.
# NOTE(review): presumably q168 is stored as a logical column; confirm.
okcdataset$worth_more_than_strangers <- NA
okcdataset$worth_more_than_strangers[okcdataset$q168==FALSE] <- 0
okcdataset$worth_more_than_strangers[okcdataset$q168==TRUE] <- 1
okcdataset$into_foreign_accents <- NA
okcdataset$into_foreign_accents[okcdataset$q12954=="No"] <- 0
okcdataset$into_foreign_accents[okcdataset$q12954=="Yes"] <- 1
okcdataset$wearing_shirt_with_flag <- NA
okcdataset$wearing_shirt_with_flag[okcdataset$q66067=="Ridiculous/Tacky."] <- 0
okcdataset$wearing_shirt_with_flag[okcdataset$q66067=="Its ok, but not for me."] <- 1
okcdataset$wearing_shirt_with_flag[okcdataset$q66067=="A poor substitute for true patriotism."] <- 2
okcdataset$wearing_shirt_with_flag[okcdataset$q66067=="A fine way to show national pride."] <- 3
okcdataset$partner_same_ethnicity_as_you <- NA
okcdataset$partner_same_ethnicity_as_you[okcdataset$q55744=="No."] <- 0
okcdataset$partner_same_ethnicity_as_you[okcdataset$q55744=="This would not be an important factor to me."] <- 1
okcdataset$partner_same_ethnicity_as_you[okcdataset$q55744=="Yes."] <- 2
unique(okcdataset$q488)
[1] NA "No" "Yes"
okcdataset$money_saving <- NA
okcdataset$money_saving[okcdataset$q19110=="Save some/most of it, but not invest it."] <- 2
okcdataset$money_saving[okcdataset$q19110=="Save some/most of it and invest it."] <- 2
okcdataset$money_saving[okcdataset$q19110=="Save some for an emergency fund, spend the rest."] <- 1
okcdataset$money_saving[okcdataset$q19110=="Spend it all and live paycheck to paycheck."] <- 0
okcdataset$impulse_purchasing <- NA
okcdataset$impulse_purchasing[okcdataset$q488=="No"] <- 0
okcdataset$impulse_purchasing[okcdataset$q488=="Yes"] <- 1
unique(okcdataset$q38051)
[1] NA "Rarely." "Never." "Often."
okcdataset$gambling <- NA
okcdataset$gambling[okcdataset$q35673=="No, I have no interest in gambling."] <- 0
okcdataset$gambling[okcdataset$q35673=="No, it is illegal or against the rules."] <- 0
okcdataset$gambling[okcdataset$q35673=="Yes, but only if really interested in the subject."] <- 1
okcdataset$gambling[okcdataset$q35673=="Yes, I'd gamble on almost anything."] <- 2
okcdataset$gambling2 <- NA
okcdataset$gambling2[okcdataset$q38051=="Never."] <- 0
okcdataset$gambling2[okcdataset$q38051=="Rarely."] <- 1
okcdataset$gambling2[okcdataset$q38051=="Often."] <- 2
unique(okcdataset$q27243)
[1] NA "Often" "Rarely / never"
okcdataset$problem_with_authority <- NA
okcdataset$problem_with_authority[okcdataset$q6377=="No"] <- 0
okcdataset$problem_with_authority[okcdataset$q6377=="Yes"] <- 1
okcdataset$ever_arrested <- NA
okcdataset$ever_arrested[okcdataset$q252=="No"] <- 0
okcdataset$ever_arrested[okcdataset$q252=="Yes"] <- 1
okcdataset$trouble_as_child <- NA
okcdataset$trouble_as_child[okcdataset$q18169=="I never got in trouble."] <- 0
okcdataset$trouble_as_child[okcdataset$q18169=="I never got in trouble because I never got caught."] <- 1
okcdataset$trouble_as_child[okcdataset$q18169=="I got in trouble all the time."] <- 1
okcdataset$fights_with_authority <- NA
okcdataset$fights_with_authority[okcdataset$q27243=="Rarely / never"] <- 0
okcdataset$fights_with_authority[okcdataset$q27243=="Often"] <- 1
unique(okcdataset$q13103)
[1] NA "Small" "I don't have one" "LARGE" "Medium"
okcdataset$receiving_anal_sex <- NA
okcdataset$receiving_anal_sex[okcdataset$q1040=="I don't like it / I don't think I would like it"] <- 0
okcdataset$receiving_anal_sex[okcdataset$q1040=="I like it / I think I might like it"] <- 1
okcdataset$porn_collection <- NA
okcdataset$porn_collection[okcdataset$q13103=="I don't have one"] <- 0
okcdataset$porn_collection[okcdataset$q13103=="Small"] <- 1
okcdataset$porn_collection[okcdataset$q13103=="Medium"] <- 2
okcdataset$porn_collection[okcdataset$q13103=="LARGE"] <- 3
okcdataset$dating_sex_worker <- NA
okcdataset$dating_sex_worker[okcdataset$q18048=="The very notion of this question offends me."] <- NA
okcdataset$dating_sex_worker[okcdataset$q18048=="No way!"] <- 0
okcdataset$dating_sex_worker[okcdataset$q18048=="Depends... what kind of sex worker?"] <- 1
okcdataset$dating_sex_worker[okcdataset$q18048=="Sure. At least they know what they're doing."] <- 2
okcdataset$dating_HIV_positive <- NA
okcdataset$dating_HIV_positive[okcdataset$q16317=="No"] <- 0
okcdataset$dating_HIV_positive[okcdataset$q16317=="I don't know"] <- 1
okcdataset$dating_HIV_positive[okcdataset$q16317=="Yes"] <- 2
okcdataset$dating_trans_person <- NA
okcdataset$dating_trans_person[okcdataset$q546=="No"] <- 0
okcdataset$dating_trans_person[okcdataset$q546=="Yes"] <- 1
okcdataset$dating_herpes_positive <- NA
okcdataset$dating_herpes_positive[okcdataset$q1618=="No"] <- 0
okcdataset$dating_herpes_positive[okcdataset$q1618=="Yes"] <- 1
okcdataset$foot_fetish <- NA
okcdataset$foot_fetish[okcdataset$q19892=="No, I don't have a foot fetish"] <- 0
okcdataset$foot_fetish[okcdataset$q19892=="Yes, I have a foot fetish!"] <- 1
unique(okcdataset$q323)
[1] "Confident" NA "Super confident" "Not really confident"
okcdataset$self_confidence <- NA
okcdataset$self_confidence[okcdataset$q20930=="Below average"] <- 0
okcdataset$self_confidence[okcdataset$q20930=="Average"] <- 1
okcdataset$self_confidence[okcdataset$q20930=="Higher than average"] <- 2
okcdataset$self_confidence[okcdataset$q20930=="Very, very high"] <- 3
okcdataset$comfortable_with_yourself <- NA
okcdataset$comfortable_with_yourself[okcdataset$q20452=="No"] <- 0
okcdataset$comfortable_with_yourself[okcdataset$q20452=="Yes"] <- 1
okcdataset$self_esteem <- NA
okcdataset$self_esteem[okcdataset$q19691=="Low"] <- 0
okcdataset$self_esteem[okcdataset$q19691=="Neither high nor low"] <- 1
okcdataset$self_esteem[okcdataset$q19691=="High"] <- 2
okcdataset$sexual_confidence <- NA
okcdataset$sexual_confidence[okcdataset$q323=="Not really confident"] <- 0
okcdataset$sexual_confidence[okcdataset$q323=="Confident"] <- 1
okcdataset$sexual_confidence[okcdataset$q323=="Super confident"] <- 2
unique(okcdataset$q6347)
[1] NA "No" "Yes"
okcdataset$believe_in_god <- NA
okcdataset$believe_in_god[okcdataset$q210=="No"] <- 0
okcdataset$believe_in_god[okcdataset$q210=="Yes"] <- 1
okcdataset$religion_important <- NA
okcdataset$religion_important[okcdataset$q41=="Not at all important"] <- 0
okcdataset$religion_important[okcdataset$q41=="Not very important"] <- 1
okcdataset$religion_important[okcdataset$q41=="Somewhat important"] <- 2
okcdataset$religion_important[okcdataset$q41=="Extremely important"] <- 3
okcdataset$religious_duty_important <- NA
okcdataset$religious_duty_important[okcdataset$q42=="No"] <- 0
okcdataset$religious_duty_important[okcdataset$q42=="Yes"] <- 1
okcdataset$homosexuality_is_sin <- NA
okcdataset$homosexuality_is_sin[okcdataset$q70=="No"] <- 0
okcdataset$homosexuality_is_sin[okcdataset$q70=="Yes"] <- 1
okcdataset$belief_in_miracles <- NA
okcdataset$belief_in_miracles[okcdataset$q6347=="No"] <- 0
okcdataset$belief_in_miracles[okcdataset$q6347=="Yes"] <- 1
unique(okcdataset$q13106)
[1] NA "I'd consider it." "I'm partially there already."
[4] "No way!" "I am one!"
okcdataset$am_vegeterian <- NA
okcdataset$am_vegeterian[okcdataset$q179268=="No"] <- 0
okcdataset$am_vegeterian[okcdataset$q179268=="Yes"] <- 1
okcdataset$consider_vegetarian <- NA
okcdataset$consider_vegetarian[okcdataset$q13106=="No way!"] <- 0
okcdataset$consider_vegetarian[okcdataset$q13106=="I'd consider it."] <- 1
okcdataset$consider_vegetarian[okcdataset$q13106=="I'm partially there already."] <- 2
okcdataset$consider_vegetarian[okcdataset$q13106=="I am one!"] <- 3
unique(okcdataset$q20976)
[1] "That seems like an average number." NA
[3] "I guess, but It doesn't change how I feel." "No, that's nothing."
[5] "Yes, and it makes me uncomfortable."
okcdataset$too_many_sex_partners <- NA
okcdataset$too_many_sex_partners[okcdataset$q393=="No"] <- 0
okcdataset$too_many_sex_partners[okcdataset$q393=="Yes"] <- 1
okcdataset$multiple_sex_partners_same_time <- NA
okcdataset$multiple_sex_partners_same_time[okcdataset$q1121=="No."] <- 0
okcdataset$multiple_sex_partners_same_time[okcdataset$q1121=="Yes, and I didn't tell at least one of them."] <- 1
okcdataset$multiple_sex_partners_same_time[okcdataset$q1121=="Yes, and they both knew."] <- 1
okcdataset$consider_fwb <- NA
okcdataset$consider_fwb[okcdataset$q42524=="No."] <- 0
okcdataset$consider_fwb[okcdataset$q42524=="Yes."] <- 1
okcdataset$fourteen_sexpart_alot <- NA
okcdataset$fourteen_sexpart_alot[okcdataset$q20976=="Yes, and it makes me uncomfortable."] <- 2
okcdataset$fourteen_sexpart_alot[okcdataset$q20976=="I guess, but It doesn't change how I feel."] <- 2
okcdataset$fourteen_sexpart_alot[okcdataset$q20976=="That seems like an average number."] <- 1
okcdataset$fourteen_sexpart_alot[okcdataset$q20976=="No, that's nothing."] <- 0
unique(okcdataset$q80041)
[1] "Yes" NA "No"
okcdataset$desired_fertility <- NA
okcdataset$desired_fertility[okcdataset$q979=="None"] <- 0
okcdataset$desired_fertility[okcdataset$q979=="1-2"] <- 1.5
okcdataset$desired_fertility[okcdataset$q979=="3-4"] <- 3.5
okcdataset$desired_fertility[okcdataset$q979=="5 or more!"] <- 6
okcdataset$child_birth_disgust <- NA
okcdataset$child_birth_disgust[okcdataset$q478=="No"] <- 0
okcdataset$child_birth_disgust[okcdataset$q478=="Yes"] <- 1
okcdataset$planned_child_names <- NA
okcdataset$planned_child_names[okcdataset$q63010=="No."] <- 0
okcdataset$planned_child_names[okcdataset$q63010=="No, and I'm not planning on any future children."] <- 0
okcdataset$planned_child_names[okcdataset$q63010=="Yes."] <- 1
okcdataset$looking_to_have_kids <- NA
okcdataset$looking_to_have_kids[okcdataset$q80041=="No"] <- 0
okcdataset$looking_to_have_kids[okcdataset$q80041=="Yes"] <- 1
okcdataset$tattoos <- NA
okcdataset$tattoos[okcdataset$q128=="I have no tattoos"] <- 0
okcdataset$tattoos[okcdataset$q128=="I have 1 or more LITTLE tattoos"] <- 1
okcdataset$tattoos[okcdataset$q128=="I have 1 or more BIG tattoos"] <- 1
okcdataset$a_lot_of_black_clothes <- NA
okcdataset$a_lot_of_black_clothes[okcdataset$q476=="No"] <- 0
okcdataset$a_lot_of_black_clothes[okcdataset$q476=="Yes"] <- 1
okcdataset$are_freckles_attractive <- NA
okcdataset$are_freckles_attractive[okcdataset$q30957=="No"] <- 0
okcdataset$are_freckles_attractive[okcdataset$q30957=="I'm Not Sure"] <- 1
okcdataset$are_freckles_attractive[okcdataset$q30957=="It depends where on the body they are"] <- 1
okcdataset$are_freckles_attractive[okcdataset$q30957=="Yes"] <- 2
okcdataset$cosplay <- NA
okcdataset$cosplay[okcdataset$q18711=="No, but it sounds cool"] <- 0
okcdataset$cosplay[okcdataset$q18711=="No, and I wouldn't be interested"] <- 0
okcdataset$cosplay[okcdataset$q18711=="Yes"] <- 1
okcdataset$education_level <- NA
okcdataset$education_level[okcdataset$q274=="Junior High"] <- 0
okcdataset$education_level[okcdataset$q274=="High School"] <- 1
okcdataset$education_level[okcdataset$q274=="College"] <- 2
okcdataset$education_level[okcdataset$q274=="Graduate School"] <- 3
okcdataset$scared_of_commitment <- NA
okcdataset$scared_of_commitment[okcdataset$q4043=="No"] <- 0
okcdataset$scared_of_commitment[okcdataset$q4043=="I'm Not Sure"] <- 1
okcdataset$scared_of_commitment[okcdataset$q4043=="Yes"] <- 2
okcdataset$trauma <- NA
okcdataset$trauma[okcdataset$q1290=="No"] <- 0
okcdataset$trauma[okcdataset$q1290=="Yes"] <- 1
okcdataset$shyness <- NA
okcdataset$shyness[okcdataset$q1712=="Not at all shy"] <- 0
okcdataset$shyness[okcdataset$q1712=="Not really shy"] <- 1
okcdataset$shyness[okcdataset$q1712=="Kind of shy"] <- 2
okcdataset$shyness[okcdataset$q1712=="Very shy"] <- 3
okcdataset$left_wing <- NA
okcdataset$left_wing[okcdataset$q212813=="Conservative / Right-wing"] <- 0
okcdataset$left_wing[okcdataset$q212813=="Other"] <- 1
okcdataset$left_wing[okcdataset$q212813=="Centrist"] <- 1
okcdataset$left_wing[okcdataset$q212813=="Liberal / Left-wing"] <- 2
unique(okcdataset$sexuality)
[1] Straight Bisexual Other <NA> Gay
Levels: Straight Bisexual Gay Other
okcdataset$homosexuality <- NA
okcdataset$homosexuality[okcdataset$sexuality=="Other"] <- NA
okcdataset$homosexuality[okcdataset$sexuality=="Straight"] <- 0
okcdataset$homosexuality[okcdataset$sexuality=="Bisexual"] <- 1
okcdataset$homosexuality[okcdataset$sexuality=="Gay"] <- 2
unique(okcdataset$q18538)
[1] NA "It wasn't a 'phase'!"
[3] "Yes, and I'm a little embarassed about it." "Yes, and I'm proud of it."
[5] "No."
okcdataset$goth <- NA
okcdataset$goth[okcdataset$q18538=="No."] <- 0
okcdataset$goth[okcdataset$q18538=="Yes, and I'm a little embarassed about it."] <- 1
okcdataset$goth[okcdataset$q18538=="Yes, and I'm proud of it."] <- 2
okcdataset$goth[okcdataset$q18538=="It wasn't a 'phase'!"] <- 3
# Names of the columns to correlate with `goth`, selected by position.
# NOTE(review): 2630:2712 is hard-coded and presumably spans the derived
# columns created above (it must include `goth`, which the loop below skips).
# The range silently shifts if columns are added or removed upstream; verify.
vars <- colnames(okcdataset[, 2630:2712])
Correlation between having identified as goth and each variable:
# Polychoric correlation between `goth` and every variable named in `vars`.
# dafvars gets one row per variable: r (correlation), se (standard error),
# varnames. Rows stay NA for `goth` itself and for variables that share no
# complete cases with `goth`.
dafvars <- data.frame(
  r = rep(NA_real_, length(vars)),
  se = rep(NA_real_, length(vars)),
  varnames = vars
)
# seq_along() keeps the loop from running when `vars` is empty.
for (i in seq_along(vars)) {
  curvar <- vars[i]
  # Rows where both goth and the current variable are observed.
  idx <- complete.cases(okcdataset[c("goth", curvar)])
  # Scalar condition, so use the short-circuiting && rather than &.
  if (curvar != "goth" && any(idx)) {
    goth_clean <- okcdataset$goth[idx]
    curvar_clean <- okcdataset[[curvar]][idx]
    # polychor() is given both variables as ordered factors.
    goth_factor <- factor(goth_clean, ordered = TRUE)
    curvar_factor <- factor(curvar_clean, ordered = TRUE)
    custard <- polychor(goth_factor, curvar_factor, std.err = TRUE)
    dafvars$r[i] <- custard$rho
    # sqrt() yields NaN (with a warning) if the estimated variance is negative.
    dafvars$se[i] <- sqrt(custard$var)
  }
}
Warning: NaNs producedWarning: NaNs producedWarning: NaNs producedWarning: NaNs producedWarning: NaNs producedWarning: NaNs producedWarning: NaNs producedWarning: NaNs producedWarning: NaNs producedWarning: NaNs produced
# Show the raw estimates, then add 95% bounds (r +/- 1.96 * se) as the
# columns uce (upper) and lce (lower).
dafvars
dafvars <- transform(
  dafvars,
  uce = r + se * 1.96,
  lce = r - se * 1.96
)
# Spot-check a single variable.
filter(dafvars, varnames == 'much_depression')
###############################
# Dot-and-error-bar chart of each variable's correlation with `goth`.
# Drop every row of dafvars that contains an NA in any column.
daf_clean <- na.omit(dafvars)
# Sort rows by correlation 'r', highest first
daf_sorted <- daf_clean[order(-daf_clean$r), ]
# Fix the factor levels to the sorted order so ggplot keeps this ordering
# instead of sorting the variable names alphabetically
daf_sorted$varnames <- factor(daf_sorted$varnames, levels = daf_sorted$varnames)
# Build the plot: one point per variable, with lce/uce as the error-bar ends
p <- ggplot(daf_sorted, aes(x = varnames, y = r)) +
geom_point(size = 3, color = "steelblue") + # Dots for correlations
geom_errorbar(aes(ymin = lce, ymax = uce), width = 0.2, color = "black") +
# Flip the axes so the variable names run down the vertical axis
coord_flip() +
labs(title = "",
x = "Variables",
y = "Correlation with having been goth") +
theme_minimal() +
theme(
axis.text.x = element_text(size = 10),
axis.text.y = element_text(size = 10),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12),
# NOTE(review): no aesthetic here is mapped to a legend, so this setting
# looks like it has no visible effect; confirm before removing
legend.position = "right",
plot.background = element_rect(color = "white"),
panel.background = element_blank()
)
# Print the plot object so the figure is rendered
p
Correlation matrix
# Composite scores built with getpc(), a helper defined outside this section.
# NOTE(review): presumably it returns one principal-component score per row
# from the selected items; confirm against its definition.
okcdataset$mental_illness <- getpc(okcdataset %>% select(seen_therapist, much_depression, emotional_diversity, happy_with_life, trauma), dofa=FALSE, fillmissing=FALSE, normalizeit=TRUE)
# sleep_time was listed twice in this select(); each item now appears once,
# in the same column order as before.
okcdataset$sleep_issues <- getpc(okcdataset %>% select(sleep_time, insomniac, sleeping_during_the_day, sleep_trouble), dofa=FALSE, fillmissing=FALSE, normalizeit=TRUE)
Warning: Matrix was not positive definite, smoothing was doneWarning: The matrix is not positive semi-definite, scores found from Structure loadings
okcdataset$alcohol_usage <- getpc(okcdataset %>% select(life_without_alcohol, alcohol_use, alcohol_inhibit_sexual_tensions, blackout_from_alcohol), dofa=F, fillmissing=F, normalizeit=T)
okcdataset$nationalism <- getpc(okcdataset %>% select(burning_own_flag_illegality, worth_more_than_strangers, into_foreign_accents, wearing_shirt_with_flag, partner_same_ethnicity_as_you), dofa=F, fillmissing=F, normalizeit=T)
okcdataset$hygiene <- getpc(okcdataset %>% select(brushing_teeth, showering, mess_cleaning, bathroom_cleanliness, messiness), dofa=F, fillmissing=F, normalizeit=T)
okcdataset$anger <- getpc(okcdataset %>% select(angry_video_games, general_angry, quietly_angry, throwing_objects), dofa=F, fillmissing=F, normalizeit=T)
okcdataset$physical_health <- getpc(okcdataset %>% select(six_pack_importance, physical_shape, physical_strength, good_posture), dofa=F, fillmissing=F, normalizeit=T)
okcdataset$gambling_tendency <- getpc(okcdataset %>% select(gambling, gambling2), dofa=F, fillmissing=F, normalizeit=T)
okcdataset$rebelliousness <- getpc(okcdataset %>% select(problem_with_authority, ever_arrested, trouble_as_child, fights_with_authority), dofa=F, fillmissing=F, normalizeit=T)
okcdataset$sexual_unc <- getpc(okcdataset %>% select(receiving_anal_sex, porn_collection, dating_sex_worker, dating_HIV_positive, dating_trans_person, dating_herpes_positive, foot_fetish), dofa=F, fillmissing=F, normalizeit=T)
okcdataset$confidence <- getpc(okcdataset %>% select(self_confidence, comfortable_with_yourself, self_esteem, sexual_confidence), dofa=F, fillmissing=F, normalizeit=T)
okcdataset$religion <- getpc(okcdataset %>% select(believe_in_god, religion_important, religious_duty_important, homosexuality_is_sin, belief_in_miracles), dofa=F, fillmissing=F, normalizeit=T)
okcdataset$drug_usage <- getpc(okcdataset %>% select(psychedelics_use, drug_use, marijuana), dofa=F, fillmissing=F, normalizeit=T)
okcdataset$sexual_liberalism <- -getpc(okcdataset %>% select(too_many_sex_partners, multiple_sex_partners_same_time, consider_fwb, fourteen_sexpart_alot), dofa=F, fillmissing=F, normalizeit=T)
okcdataset$finance_saving <- getpc(okcdataset %>% select(impulse_purchasing, money_saving), dofa=F, fillmissing=F, normalizeit=T)
okcdataset$polygamy <- getpc(okcdataset %>% select(open_to_poly, open_to_dating_poly, pref_poly), dofa=F, fillmissing=F, normalizeit=T)
okcdataset$vegetarianism <- getpc(okcdataset %>% select(am_vegeterian, consider_vegetarian), dofa=F, fillmissing=F, normalizeit=T)
okcdataset$natalism <- getpc(okcdataset %>% select(desired_fertility, planned_child_names, looking_to_have_kids), dofa=F, fillmissing=F, normalizeit=T)
correlation_matrix(okcdataset %>% select(mental_illness, drug_usage, sleep_issues, alcohol_usage, nationalism, hygiene, anger, physical_health, gambling_tendency, rebelliousness, sexual_unc, confidence, religion, sexual_liberalism, finance_saving, polygamy, vegetarianism, natalism, goth))
mental_illness drug_usage sleep_issues alcohol_usage nationalism hygiene
mental_illness "NA" "0.137 ***" "0.278 ***" "-0.01 " "-0.132 **" "-0.278 ***"
drug_usage "0.137 ***" "NA" "0.158 ***" "0.388 ***" "-0.2 ***" "-0.146 ***"
sleep_issues "0.278 ***" "0.158 ***" "NA" "0.085 ***" "-0.117 ***" "-0.206 ***"
alcohol_usage "-0.01 " "0.388 ***" "0.085 ***" "NA" "-0.087 ***" "-0.068 ***"
nationalism "-0.132 **" "-0.2 ***" "-0.117 ***" "-0.087 ***" "NA" "0.133 ***"
hygiene "-0.278 ***" "-0.146 ***" "-0.206 ***" "-0.068 ***" "0.133 ***" "NA"
anger "0.402 ***" "0.095 ***" "0.151 ***" "0.139 ***" "0.076 **" "-0.187 ***"
physical_health "-0.318 ***" "-0.029 " "-0.162 ***" "0.032 " "0.044 " "0.331 ***"
gambling_tendency "-0.027 " "0.054 *" "0.006 " "0.217 ***" "0.138 ***" "-0.072 **"
rebelliousness "0.148 ***" "0.411 ***" "0.19 ***" "0.203 ***" "-0.154 ***" "-0.121 ***"
sexual_unc "0.2 ***" "0.271 ***" "0.182 ***" "0.028 " "-0.222 ***" "-0.24 ***"
confidence "-0.553 ***" "0.058 ***" "-0.138 ***" "0.021 " "0.053 *" "0.286 ***"
religion "-0.101 *" "-0.215 ***" "-0.093 ***" "-0.185 ***" "0.435 ***" "0.1 ***"
sexual_liberalism "-0.005 " "0.401 ***" "0.118 ***" "0.272 ***" "-0.216 ***" "-0.081 ***"
finance_saving "-0.049 " "-0.133 ***" "-0.121 **" "-0.216 ***" "-0.025 " "0.044 "
polygamy "0.187 ***" "0.304 ***" "0.178 ***" "0.11 ***" "-0.187 ***" "-0.19 ***"
vegetarianism "0.063 " "0.127 ***" "-0.011 " "-0.029 *" "-0.227 ***" "0.007 "
natalism "-0.123 ***" "-0.144 ***" "-0.051 ***" "-0.037 ***" "0.148 ***" "0.034 **"
goth "0.194 ***" "0.168 ***" "0.173 ***" "0.046 ***" "-0.1 ***" "-0.088 ***"
anger physical_health gambling_tendency rebelliousness sexual_unc
mental_illness "0.402 ***" "-0.318 ***" "-0.027 " "0.148 ***" "0.2 ***"
drug_usage "0.095 ***" "-0.029 " "0.054 *" "0.411 ***" "0.271 ***"
sleep_issues "0.151 ***" "-0.162 ***" "0.006 " "0.19 ***" "0.182 ***"
alcohol_usage "0.139 ***" "0.032 " "0.217 ***" "0.203 ***" "0.028 "
nationalism "0.076 **" "0.044 " "0.138 ***" "-0.154 ***" "-0.222 ***"
hygiene "-0.187 ***" "0.331 ***" "-0.072 **" "-0.121 ***" "-0.24 ***"
anger "NA" "-0.134 ***" "0.093 ***" "0.215 ***" "0.087 ***"
physical_health "-0.134 ***" "NA" "-0.013 " "0.032 " "-0.175 ***"
gambling_tendency "0.093 ***" "-0.013 " "NA" "0.034 " "-0.031 "
rebelliousness "0.215 ***" "0.032 " "0.034 " "NA" "0.186 ***"
sexual_unc "0.087 ***" "-0.175 ***" "-0.031 " "0.186 ***" "NA"
confidence "-0.248 ***" "0.449 ***" "0.074 **" "0.058 ***" "-0.099 ***"
religion "0.014 " "0.126 ***" "0.079 **" "-0.154 ***" "-0.212 ***"
sexual_liberalism "0.028 " "0.065 **" "0.052 *" "0.254 ***" "0.382 ***"
finance_saving "-0.196 ***" "0.129 **" "-0.186 ***" "-0.074 " "-0.082 "
polygamy "0.06 ***" "-0.014 " "0.008 " "0.258 ***" "0.489 ***"
vegetarianism "-0.032 *" "0.004 " "-0.135 ***" "0.11 ***" "0.163 ***"
natalism "0.04 ***" "0.05 *" "0.105 ***" "-0.062 ***" "-0.15 ***"
goth "0.126 ***" "-0.113 ***" "-0.044 " "0.13 ***" "0.212 ***"
confidence religion sexual_liberalism finance_saving polygamy
mental_illness "-0.553 ***" "-0.101 *" "-0.005 " "-0.049 " "0.187 ***"
drug_usage "0.058 ***" "-0.215 ***" "0.401 ***" "-0.133 ***" "0.304 ***"
sleep_issues "-0.138 ***" "-0.093 ***" "0.118 ***" "-0.121 **" "0.178 ***"
alcohol_usage "0.021 " "-0.185 ***" "0.272 ***" "-0.216 ***" "0.11 ***"
nationalism "0.053 *" "0.435 ***" "-0.216 ***" "-0.025 " "-0.187 ***"
hygiene "0.286 ***" "0.1 ***" "-0.081 ***" "0.044 " "-0.19 ***"
anger "-0.248 ***" "0.014 " "0.028 " "-0.196 ***" "0.06 ***"
physical_health "0.449 ***" "0.126 ***" "0.065 **" "0.129 **" "-0.014 "
gambling_tendency "0.074 **" "0.079 **" "0.052 *" "-0.186 ***" "0.008 "
rebelliousness "0.058 ***" "-0.154 ***" "0.254 ***" "-0.074 " "0.258 ***"
sexual_unc "-0.099 ***" "-0.212 ***" "0.382 ***" "-0.082 " "0.489 ***"
confidence "NA" "0.058 ***" "0.206 ***" "0.126 **" "0.064 ***"
religion "0.058 ***" "NA" "-0.298 ***" "-0.123 *" "-0.214 ***"
sexual_liberalism "0.206 ***" "-0.298 ***" "NA" "-0.049 " "0.619 ***"
finance_saving "0.126 **" "-0.123 *" "-0.049 " "NA" "-0.002 "
polygamy "0.064 ***" "-0.214 ***" "0.619 ***" "-0.002 " "NA"
vegetarianism "0.005 " "-0.088 ***" "0.086 ***" "-0.003 " "0.091 ***"
natalism "0.024 *" "0.312 ***" "-0.221 ***" "-0.033 " "-0.215 ***"
goth "-0.082 ***" "-0.031 **" "0.13 ***" "-0.097 *" "0.179 ***"
vegetarianism natalism goth
mental_illness "0.063 " "-0.123 ***" "0.194 ***"
drug_usage "0.127 ***" "-0.144 ***" "0.168 ***"
sleep_issues "-0.011 " "-0.051 ***" "0.173 ***"
alcohol_usage "-0.029 *" "-0.037 ***" "0.046 ***"
nationalism "-0.227 ***" "0.148 ***" "-0.1 ***"
hygiene "0.007 " "0.034 **" "-0.088 ***"
anger "-0.032 *" "0.04 ***" "0.126 ***"
physical_health "0.004 " "0.05 *" "-0.113 ***"
gambling_tendency "-0.135 ***" "0.105 ***" "-0.044 "
rebelliousness "0.11 ***" "-0.062 ***" "0.13 ***"
sexual_unc "0.163 ***" "-0.15 ***" "0.212 ***"
confidence "0.005 " "0.024 *" "-0.082 ***"
religion "-0.088 ***" "0.312 ***" "-0.031 **"
sexual_liberalism "0.086 ***" "-0.221 ***" "0.13 ***"
finance_saving "-0.003 " "-0.033 " "-0.097 *"
polygamy "0.091 ***" "-0.215 ***" "0.179 ***"
vegetarianism "NA" "-0.071 ***" "0.101 ***"
natalism "-0.071 ***" "NA" "-0.039 ***"
goth "0.101 ***" "-0.039 ***" "NA"
Visualized graphically:
# Heatmap of the pairwise correlations between the composite traits and goth.
data_selected <- okcdataset %>%
  select(
    mental_illness, drug_usage, sleep_issues, alcohol_usage, nationalism,
    hygiene, anger, physical_health, gambling_tendency, rebelliousness,
    sexual_unc, confidence, religion, sexual_liberalism, finance_saving,
    polygamy, vegetarianism, natalism, goth
  )

# Each coefficient uses every row where both variables are observed, so the
# sample size differs from cell to cell.
cor_matrix <- cor(data_selected, use = "pairwise.complete.obs")

# Reshape the square matrix to long form: one row per (Var1, Var2) pair.
cor_long <- as.data.frame(as.table(cor_matrix))
colnames(cor_long) <- c("Var1", "Var2", "Correlation")
cor_long$label <- sprintf("%.2f", cor_long$Correlation)

p <- ggplot(cor_long, aes(x = Var1, y = Var2, fill = Correlation)) +
  geom_tile(color = "white") +
  geom_text(aes(label = label), color = "black", size = 3) +
  scale_fill_gradient2(
    low = "blue", mid = "white", high = "red", midpoint = 0,
    # Spelled out in full: `limit` only worked via partial argument matching.
    limits = c(-1, 1), space = "Lab", name = "r"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  coord_fixed() +
  labs(title = "", x = "", y = "")
p
Factor analysis. Fairly simple.
# Fit a single-factor model to columns 2630 through 2712 of okcdataset.
# NOTE(review): the columns are chosen by position, so adding, dropping or
# reordering earlier columns silently changes which variables enter the
# model -- consider selecting them by name.
f <- fa(okcdataset[, 2630:2712], nfactors=1)
In smc, smcs < 0 were set to .0
In smc, smcs < 0 were set to .0
In smc, smcs < 0 were set to .0
Warning: Matrix was not positive definite, smoothing was doneWarning: pnchisq(x=2.49074e+06, f=3320, theta=2.62748e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.49611e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.49299e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.4915e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.47742e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.47707e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.47373e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.47206e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.47201e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.47199e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.47181e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.47171e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.47171e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.47171e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.4717e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.47169e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.47169e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.47169e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.47169e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.47169e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.47169e+06, 
..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.47169e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.47169e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.47169e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.47169e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.47169e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.47169e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.47169e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.47169e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.47169e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.47169e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.62748e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.54948e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.47537e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.47352e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.47176e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.47172e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.4717e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.47169e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.47169e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.47169e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.47169e+06, ..): not converged in 
1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.47169e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.47169e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.47169e+06, ..): not converged in 1000000 iter.Warning: pnchisq(x=2.49074e+06, f=3320, theta=2.47169e+06, ..): not converged in 1000000 iter.In factor.scores, the correlation matrix is singular, the pseudo inverse is used
# Print the fitted one-factor solution (loadings summary and fit statistics).
print(f)
Factor Analysis using method = minres
Call: fa(r = okcdataset[, 2630:2712], nfactors = 1)
Standardized loadings (pattern matrix) based upon correlation matrix
MR1
SS loadings 7.21
Proportion Var 0.09
Mean item complexity = 1
Test of the hypothesis that 1 factor is sufficient.
df null model = 3403 with the objective function = 42.16 with Chi Square = 2881310
df of the model are 3320 and the objective function was 36.45
The root mean square of the residuals (RMSR) is 0.12
The df corrected root mean square of the residuals is 0.12
The harmonic n.obs is 6927 with the empirical chi square 701794.5 with prob < 0
The total n.obs was 68371 with Likelihood Chi Square = 2490738 with prob < 0
Tucker Lewis Index of factoring reliability = 0.114
RMSEA index = 0.105 and the 90 % confidence intervals are 0.104 NA
BIC = 2453777
Fit based upon off diagonal values = 0.54
Measures of factor score adequacy
MR1
Correlation of (regression) scores with factors 0.95
Multiple R square of scores with factors 0.90
Minimum correlation of possible factor scores 0.80
Linear regression models that test whether the associations between goth identity and various variables are due to demographic confounding.
# Add standardised copies of the predictor, the outcomes and the continuous
# covariates via normalise() (defined outside this section).
okcdataset <- okcdataset %>%
  mutate(
    goth_stand = normalise(goth),
    mental_stand = normalise(mental_illness),
    homo_stand = normalise(homosexuality),
    sexual_unc_stand = normalise(sexual_unc),
    income_stand = normalise(numincome),
    age_stand = normalise(d_age)
  )

# Unadjusted model: mental illness regressed on goth identification.
# NOTE(review): the outcome is the raw mental_illness column, while
# mental_stand created above is not used in this section; the other two
# outcomes use their *_stand versions. Confirm whether that is intended.
lr <- lm(mental_illness ~ goth_stand, data = okcdataset)
summary(lr)
Call:
lm(formula = mental_illness ~ goth_stand, data = okcdataset)
Residuals:
Min 1Q Median 3Q Max
-1.76478 -0.71032 -0.08174 0.39898 2.97736
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.008065 0.028996 0.278 0.781
goth_stand 0.203713 0.030832 6.607 0.0000000000605 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9637 on 1116 degrees of freedom
(67253 observations deleted due to missingness)
Multiple R-squared: 0.03765, Adjusted R-squared: 0.03678
F-statistic: 43.66 on 1 and 1116 DF, p-value: 0.00000000006048
# Mental illness on goth identification, adjusting for age, sex, race and
# income.
lr <- lm(
  mental_illness ~ goth_stand + age_stand + sex + race + income_stand,
  data = okcdataset
)
summary(lr)
Call:
lm(formula = mental_illness ~ goth_stand + age_stand + sex +
race + income_stand, data = okcdataset)
Residuals:
Min 1Q Median 3Q Max
-1.6649 -0.7014 -0.1060 0.4382 2.7281
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.09830 0.11432 0.860 0.390468
goth_stand 0.18931 0.05372 3.524 0.000482 ***
age_stand -0.02517 0.04958 -0.508 0.611952
sexMale -0.10741 0.12938 -0.830 0.407021
raceAsian 0.29912 0.34574 0.865 0.387548
raceBlack -0.52444 0.30480 -1.721 0.086218 .
raceHispanic / Latin -0.59912 0.27863 -2.150 0.032228 *
raceIndian 2.16417 0.94936 2.280 0.023238 *
raceOther 0.50448 0.36587 1.379 0.168826
raceSelected over 1 race 0.06093 0.15288 0.399 0.690459
income_stand -0.08880 0.05484 -1.619 0.106297
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9447 on 347 degrees of freedom
(68013 observations deleted due to missingness)
Multiple R-squared: 0.09553, Adjusted R-squared: 0.06947
F-statistic: 3.665 on 10 and 347 DF, p-value: 0.0001136
# Unadjusted model: standardised sexual unconventionality on goth
# identification.
lr <- lm(sexual_unc_stand ~ goth_stand, data = okcdataset)
summary(lr)
Call:
lm(formula = sexual_unc_stand ~ goth_stand, data = okcdataset)
Residuals:
Min 1Q Median 3Q Max
-1.8398 -0.6841 -0.2023 0.5602 3.6226
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.02414 0.01562 1.546 0.122
goth_stand 0.21712 0.01586 13.691 <0.0000000000000002 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9858 on 3990 degrees of freedom
(64379 observations deleted due to missingness)
Multiple R-squared: 0.04487, Adjusted R-squared: 0.04463
F-statistic: 187.4 on 1 and 3990 DF, p-value: < 0.00000000000000022
# Sexual unconventionality on goth identification, adjusting for age, sex,
# race and income.
lr <- lm(
  sexual_unc_stand ~ goth_stand + age_stand + sex + race + income_stand,
  data = okcdataset
)
summary(lr)
Call:
lm(formula = sexual_unc_stand ~ goth_stand + age_stand + sex +
race + income_stand, data = okcdataset)
Residuals:
Min 1Q Median 3Q Max
-1.9375 -0.7329 -0.2215 0.5791 3.7706
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.08614 0.06446 1.336 0.18164
goth_stand 0.19215 0.02663 7.214 0.000000000000894 ***
age_stand 0.02739 0.02813 0.973 0.33048
sexMale -0.04802 0.07121 -0.674 0.50023
raceAsian -0.53567 0.29168 -1.836 0.06650 .
raceBlack 0.03135 0.15463 0.203 0.83938
raceHispanic / Latin -0.20170 0.14974 -1.347 0.17819
raceIndian -0.16997 0.58084 -0.293 0.76985
raceMiddle Eastern 0.61235 0.50324 1.217 0.22388
raceOther 0.07517 0.19920 0.377 0.70596
racePacific Islander -0.92679 1.00645 -0.921 0.35729
raceSelected over 1 race 0.05904 0.08090 0.730 0.46563
income_stand -0.08121 0.02661 -3.052 0.00232 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.003 on 1371 degrees of freedom
(66987 observations deleted due to missingness)
Multiple R-squared: 0.05431, Adjusted R-squared: 0.04603
F-statistic: 6.561 on 12 and 1371 DF, p-value: 0.00000000001652
# Unadjusted model: standardised homosexuality score on goth identification.
lr <- lm(homo_stand ~ goth_stand, data = okcdataset)
summary(lr)
Call:
lm(formula = homo_stand ~ goth_stand, data = okcdataset)
Residuals:
Min 1Q Median 3Q Max
-0.6957 -0.3558 -0.3558 -0.3558 3.6555
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.053129 0.007238 7.34 0.000000000000221 ***
goth_stand 0.100109 0.007365 13.59 < 0.0000000000000002 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.065 on 21679 degrees of freedom
(46690 observations deleted due to missingness)
Multiple R-squared: 0.008451, Adjusted R-squared: 0.008405
F-statistic: 184.8 on 1 and 21679 DF, p-value: < 0.00000000000000022
# Homosexuality score on goth identification, adjusting for age, sex, race
# and income.
lr <- lm(
  homo_stand ~ goth_stand + age_stand + sex + race + income_stand,
  data = okcdataset
)
summary(lr)
Call:
lm(formula = homo_stand ~ goth_stand + age_stand + sex + race +
income_stand, data = okcdataset)
Residuals:
Min 1Q Median 3Q Max
-1.1890 -0.4255 -0.2708 -0.1314 4.6393
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.17256 0.02659 6.491 0.00000000009191845 ***
goth_stand 0.07520 0.01222 6.156 0.00000000078872816 ***
age_stand -0.09936 0.01333 -7.454 0.00000000000010223 ***
sexMale -0.23627 0.02989 -7.904 0.00000000000000316 ***
raceAsian 0.12864 0.09987 1.288 0.197801
raceBlack 0.24845 0.06765 3.672 0.000242 ***
raceHispanic / Latin 0.10336 0.06642 1.556 0.119722
raceIndian -0.12225 0.20251 -0.604 0.546068
raceMiddle Eastern 0.70162 0.29259 2.398 0.016513 *
raceOther 0.28544 0.08475 3.368 0.000761 ***
racePacific Islander 0.76735 0.34264 2.239 0.025158 *
raceSelected over 1 race 0.09102 0.04032 2.258 0.023999 *
income_stand -0.05210 0.01452 -3.589 0.000334 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9681 on 6434 degrees of freedom
(61924 observations deleted due to missingness)
Multiple R-squared: 0.04768, Adjusted R-squared: 0.04591
F-statistic: 26.85 on 12 and 6434 DF, p-value: < 0.00000000000000022
Chart showing how often people chose each answer to the goth-identification question, split by sex.
# Within each sex, the percentage of respondents giving each answer to
# question q18538. Rows with a missing sex or a missing answer are dropped
# first, so percentages sum to 100 within each sex.
ner_percent <- okcdataset %>%
  filter(!is.na(sex), !is.na(q18538)) %>%
  count(sex, q18538, name = "n") %>%
  group_by(sex) %>%
  mutate(percent = n / sum(n) * 100) %>%
  ungroup() %>%
  # Order the answers by their summed percentage, largest first, so both
  # facets share one bar order.
  mutate(q18538 = fct_reorder(q18538, percent, .fun = sum, .desc = TRUE))

# Bar chart of the answer shares, one panel per sex.
as <- ggplot(ner_percent, aes(x = q18538, y = percent / 100, fill = q18538)) +
  geom_col() +
  facet_wrap(~sex) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  labs(
    title = "",
    subtitle = "",
    caption = "",
    # Fixed: the label previously ended with a stray apostrophe.
    x = "Did you have a goth phase?",
    y = "%",
    fill = "Response"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(size = 10),
    axis.text.y = element_text(size = 12),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    legend.position = "none"
  )
plot(as)