library(pacman)
library(tidyverse)
library(dplyr)
library(skimr)
library(haven)
library(devtools)
library(sna) #tools for social network analysis
library(network) # relational data
library(igraph) #network analysis and visualizations
library(EGAnet)
library(AIGENIE)
library(reticulate) #interface to Python
library(patchwork) # plot composer
library(ggplot2)
# Setting up a conda environment.
# reticulate::install_miniconda() stops with an error when Miniconda is already
# installed at the default path, so only run the installer when that directory
# does not exist yet. This makes the chunk safe to re-run.
# NOTE(review): the check is on the installation directory only; use
# reticulate::install_miniconda(force = TRUE) to repair a broken installation.
if (!dir.exists(reticulate::miniconda_path())) {
  reticulate::install_miniconda()
}
Error: Miniconda is already installed at path "~/Library/r-miniconda-arm64".
- Use `reticulate::install_miniconda(force = TRUE)` to overwrite the previous installation.
#reticulate::py_install("groq", envname = PBSM_environment)
#reticulate::py_install("openai == 0.28", envname = PBSM_environment)
#
#devtools::install_github("laralee/AI-GENIE")
# OpenAI API key - baggo gmail
# SECURITY: the key is read from the OPENAI_API_KEY environment variable (set it
# in ~/.Renviron, or with Sys.setenv() before running this chunk) instead of
# being stored in the script. The key that used to be hard-coded on this line
# has been exposed (it is also present in the embedded notebook source at the
# end of this file) and should be revoked and replaced.
key <- Sys.getenv("OPENAI_API_KEY")
if (!nzchar(key)) {
  # Warn rather than fail here; the missing key only matters once a request
  # is actually sent.
  warning(
    "OPENAI_API_KEY is not set; requests to the OpenAI API will not authenticate.",
    call. = FALSE
  )
}
# Persona the LLM is asked to adopt while writing items.
# Side note on few-shot examples: they help the model learn patterns from
# limited data, stay on task / keep a consistent style, and reduce
# hallucinations and irrelevant content.
# The fragments below are joined with single spaces into one sentence.
system.role <- paste(
  c(
    "You are an expert scale developer and substance use researcher who",
    "writes high-quality, succinct items based on a new",
    "model that highlights the use of protective",
    "behavioral strategies for marijuana use"
  ),
  collapse = " "
)
# Item attributes: a named list with one entry per item type (facet); each
# entry is a character vector of the attributes items of that type should tap.
# (Attribute text is capped at 250 characters, boooooo.)
aspects.of.pbsm.traits <- list(
  "Limiting use quantities" = c(
    "Intentionally measure out smaller use quantities per use session",
    "Avoid additional use past a personal limit",
    "Select low-potency products to avoid overuse"
  ),
  "Limiting use frequencies" = c(
    "Designates specific days and contexts for use",
    "Avoids habitual use",
    "Takes breaks from using to avoid dependency",
    "Intentionally limits the amount of marijuana they can access at one time"
  ),
  "Selecting appropriate use contexts" = c(
    "Avoid use before situations requiring alertness",
    "Only use in contexts where they feel physically safe",
    "Only use in contexts where they feel socially secure",
    "Prioritizes use in contexts they believe cannabis will enhance"
  ),
  "Limiting subjective high" = c(
    "Wait between doses to gauge effects before continuing use",
    "Stop once they feel a light or manageable high",
    "Select lower-potency products to avoid getting too high",
    "Avoid or limit co-use of marijuana and other substances"
  )
)
# Few-shot examples: items regarded as high-quality and well written, used to
# show the model the expected wording and style.
item.examples <- c(
  "Limit use to weekends",
  "Only use when you know you have nothing important to do for the rest of the day/night",
  "Avoid using marijuana out of boredom",
  "Only use before special events or on special occasions",
  "Only use enough to achieve desired buzz or to avoid getting “too high”",
  "Avoid driving a car after using",
  "Avoid using marijuana and alcohol at the same time",
  "Buy less marijuana at one time so you use less",
  "Take breaks from marijuana if it feels like you are using too frequently"
)
# Field or specialty the scale belongs to.
sub.domain <- "Cannabis PBS"

# Title of the inventory being created.
scale.title <- "Facets of PBSM"
# Run AI-GENIE: generate an item pool for the new scale, then validate it and
# check it for redundancy. Arguments are grouped by role; all are passed by
# name, so their order does not matter.
llm.PBSM.inventory.results <- AIGENIE(
  # What to generate
  item.attributes = aspects.of.pbsm.traits,
  item.examples = item.examples,
  system.role = system.role,
  scale.title = scale.title,
  sub.domain = sub.domain,
  # presumably one target item count per item type - confirm in package docs
  target.N = rep(20, 4),
  # LLM access
  openai.API = key,
  model = "gpt4o",
  # Network analysis settings
  EGA.model = "glasso",
  plot.stability = TRUE
)
Generating items for limiting use quantities ...
Items generated for limiting use quantities: 6
Items generated for limiting use quantities: 12
Items generated for limiting use quantities: 18
Items generated for limiting use quantities: 24
Generating items for limiting use frequencies ...
Items generated for limiting use frequencies: 8
Items generated for limiting use frequencies: 16
Items generated for limiting use frequencies: 24
Generating items for selecting appropriate use contexts ...
Items generated for selecting appropriate use contexts: 8
Items generated for selecting appropriate use contexts: 16
Items generated for selecting appropriate use contexts: 24
Generating items for limiting subjective high ...
Items generated for limiting subjective high: 8
Items generated for limiting subjective high: 16
Items generated for limiting subjective high: 24
All items generated. Final sample size: 96
Starting AI-GENIE Reduction Analysis for limiting subjective high items... embeddings obtained...
Computing EGA steps using glasso... UVA complete...
bootEGA sweeps complete... Done.
Starting AI-GENIE Reduction Analysis for limiting use frequencies items... embeddings obtained...
Computing EGA steps using glasso... UVA complete...
bootEGA sweeps complete... Done.
Starting AI-GENIE Reduction Analysis for limiting use quantities items... embeddings obtained...
Computing EGA steps using glasso... UVA complete...
bootEGA sweeps complete... Done.
Starting AI-GENIE Reduction Analysis for selecting appropriate use contexts items... embeddings obtained...
Computing EGA steps using glasso... UVA complete...
bootEGA sweeps complete... Done.
Item-level analysis complete.
Checking quality of item reduction on the sample overall...
Optimizing based on the final EGA network...
Final EGA network optimized. Now building initial EGA network based on optimal settings...
AI-Genie Results
----------------
Limiting Use Quantities Items
EGA Model: glasso Embeddings Used: full Staring N: 24 Final N: 15
Initial NMI: 81.19 Final NMI: 100
Limiting Use Frequencies Items
EGA Model: glasso Embeddings Used: full Staring N: 24 Final N: 18
Initial NMI: 91.6 Final NMI: 76.07
Selecting Appropriate Use Contexts Items
EGA Model: glasso Embeddings Used: full Staring N: 24 Final N: 17
Initial NMI: 85.71 Final NMI: 82.46
Limiting Subjective High Items
EGA Model: glasso Embeddings Used: sparse Staring N: 24 Final N: 23
Initial NMI: 100 Final NMI: 100
Overall Sample Results
EGA Model: glasso Embeddings Used: full Staring N: 96 Final N: 73
Initial NMI: 59.91 Final NMI: 73.18















# Start with item embeddings.
# Resample embeddings with replacement (or simulate from their covariance).
# For each sample (e.g., 100 iterations), run: 1) network estimation (TMFG or GLASSO -- GLASSO seems to work better for the sets I have run), 2) community detection (Walktrap), 3) compare the result to the original structure.
# Track how often each item stays in the same community. Items that wander too often are considered unstable; those with stability < 0.75 are removed.
# Open the `main_result` element of the `overall_sample` component of the
# AI-GENIE results in the data viewer for inspection.
View(llm.PBSM.inventory.results$overall_sample$main_result)
LS0tCnRpdGxlOiAiQUktR0VOSUUgUEJTIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgoKCmBgYHtyfQpsaWJyYXJ5KHBhY21hbikKbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkoZHBseXIpCmxpYnJhcnkoc2tpbXIpCmxpYnJhcnkoaGF2ZW4pIApsaWJyYXJ5KGRldnRvb2xzKSAKbGlicmFyeShzbmEpICN0b29scyBmb3Igc29jaWFsIG5ldHdvcmsgYW5hbHlzaXMgCmxpYnJhcnkobmV0d29yaykgIyByZWxhdGlvbmFsIGRhdGEgCmxpYnJhcnkoaWdyYXBoKSAjbmV0d29yayBhbmFseXNpcyBhbmQgdmlzdWFsaXphdGlvbnMgCmxpYnJhcnkoRUdBbmV0KQpsaWJyYXJ5KEFJR0VOSUUpCmxpYnJhcnkocmV0aWN1bGF0ZSkgI2ludGVyZmFjZSB0byBQeXRob24gCmxpYnJhcnkocGF0Y2h3b3JrKSAjIHBsb3QgY29tcG9zZXIgCmxpYnJhcnkoZ2dwbG90MikKCmBgYAoKCmBgYHtyfQojc2V0dGluZyB1cCBhIGNvbmRhIGVudmlyb25tZW50IAojcmV0aWN1bGF0ZTo6aW5zdGFsbF9taW5pY29uZGEoKSAKIwojUEJTTV9lbnZpcm9ubWVudCA8LSAiQUlHRU5JRV9weXRob25fZW52IgojcmV0aWN1bGF0ZTo6Y29uZGFfY3JlYXRlKGVudm5hbWUgPSBQQlNNX2Vudmlyb25tZW50LCBweXRob25fdmVyc2lvbiA9IDMuMTEpCiMKI3JldGljdWxhdGU6OnVzZV9jb25kYWVudihQQlNNX2Vudmlyb25tZW50LCByZXF1aXJlZCA9IFRSVUUpCgpgYGAKCmBgYHtyfQojcmV0aWN1bGF0ZTo6cHlfaW5zdGFsbCgiZ3JvcSIsIGVudm5hbWUgPSBQQlNNX2Vudmlyb25tZW50KQojcmV0aWN1bGF0ZTo6cHlfaW5zdGFsbCgib3BlbmFpID09IDAuMjgiLCBlbnZuYW1lID0gUEJTTV9lbnZpcm9ubWVudCkKIwojZGV2dG9vbHM6Omluc3RhbGxfZ2l0aHViKCJsYXJhbGVlL0FJLUdFTklFIikKYGBgCgoKYGBge3J9CgojIEFkZCBhbiBPcGVuQUkgQVBJIGtleSAtIGJhZ2dvIGdtYWlsIAprZXkgPC0gInNrLXByb2otcVl2V3B1U2ZyTnpISlUwOXNPODVVWW5wekl2Vk80cG5tQzB5RnZpVmEzMmxweFFuSFVFUUFEaUtudmgxTmg4VnQ3YWhMaWtLXzZUM0JsYmtGSnFLUm00UnE4VERpdW96N0tWZlkydU9sbTQ0UUhBZHN3RlhUWFkxdjJ1cUg5SU43VHhFbmJadk5SSXBReUp3NmhqaExWUnNIX01BIgoKIyBUaGUgcGVyc29uYSB3ZSB3YW50IHRoZSBMTE0gdG8gZW1ib2R5CiMgZmV3IHNob3QgZXhhbXBsZXMgaGVscCB0aGUgbW9kZWwgbGVhcm4gcGF0dGVybnMgZnJvbSBsaW1pdGVkIGRhdGEsIHN0YXkgb24gdGFzayAvIG1haW50YWluIGEgY29uc2lzdGVudCBzdHlsZSwgYW5kIHJlZHVjZXMgaGFsbHVjaW5hdGlvbnMgYW5kIGlycmVsZXZhbnQgY29udGVudCAKCnN5c3RlbS5yb2xlIDwtIHBhc3RlKCJZb3UgYXJlIGFuIGV4cGVydCBzY2FsZSBkZXZlbG9wZXIgYW5kIHN1YnN0YW5jZSB1c2UgcmVzZWFyY2hlciB3aG8iLAogICAgICAgICAgICAgICAgICAgICAid3JpdGVzIGhpZ2gtcXVhbGl0eSwgc3VjY2luY3QgaXRlbXMgYmFzZWQgb24gYSBuZXciLAogICAgICAgICAgICAgICAg
ICAgICAibW9kZWwgdGhhdCBoaWdobGlnaHRzIHRoZSB1c2Ugb2YgcHJvdGVjdGl2ZSIsCiAgICAgICAgICAgICAgICAgICAgICJiZWhhdmlvcmFsIHN0cmF0ZWdpZXMgZm9yIG1hcmlqdWFuYSB1c2UiKQoKCgojIEl0ZW0gYXR0cmlidXRlcwojIGNhcHBlZCBhdCAyNTAgY2hhcmFjdGVycywgYm9vb29vbyAKYXNwZWN0cy5vZi5wYnNtLnRyYWl0cyA8LSBsaXN0KAogIGBMaW1pdGluZyB1c2UgcXVhbnRpdGllc2AgPSBjKCJJbnRlbnRpb25hbGx5IG1lYXN1cmUgb3V0IHNtYWxsZXIgdXNlIHF1YW50aXRpZXMgcGVyIHVzZSBzZXNzaW9uIiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAiQXZvaWQgYWRkaXRpb25hbCB1c2UgcGFzdCBhIHBlcnNvbmFsIGxpbWl0IiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAiU2VsZWN0IGxvdy1wb3RlbmN5IHByb2R1Y3RzIHRvIGF2b2lkIG92ZXJ1c2UiKSwKICAKICBgTGltaXRpbmcgdXNlIGZyZXF1ZW5jaWVzYCA9IGMoCiAgICAiRGVzaWduYXRlcyBzcGVjaWZpYyBkYXlzIGFuZCBjb250ZXh0cyBmb3IgdXNlIiwKICAgICJBdm9pZHMgaGFiaXR1YWwgdXNlIiwKICAgICJUYWtlcyBicmVha3MgZnJvbSB1c2luZyB0byBhdm9pZCBkZXBlbmRlbmN5IiwKICAgICJJbnRlbnRpb25hbGx5IGxpbWl0cyB0aGUgYW1vdW50IG9mIG1hcmlqdWFuYSB0aGV5IGNhbiBhY2Nlc3MgYXQgb25lIHRpbWUiKSwKICAKICAgYFNlbGVjdGluZyBhcHByb3ByaWF0ZSB1c2UgY29udGV4dHNgID0gYygKICAgICJBdm9pZCB1c2UgYmVmb3JlIHNpdHVhdGlvbnMgcmVxdWlyaW5nIGFsZXJ0bmVzcyIsCiAgICAiT25seSB1c2UgaW4gY29udGV4dHMgd2hlcmUgdGhleSBmZWVsIHBoeXNpY2FsbHkgc2FmZSIsCiAgICAiT25seSB1c2UgaW4gY29udGV4dHMgd2hlcmUgdGhleSBmZWVsIHNvY2lhbGx5IHNlY3VyZSIsCiAgICAiUHJpb3JpdGl6ZXMgdXNlIGluIGNvbnRleHRzIHRoZXkgYmVsaWV2ZSBjYW5uYWJpcyB3aWxsIGVuaGFuY2UiKSwKICAgCiAgIGBMaW1pdGluZyBzdWJqZWN0aXZlIGhpZ2hgID0gYygKICAgICJXYWl0IGJldHdlZW4gZG9zZXMgdG8gZ2F1Z2UgZWZmZWN0cyBiZWZvcmUgY29udGludWluZyB1c2UiLAogICAgIlN0b3Agb25jZSB0aGV5IGZlZWwgYSBsaWdodCBvciBtYW5hZ2VhYmxlIGhpZ2giLAogICAgIlNlbGVjdCBsb3dlci1wb3RlbmN5IHByb2R1Y3RzIHRvIGF2b2lkIGdldHRpbmcgdG9vIGhpZ2giLAogICAgIkF2b2lkIG9yIGxpbWl0IGNvLXVzZSBvZiBtYXJpanVhbmEgYW5kIG90aGVyIHN1YnN0YW5jZXMiKSkKCgoKIyBFeGFtcGxlcyBvZiB3aGF0IHlvdSB3b3VsZCBjb25zaWRlciB0byBiZSBoaWdoLXF1YWxpdHksIHdlbGwtd3JpdHRlbiBpdGVtcwppdGVtLmV4YW1wbGVzIDwtIGMoIkxpbWl0IHVzZSB0byB3ZWVrZW5kcyIsCiAgICAgICAgICAgICAgICAgICAiT25seSB1c2Ugd2hlbiB5b3Uga25vdyB5b3UgaGF2ZSBub3RoaW5nIGltcG9ydGFudCB0byBkbyBmb3IgdGhlIHJlc3Qg
b2YgdGhlIGRheS9uaWdodCIsCiAgICAgICAgICAgICAgICAgICAiQXZvaWQgdXNpbmcgbWFyaWp1YW5hIG91dCBvZiBib3JlZG9tIiwKICAgICAgICAgICAgICAgICAgICJPbmx5IHVzZSBiZWZvcmUgc3BlY2lhbCBldmVudHMgb3Igb24gc3BlY2lhbCBvY2Nhc2lvbnMiLAogICAgICAgICAgICAgICAgICAgIk9ubHkgdXNlIGVub3VnaCB0byBhY2hpZXZlIGRlc2lyZWQgYnV6eiBvciB0byBhdm9pZCBnZXR0aW5nIOKAnHRvbyBoaWdo4oCdIiwKICAgICAgICAgICAgICAgICAgICJBdm9pZCBkcml2aW5nIGEgY2FyIGFmdGVyIHVzaW5nIiwKICAgICAgICAgICAgICAgICAgICJBdm9pZCB1c2luZyBtYXJpanVhbmEgYW5kIGFsY29ob2wgYXQgdGhlIHNhbWUgdGltZSIsCiAgICAgICAgICAgICAgICAgICAiQnV5IGxlc3MgbWFyaWp1YW5hIGF0IG9uZSB0aW1lIHNvIHlvdSB1c2UgbGVzcyIsCiAgICAgICAgICAgICAgICAgICAiVGFrZSBicmVha3MgZnJvbSBtYXJpanVhbmEgaWYgaXQgZmVlbHMgbGlrZSB5b3UgYXJlIHVzaW5nIHRvbyBmcmVxdWVudGx5IikKCiNOYW1lIHRoZSBmaWVsZCBvciBzcGVjaWFsdHkKc3ViLmRvbWFpbiA8LSAiQ2FubmFiaXMgUEJTIgoKIyBOYW1lIHRoZSBJbnZlbnRvcnkgYmVpbmcgY3JlYXRlZApzY2FsZS50aXRsZSA8LSAiRmFjZXRzIG9mIFBCU00iCgojIFJ1biBBSS1HRU5JRSB0byBnZW5lcmF0ZSwgdmFsaWRhdGUsIGFuZCByZWR1bmRhbmN5LWNoZWNrIGFuIGl0ZW0gcG9vbCBmb3IgeW91ciBuZXcgc2NhbGUuCmxsbS5QQlNNLmludmVudG9yeS5yZXN1bHRzIDwtIEFJR0VOSUUoCiAgRUdBLm1vZGVsID0gImdsYXNzbyIsCiAgc3lzdGVtLnJvbGUgPSBzeXN0ZW0ucm9sZSwKICBpdGVtLmF0dHJpYnV0ZXMgPSBhc3BlY3RzLm9mLnBic20udHJhaXRzLAogIG9wZW5haS5BUEkgPSBrZXksCiAgaXRlbS5leGFtcGxlcyA9IGl0ZW0uZXhhbXBsZXMsCiAgc3ViLmRvbWFpbiA9IHN1Yi5kb21haW4sCiAgc2NhbGUudGl0bGUgPSBzY2FsZS50aXRsZSwKICBwbG90LnN0YWJpbGl0eSA9IFRSVUUsCiAgbW9kZWwgPSAiZ3B0NG8iLAogIHRhcmdldC5OID0gYygyMCwyMCwyMCwyMCkKKQoKCgojIFN0YXJ0IHdpdGggaXRlbSBlbWJlZGRpbmdzCiMuIFJlc2FtcGxlIGVtYmVkZGluZ3Mgd2l0aCByZXBsYWNlbWVudCAob3Igc2ltdWxhdGUgZnJvbSB0aGVpciBjb3ZhcmlhbmNlKS4KIy4gICAgRm9yIGVhY2ggc2FtcGxlIChlLmcuLCAxMDAgaXRlcmF0aW9ucyksIHJ1bjogMSkgTmV0d29yayBlc3RpbWF0aW9uIChUTUZHIG9yIEdMQVNTTy0tIGdsYXNzbyBzZWVtcyB0byB3b3JrIGJldHRlciBmb3IgdGhlIHNldHMgSSBoYXZlIHJ1biksICAyIENvbW11bml0eSBkZXRlY3Rpb24gKFdhbGt0cmFwKSwgMykgQ29tcGFyZSB0aGUgcmVzdWx0IHRvIHRoZSBvcmlnaW5hbCBzdHJ1Y3R1cmUuCiMgVHJhY2sgaG93IG9mdGVuIGVhY2ggaXRlbSBzdGF5cyBpbiB0aGUgc2FtZSBjb21tdW5pdHkuIEl0ZW1zIHRoYXQgd2FuZGVyIHRvbyBv
ZnRlbiBhcmUgY29uc2lkZXJlZCB1bnN0YWJsZS4gVGhvc2Ugd2l0aCBzdGFiaWxpdHkgPCAwLjc1IGFyZSByZW1vdmVkLgoKYGBgCgpgYGB7cn0KVmlldyhsbG0uUEJTTS5pbnZlbnRvcnkucmVzdWx0cyRvdmVyYWxsX3NhbXBsZSRtYWluX3Jlc3VsdCkKYGBgCgpgYGB7cn0KCmBgYAoKCgoKCgoKCgoKCgoKCgoKCgo=