library(glue)
library(httr)
library(httr2)
library(jsonlite)
library(knitr)
library(marker)
library(lsa)#just for cosine
library(stringr)
#library(stringi)
library(shinyjs)
library(kableExtra)
library(htmltools)
library(shinydashboard)
library(shinydashboardPlus)
library(shinybusy)
library(textreadr)
library(tidyverse)
library(purrr)
library(CausalMapFunctions)
library(aws.s3)
# Define the OpenAI API endpoints and API key.
# Legacy engines endpoint, kept for reference only:
# api_endpoint <- "https://api.openai.com/v1/engines/davinci/jobs"
# Chat endpoint, not in effect: the original script assigned it and then
# immediately overwrote it with the completions endpoint below.
# api_endpoint <- "https://api.openai.com/v1/chat/completions"
api_endpoint <- "https://api.openai.com/v1/completions" # https://help.openai.com/en/articles/6283125-what-happened-to-engines
api_endpoint_embeddings <- "https://api.openai.com/v1/embeddings"

# SECURITY: secret keys must not be committed to source control. The key is
# read from the OPENAI_API_KEY environment variable (e.g. set in ~/.Renviron).
# The key that used to be hard-coded here should be treated as leaked and
# revoked.
api_key <- Sys.getenv("OPENAI_API_KEY")
if (!nzchar(api_key)) {
  warning("OPENAI_API_KEY is not set; OpenAI API calls will fail.", call. = FALSE)
}




# Load the project-local helper scripts, in this order.
invisible(lapply(
  c("Rfiles/global_functions.R", "Rfiles/multi_import.R"),
  source
))



# Read each file in `paths` and return the results as a named list.
#
# Every file is read with readLines() and passed to collap() (defined
# elsewhere in the project; presumably joins the lines into one string --
# TODO confirm). List names are the file names with a trailing ".txt"
# removed; the dot is escaped so that only a real ".txt" extension is stripped.
read_prompt_files <- function(paths) {
  texts <- map(paths, function(path) collap(readLines(path)))
  names(texts) <- str_remove(basename(paths), "\\.txt$")
  texts
}

# Example texts.
# NOTE: `example` shadows utils::example(); the name is kept because other
# code may refer to it.
choose_example <- list.files(path = "assets/examples", full.names = TRUE)
example <- read_prompt_files(choose_example)

# Prompt starters, looked up by name further down (e.g. "010 chain 3 steps").
choose_starter <- list.files(path = "assets/starters", full.names = TRUE)
starter <- read_prompt_files(choose_starter)

# Cluster starters.
tmp <- list.files(path = "assets/cluster_starters", full.names = TRUE)
cluster <- read_prompt_files(tmp)

Problems

There are many questions to be considered.

  • Can GPT3 code a text at least approximately as well as a trained coder?
    • Can it recognise a good enough % of causal links in a typical text (sensitivity)
    • without too many false positives (specificity)?
  • What overall strategy should we use?
    • Asking for links
    • Asking for chains and reducing them to links
    • Asking directly for a network
  • For each strategy, how can we give examples to improve results?
    • Do we need domain-specific examples?
  • For each strategy, how can we pre-train the entire model to improve results?
  • How can we in particular help the NLP to:
    • Deal with difficult cases like intention / teleological cause?
    • Not code links which are unclear/unsure?
    • Do we need to cover many edge cases like distinguishing teleological causation? Or can you have too much of a good thing?
  • Is it better to code small or large chunks of text at once?
  • How to explicitly include questions/prompts in the text to be coded?
  • When using large chunks, how to produce the quotes?
    • One strategy is to ask for verbatim factor labels
      • But then we can't pre-process
    • Or, produce the links and then go back and ask it to find the quotes
    • Or put sentence numbers in the text and ask it for the numbers
  • Dealing with disjunctions and conjunctions
  • How to ensure that the NLP is not confused when eg a large text is prefaced by “this is NOT what happened”?

We will use statements 1-8 from example-file.

Expert benchmark

This is the expert-coded benchmark.

# Load the expert-coded example map file.
ex0 <- load_mapfile("example-file")

# Restrict the map to the statements used in this comparison (1-8).
# NOTE(review): `value` is passed as the string "9" with operator "less";
# confirm that pipe_find_statements() compares statement ids numerically.
ex <- ex0 %>%
  pipe_find_statements(field = "statement_id", value = "9", operator = "less")

# Tabulate the expert-coded links (TRUE spelled out; `T` can be reassigned).
ex %>% ltab(99, kable = TRUE)
statement_id from_label to_label simple_frequency nr
5 ~Livestock health; Disease ~Livestock health; Death 1 1
7 ~Improved health Unable/less able to work on farm 1 2
8 Member of savings group Increased knowledge; Finance 1 3
8 Member of savings group Increased ability to save/increased savings 1 4
5 Resilient outlook/strive for better things Planted new crop/vegetable varieties 1 5
5 Received training (Organisation 1) Increased knowledge; Farming method/practice 1 6
5 Planting own crops No longer go hungry/starve 1 7
2 Religious beliefs ~Go to hospital 1 8
2 Received training (Organisation 1) Health behaviour; Use mosquito nets 1 9
2 Received training (Organisation 1) Health behaviour; Use pits to dispose of rubbish 1 10
2 Received training (Organisation 1) Health behaviour; Use better toilets; No longer in open spaces 1 11
2 Received training (Organisation 1) Health behaviour; Increase number of washes per day 1 12
4 ~Go to hospital Death in the family; Children 1 13
# Pass the expert-coded benchmark to make_print_mapNLP() (defined elsewhere in
# the project; presumably renders the causal map -- confirm).
ex %>% make_print_mapNLP()

Here are the statements:

 # Show the id and text of each benchmark statement as a table.
 ex$statements %>%
   select(statement_id, text) %>%
   kable_custom()
statement_id text
1 This is an example file to showcase the different functionalities in the Causal Map App.
2 In my family we are considerably healthy because people get sick from time to time. Even this year we got sick, although we do not visit the hospital we do get better eventually. Instead we go to church as we are prohibited of going to the hospital because of cultural issues, and beliefs (they believe church are more serious and that some diseases are linked to spiritual problems) and also going to the hospital takes longer time to be assisted. We do get sick because it is the way God made things to occur we do not know why. We try to change but we still do get sick even though we make use of mosquito nets. We also dig holes in which we put litter and we also have better latrines/toilets. We take three baths per day, and also wash our hands after visiting the washrooms. We do use mosquito nets. Yes, there are. We previously took a bath per day, and we would go to the bushes to alleviate our selves. We changed our behaviours because we learned from Organisation 1 that the way one behaves contributes to a health status. Caring for our health is considerably important. Men and women in my house hold mostly use the latrines/toilets and we take three baths per day.
3 Improved
4 This year things are getting worst, we often get sicker. I cannot explain why only God can explain as sickness was created by him. Our churches prohibited us from visiting the hospital and as a result five of my children are dead.
5 Not much has changed as we have the strength to farm more and things have not changed regardless of my husbands own business. We have altered because in the last year we planted millet and corn because, regardless of the difficulties in life we try to fight for better things. We have nothing, the chicken we had all died due to sickness. Yes, there had been, Organisation 1 taught us how to plant and how to harvest in the best manner. It has not changed as much because in my family we still arrange the barn and sift corn after removing the shell. Yes, in my garden in general we plant tomatoes, vegetables, cabbage, and corn because we grow crops in our farm and that way we have no food shortages.
6 Decreased
7 It has decreased because in my family we do get sick, my husband and I likewise for my husbands other wives. We all get sick and when this happened everything is affected and put on hold.
8 My family earns money only because of the savings that Organisation 1 has provided for us. They taught us how to save money. My husband has his own business. Without getting along well nothing has changed we live in the same way. Nothing has changed as we are not able to produce more. The earnings we obtain from agriculture has not changed at all, and everything we have is due to business that my husband does and also through savings. My husband sells sieves, brooms and much more and it is through this money that we make a living.

Note that this expert benchmark has the advantage over the NLP of knowing which questions were being answered. The questions could have been inserted into the text to improve the NLP recognition, below. Also, the labels have been revised and made more abstract. We will not expect the NLP to produce this kind of clean, abstract label at this point.

Simple chain prompt with no examples

First we try the simple chain prompt with no examples.

Here’s the prompt:

# Select the chain prompt without examples. `[[` is used instead of `$`
# because `$` partial-matches list names and could silently pick a different
# starter.
prompt <- starter[["011 chain 3 steps no example"]]
print_prompt(prompt)
Task: identify causal chains in the text and list them so that each cause is followed by >> and then its effect: cause >> effect, or cause >> intermediate step >> effect, or cause >> intermediate step >> another intermediate step >> effect.

Text:

statement_id from_label to_label simple_frequency nr
1 Cultural issues and beliefs Prohibited from going to the hospital 1 1
1 Prohibited from going to the hospital Go to church instead 1 2
1 Go to church instead Improved behaviours 1 3
1 Improved behaviours Take three baths per day and wash hands after visiting washrooms 1 4
1 Take three baths per day and wash hands after visiting washrooms Use mosquito nets 1 5
1 Use mosquito nets Plant millet and corn 1 6
1 Plant millet and corn Plant tomatoes, vegetables, cabbage, and corn 1 7
1 Plant tomatoes, vegetables, cabbage, and corn Get sick 1 8
1 Get sick Affected and put on hold 1 9
1 Affected and put on hold Earn money through savings and husband’s business 1 10
1 Earn money through savings and husband’s business Sell sieves, brooms, and more 1 11
1 Sell sieves, brooms, and more Make a living 1 12

We can see this is really poor. We will always use examples from now on.

Simple chain prompt with some examples.

# Select the chain prompt with examples. `[[` is used instead of `$` because
# `$` partial-matches list names and could silently pick a different starter.
prompt <- starter[["010 chain 3 steps"]]
print_prompt(prompt)
Task: identify causal chains in the text and list them so that each cause is followed by >> and then its effect: cause >> effect, or cause >> intermediate step >> effect, or cause >> intermediate step >> another intermediate step >> effect.

Text:
Some families couldn’t afford the food shop, so the NGO gave them food vouchers so they didn't starve. I worked hard in order to get a better job. He covers the pots so the flies do not get into the food.
Now we are getting ill less often, because we wash our hands and because we boil cooking water. We learned those things because of the course I went on. My teacher told me to go on the course.
The stress and the underlying illness both contributed to her heart attack.
First one dog barked and then another dog barked.
We ate pies and then we ate fish.

Answer:
some families couldn't afford the food shop >> the NGO gave them food vouchers >> they didn't starve
I wanted to get a better job >> I worked hard
he did not want the flies to get into the food >> he covers the pots >> the flies do not get into the food
we learned hygiene practices >> we wash our hands >> now we are getting ill less often
we learned hygiene practices >> we boil cooking water >> now we are getting ill less often
The stress >> her heart attack
the underlying illness >> her heart attack
the course I went on >> we wash our hands
the course I went on >> we boil cooking water
my teacher told me >> I went on a course >> we learned hygiene practices

Text:

# Code the benchmark statements with the NLP model; `prompt` is passed as the
# `start` argument of pipe_NLPidentify_links().
ex_coded <-
  ex %>%
  pipe_NLPidentify_links(start = prompt)

# Tabulate the identified links (TRUE spelled out; `T` can be reassigned).
ex_coded %>% ltab(3333, kable = TRUE)
statement_id from_label to_label simple_frequency nr
1 people get sick from time to time Even this year we got sick 1 1
1 Even this year we got sick we do not visit the hospital 1 2
1 we do not visit the hospital we do get better eventually 1 3
1 cultural issues and beliefs prohibited of going to the hospital 1 4
1 prohibited of going to the hospital go to church instead 1 5
1 God made things to occur we do not know why 1 6
1 we do not know why we get sick 1 7
1 mosquito nets we try to change 1 8
1 we try to change we still do get sick 1 9
1 Organisation 1 we learned how to behave 1 10
1 we learned how to behave our health status improved 1 11
1 Organisation 1 taught us how to plant and harvest 1 12
1 taught us how to plant and harvest we plant millet and corn 1 13
1 Organisation 1 taught us how to save money 1 14
1 taught us how to save money my husband has his own business 1 15
1 my husband has his own business we make a living 1 16
1 we make a living earnings from agriculture has not changed 1 17
# Pass the NLP-coded links to make_print_mapNLP() (defined elsewhere in the
# project; presumably renders the causal map -- confirm).
ex_coded %>% make_print_mapNLP()

We can see that this is much better. We do not expect the factor labels to be neatly organised and generalised as in the expert example. Notice that the expert (consciously?) suppresses some of the material about church and hospitals as probably not relevant.

Does chunk length matter?

In the tables above we can see that the statement number for the NLP coding is always 1: all the text has been combined into one new larger statement.

Generally it is more efficient (cheaper/faster??) to combine small pieces of text in this way. But there are a lot of tradeoffs.

So far we have not asked the AI to identify which exact quote is behind each causal link. Ideally, unless we are being very trusting in its ability to synthesise text, we will want this. But this is an additional task - either we need to try to get it to include the exact quote with each link, or we will have to post-process, going back and asking it to identify the quotes. Either way is problematic, because the NLP is not very good at extracting exact, verbatim quotes.

So what happens if we leave the statements as they were?

# Select the chain prompt with examples. `[[` is used instead of `$` because
# `$` partial-matches list names and could silently pick a different starter.
prompt <- starter[["010 chain 3 steps"]]
print_prompt(prompt)
Task: identify causal chains in the text and list them so that each cause is followed by >> and then its effect: cause >> effect, or cause >> intermediate step >> effect, or cause >> intermediate step >> another intermediate step >> effect.

Text:
Some families couldn’t afford the food shop, so the NGO gave them food vouchers so they didn't starve. I worked hard in order to get a better job. He covers the pots so the flies do not get into the food.
Now we are getting ill less often, because we wash our hands and because we boil cooking water. We learned those things because of the course I went on. My teacher told me to go on the course.
The stress and the underlying illness both contributed to her heart attack.
First one dog barked and then another dog barked.
We ate pies and then we ate fish.

Answer:
some families couldn't afford the food shop >> the NGO gave them food vouchers >> they didn't starve
I wanted to get a better job >> I worked hard
he did not want the flies to get into the food >> he covers the pots >> the flies do not get into the food
we learned hygiene practices >> we wash our hands >> now we are getting ill less often
we learned hygiene practices >> we boil cooking water >> now we are getting ill less often
The stress >> her heart attack
the underlying illness >> her heart attack
the course I went on >> we wash our hands
the course I went on >> we boil cooking water
my teacher told me >> I went on a course >> we learned hygiene practices

Text:

# Code the benchmark statements with the NLP model. `sentences_per_statement`
# is set to NULL here; per the surrounding text this run keeps the original
# statements instead of combining them -- confirm against
# pipe_NLPidentify_links().
ex_coded <-
  ex %>%
  pipe_NLPidentify_links(start = prompt, sentences_per_statement = NULL)

# Tabulate the identified links (TRUE spelled out; `T` can be reassigned).
ex_coded %>% ltab(3333, kable = TRUE)
statement_id from_label to_label simple_frequency nr
1 This is an example file showcase the different functionalities in the Causal Map App 1 1
1 showcase the different functionalities in the Causal Map App users can use the app to identify causal chains 1 2
2 people get sick from time to time we do not visit the hospital 1 3
2 we do not visit the hospital we go to church 1 4
2 we go to church cultural issues and beliefs 1 5
2 cultural issues and beliefs going to the hospital takes longer time 1 6
2 God made things to occur we do not know why 1 7
2 we do not know why we try to change 1 8
2 we try to change we still do get sick 1 9
2 we try to change we make use of mosquito nets 1 10
2 we make use of mosquito nets we dig holes 1 11
2 we dig holes we put litter 1 12
2 we put litter we have better latrines/toilets 1 13
2 we try to change we take three baths per day 1 14
2 we take three baths per day we wash our hands after visiting the washrooms 1 15
2 we try to change we use mosquito nets 1 16
2 we previously took a bath per day we would go to the bushes to alleviate ourselves 1 17
2 we would go to the bushes to alleviate ourselves we changed our behaviours 1 18
2 Organisation 1 we learned from Organisation 1 1 19
2 we learned from Organisation 1 the way one behaves contributes to a health status 1 20
2 the way one behaves contributes to a health status we changed our behaviours 1 21
2 caring for our health is important men and women in my household mostly use the latrines/toilets 1 22
2 men and women in my household mostly use the latrines/toilets we take three baths per day 1 23
3 I wanted to get a better job I worked hard 1 24
3 he did not want the flies to get into the food he covers the pots 1 25
3 he covers the pots the flies do not get into the food 1 26
3 we learned hygiene practices we wash our hands 1 27
3 we wash our hands now we are getting ill less often 1 28
3 we learned hygiene practices we boil cooking water 1 29
3 we boil cooking water now we are getting ill less often 1 30
3 The stress her heart attack 1 31
3 the underlying illness her heart attack 1 32
3 the course I went on we wash our hands 1 33
3 the course I went on we boil cooking water 1 34
3 my teacher told me I went on a course 1 35
3 I went on a course we learned hygiene practices 1 36
3 some families couldn’t afford the food shop the NGO gave them food vouchers 1 37
3 the NGO gave them food vouchers they didn’t starve 1 38
3 first one dog barked then another dog barked 1 39
3 we ate pies then we ate fish 1 40
4 things are getting worse we often get sicker 1 41
4 God created sickness our churches prohibited us from visiting the hospital 1 42
4 our churches prohibited us from visiting the hospital five of my children are dead 1 43
5 we have the strength to farm more things have not changed 1 44
5 we try to fight for better things we planted millet and corn 1 45
5 the chicken we had all died due to sickness we have nothing 1 46
5 Organisation 1 taught us we learned how to plant and how to harvest in the best manner 1 47
5 we still arrange the barn and sift corn we remove the shell 1 48
5 we plant tomatoes, vegetables, cabbage, and corn we grow crops in our farm 1 49
5 we grow crops in our farm we have no food shortages 1 50
7 my family gets sick everything is affected and put on hold 1 51
7 everything is affected and put on hold it has decreased 1 52
8 Organisation 1 provided savings they taught us how to save money 1 53
8 they taught us how to save money my family earns money 1 54
8 my husband has his own business we live in the same way 1 55
8 we are not able to produce more nothing has changed 1 56
8 earnings from agriculture has not changed everything we have is due to business that my husband does and also through savings 1 57
8 my husband sells sieves, brooms and much more we make a living 1 58
# Pass the NLP-coded links to make_print_mapNLP() (defined elsewhere in the
# project) with map_n_factors = 99 (presumably a cap on the number of factors
# shown -- confirm).
ex_coded %>% make_print_mapNLP(map_n_factors = 99)

Best prompt so far with all the tweaks

This prompt also includes some more non-causal examples without corresponding answers. However, it seems to lead to the examples being repeated in the answers, e.g. “the course I went on”.

# Select the "best chains" prompt. `[[` is used instead of `$` because `$`
# partial-matches list names and could silently pick a different starter.
prompt <- starter[["008-best-chains"]]
print_prompt(prompt)
Task: identify causal chains in the text and list them so that each cause is followed by >> and then its effect: cause >> effect, or cause >> intermediate step >> effect.
Not all of the sources are telling the truth, so make sure that you are only reporting causal chains you are sure of, where there is no doubt.
If people only express feelings or ideas or fears or hopes or hypotheses, do not identify these as causal links.
Only provide links which are definitely causal, do not include links which are maybe just one thing happening after another with no causal connection.

Example Text:
Some families couldn’t afford the food shop, so the NGO gave them food vouchers so they didn't starve.
I worked hard in order to get a better job.
He covers the pots so the flies do not get into the food.
Now we are getting ill less often, because we wash our hands and because we boil cooking water. We learned those things because of the course I went on. My teacher told me to go on the course.
The stress and the underlying illness both contributed to her heart attack.
First one dog barked and then another dog barked.
This is a lie: the earthquake made the house collapse
She told me the storms caused the famine, but she is not telling the truth.
We ate pies and then we ate fish.
I fear global warming could cause the sea to boil dry.

Example Answer:
some families couldn't afford the food shop >> the NGO gave them food vouchers >> they didn't starve
I wanted to get a better job >> I worked hard
he did not want the flies to get into the food >> he covers the pots >> the flies do not get into the food
we learned hygiene practices >> we wash our hands >> now we are getting ill less often
we learned hygiene practices >> we boil cooking water >> now we are getting ill less often
the course I went on >> we wash our hands
the course I went on >> we boil cooking water
The stress >> her heart attack
the underlying illness >> her heart attack
my teacher told me >> I went on a course >> we learned hygiene practices

Text:
# Code the benchmark statements with the NLP model; `prompt` is passed as
# `start` and `sentences_per_statement` is set to NULL.
ex_coded <-
  ex %>%
  pipe_NLPidentify_links(start = prompt, sentences_per_statement = NULL)

# Tabulate the identified links (TRUE spelled out; `T` can be reassigned).
ex_coded %>% ltab(3333, kable = TRUE)
statement_id from_label to_label simple_frequency nr
6 we wanted to improve our health we learned hygiene practices 2 1
6 we wanted to improve our health we learned hygiene practices 2 2
1 Organisation 1 taught us how to plant and how to harvest we planted millet and corn 1 3
1 we planted millet and corn we try to fight for better things 1 4
1 the chicken we had all died due to sickness we have nothing 1 5
1 we plant tomatoes, vegetables, cabbage, and corn we grow crops in our farm 1 6
1 we grow crops in our farm we have no food shortages 1 7
2 my family gets sick everything is affected and put on hold 1 8
2 everything is affected and put on hold it has decreased 1 9
3 Organisation 1 provided savings they taught us how to save money 1 10
3 they taught us how to save money my family earns money 1 11
3 my husband has his own business we live in the same way 1 12
3 we are not able to produce more nothing has changed 1 13
3 earnings from agriculture has not changed everything we have is due to business that my husband does 1 14
3 everything we have is due to business that my husband does my husband sells sieves, brooms and more 1 15
3 my husband sells sieves, brooms and more we make a living 1 16
4 This is an example file showcase the different functionalities in the Causal Map App. 1 17
5 people get sick from time to time we go to church instead of the hospital 1 18
5 we go to church instead of the hospital we do not know why we get sick 1 19
5 we try to change we make use of mosquito nets 1 20
5 we make use of mosquito nets we dig holes and put litter in them 1 21
5 we dig holes and put litter in them we have better latrines/toilets 1 22
5 we take three baths per day we wash our hands after visiting the washrooms 1 23
5 we wash our hands after visiting the washrooms we use mosquito nets 1 24
5 we previously took a bath per day we would go to the bushes to alleviate ourselves 1 25
5 we learned from Organisation 1 we changed our behaviours 1 26
5 we changed our behaviours we care for our health 1 27
5 we care for our health men and women in my household mostly use the latrines/toilets 1 28
5 men and women in my household mostly use the latrines/toilets we take three baths per day 1 29
6 we learned hygiene practices we wash our hands 1 30
6 we wash our hands now we are getting ill less often 1 31
6 we learned hygiene practices we boil cooking water 1 32
6 we boil cooking water now we are getting ill less often 1 33
6 the course I went on we wash our hands 1 34
6 the course I went on we boil cooking water 1 35
7 Our churches prohibited us from visiting the hospital five of my children are dead 1 36
7 five of my children are dead this year things are getting worse 1 37
7 this year things are getting worse we often get sicker 1 38
# Pass the NLP-coded links to make_print_mapNLP() (defined elsewhere in the
# project) with map_n_factors = 99 (presumably a cap on the number of factors
# shown -- confirm).
ex_coded %>% make_print_mapNLP(map_n_factors = 99)

Best prompt so far with all the tweaks but fewer examples

This prompt also includes some more probably non-causal examples but will hopefully avoid copying some of the examples.

# Select the "best chains, fewer examples" prompt. `[[` is used instead of `$`
# because `$` partial-matches list names and could silently pick a different
# starter.
prompt <- starter[["007-best-chains-fewer-examples"]]
print_prompt(prompt)
Task: identify causal chains in the text and list them so that each cause is followed by >> and then its effect: cause >> effect, or cause >> intermediate step >> effect.
Not all of the sources are telling the truth, so make sure that you are only reporting causal chains you are sure of, where there is no doubt.
If people only express feelings or ideas or fears or hopes or hypotheses, do not identify these as causal links.
Only provide links which are definitely causal, do not include links which are maybe just one thing happening after another with no causal connection.

Example Text:
Some families couldn’t afford the food shop, so the NGO gave them food vouchers so they didn't starve. I worked hard in order to get a better job. He covers the pots so the flies do not get into the food.
Now we are getting ill less often, because we wash our hands and because we boil cooking water. We learned those things because of the course I went on. My teacher told me to go on the course.

Example Answer:
some families couldn't afford the food shop >> the NGO gave them food vouchers >> they didn't starve
I wanted to get a better job >> I worked hard
he did not want the flies to get into the food >> he covers the pots >> the flies do not get into the food
we learned hygiene practices >> we wash our hands >> now we are getting ill less often
we learned hygiene practices >> we boil cooking water >> now we are getting ill less often
the course I went on >> we wash our hands
the course I went on >> we boil cooking water
my teacher told me >> I went on a course >> we learned hygiene practices

Text:
# Code the benchmark statements with the NLP model; `prompt` is passed as
# `start` and `sentences_per_statement` is set to NULL.
ex_coded <-
  ex %>%
  pipe_NLPidentify_links(start = prompt, sentences_per_statement = NULL)

# Tabulate the identified links (TRUE spelled out; `T` can be reassigned).
ex_coded %>% ltab(3333, kable = TRUE)
statement_id from_label to_label simple_frequency nr
1 This is an example file showcase the different functionalities in the Causal Map App 1 1
2 people get sick from time to time we do not visit the hospital 1 2
2 we do not visit the hospital we go to church 1 3
2 we go to church cultural issues and beliefs 1 4
2 cultural issues and beliefs going to the hospital takes longer time 1 5
2 God made things to occur we do not know why 1 6
2 we do not know why we try to change 1 7
2 we try to change we still do get sick 1 8
2 we try to change we make use of mosquito nets 1 9
2 we make use of mosquito nets we dig holes 1 10
2 we dig holes we put litter 1 11
2 we put litter we have better latrines/toilets 1 12
2 we try to change we take three baths per day 1 13
2 we take three baths per day we wash our hands after visiting the washrooms 1 14
2 we learned from Organisation 1 the way one behaves contributes to a health status 1 15
2 the way one behaves contributes to a health status we changed our behaviours 1 16
2 we changed our behaviours we take three baths per day 1 17
2 we take three baths per day we use latrines/toilets 1 18
2 we use latrines/toilets we use mosquito nets 1 19
3 I wanted to improve I worked hard 1 20
3 I worked hard I improved 1 21
4 things are getting worse we often get sicker 1 22
4 churches prohibited us from visiting the hospital five of my children are dead 1 23
5 we have the strength to farm more things have not changed 1 24
5 we wanted better things we planted millet and corn 1 25
5 the chicken died due to sickness we had nothing 1 26
5 Organisation 1 taught us we learned how to plant and how to harvest in the best manner 1 27
5 we still arrange the barn and sift corn we remove the shell 1 28
5 we wanted to have no food shortages we plant tomatoes, vegetables, cabbage, and corn 1 29
5 we plant tomatoes, vegetables, cabbage, and corn we grow crops in our farm 1 30
7 we all get sick everything is affected and put on hold 1 31
7 everything is affected and put on hold it has decreased 1 32
8 Organisation 1 provided savings they taught us how to save money 1 33
8 they taught us how to save money my family earns money 1 34
8 my husband has his own business we live in the same way 1 35
8 we are not able to produce more nothing has changed 1 36
8 earnings from agriculture has not changed everything we have is due to business that my husband does and also through savings 1 37
8 my husband sells sieves, brooms and much more we make a living 1 38
# Pass the NLP-coded links to make_print_mapNLP() (defined elsewhere in the
# project) with map_n_factors = 99 (presumably a cap on the number of factors
# shown -- confirm).
ex_coded %>% make_print_mapNLP(map_n_factors = 99)