Libraries
library(readr)
library(ggplot2)
library(langcog)
##
## Attaching package: 'langcog'
## The following object is masked from 'package:base':
##
## scale
library(boot)
library(readr)
library(knitr)
knitr::opts_chunk$set(fig.crop = F,echo=T,
warning=F, cache=F,
message=F, sanitize = T)
library(rwebppl)
## local webppl exists: v0.9.6 /usr/local/lib/node_modules/webppl
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(jsonlite)
library(scales)
##
## Attaching package: 'scales'
## The following objects are masked from 'package:readr':
##
## col_factor, col_numeric
library(coda)
theme_set(theme_bw())
estimate_mode <- function(s) {
d <- density(s)
return(d$x[which.max(d$y)])
}
hdi_upper<- function(s){
m <- HPDinterval(mcmc(s))
return(m["var1","upper"])
}
hdi_lower<- function(s){
m <- HPDinterval(mcmc(s))
return(m["var1","lower"])
}
Model <- '
var pickStick = function(sticks, J) {
return flip(sticks(J)) ? J : pickStick(sticks, J+1);
};
var makeSticks = function(alpha) {
var sticks = mem(function(index) {return beta(1, alpha)});
return function() {
return pickStick(sticks,1)
};
}
var DPmem = function(alpha, baseDist) {
var augmentedProc = mem(function(args, stickIndex) {return apply(baseDist, args)});
var DP = mem(function(args) {return makeSticks(alpha)});
return function(argsin) {
var stickIndex = DP(argsin)()
return augmentedProc(argsin, stickIndex);
}
}
var uuid = function() {
var s4 = function() {
return (Math.floor((1 + Math.random()) * 0x10000)
.toString(16)
.substring(1));
}
return s4() + s4() + "-" + s4() + "-" + s4() + "-" +
s4() + "-" + s4() + s4() + s4();
}
var data1={s:0, o:0}
var data2o=dataFromR[0].data2o
var data2s=dataFromR[0].data2s
var data2={s:data2s, o:data2o}
var bias=dataFromR[0].bias
var toleranceO = dataFromR[0].toleranceO
var toleranceS = dataFromR[0].toleranceS
var myfreq = dataFromR[0].myfreq
var Learning = function(){
var getCat = DPmem(bias, uuid);
var obsToCatID = mem(function(obs) {return getCat()});
var IDToMeans = mem(function(catID) {
return {o:uniform(-1,2), s:uniform(-1,2)}
});
//generate the categories
var mu1 = IDToMeans(obsToCatID(data1))
var mu2 = IDToMeans(obsToCatID(data2))
//define the categories
var Cat1= {o: Gaussian({mu: mu1.o, sigma: toleranceO }),
s: Gaussian({mu: mu1.s, sigma: toleranceS })}
var Cat2= {o: Gaussian({mu: mu2.o, sigma: toleranceO }),
s: Gaussian({mu: mu2.s, sigma: toleranceS })}
var learn = function(freq){
//I should probably observe with noise
//var cat1o= sample(Cat1.o)
//observe(Gaussian({mu: cat1o, sigma: 0.05}), data1.o)
//var cat1s= sample(Cat1.s)
//observe(Gaussian({mu: cat1s, sigma: 0.05}), data1.s)
//var cat2o= sample(Cat2.o)
//observe(Gaussian({mu: cat2o, sigma: 0.05}), data2.o)
//var cat2s= sample(Cat2.s)
//observe(Gaussian({mu: cat2s, sigma: 0.05}), data2.s)
observe(Cat1.o, data1.o)
observe(Cat1.s, data1.s)
observe(Cat2.o, data2.o)
observe(Cat2.s, data2.s)
}
//effect of frequency
var frequency = Array.apply(null, {length: myfreq}).map(Number.call, Number)
map(learn, frequency)
//Should deal with frequency later
var CatID = map(obsToCatID, [data1, data2])
var NCat = _.uniq(CatID)
//Observe conditional probability instead
//p(o1|s1)=p(o1|C1)p(s1|C1)+ p(o1|C2)p(s1|C2)
var Same2Cat= Math.exp(Cat1.o.score(data1.o))*Math.exp(Cat1.s.score(data1.s))+
Math.exp(Cat2.o.score(data1.o))*Math.exp(Cat2.s.score(data1.s))
//p(o2|s1)p=p(o2|C1)p(s1|C1)+ p(o2|C2)p(s1|C2)
var Switch2Cat= Math.exp(Cat1.o.score(data2.o))*Math.exp(Cat1.s.score(data1.s))+
Math.exp(Cat2.o.score(data2.o))*Math.exp(Cat2.s.score(data1.s))
var Same1Cat= Math.exp(Cat1.o.score(data1.o))*Math.exp(Cat1.s.score(data1.s))
var Switch1Cat= Math.exp(Cat1.o.score(data2.o))*Math.exp(Cat1.s.score(data1.s))
var Same = NCat.length==1 ? Same1Cat/(Same1Cat+Switch1Cat) : Same2Cat/(Same2Cat+Switch2Cat)
var Switch = NCat.length==1 ? Switch1Cat/(Same1Cat+Switch1Cat) : Switch2Cat/(Same2Cat+Switch2Cat)
var SuprisalSame=-Math.log(Same)
var SuprisalSwitch=-Math.log(Switch)
return {Number: NCat.length,
Same: Same,
Switch: Switch,
SurpSame: SuprisalSame,
SurpSwitch: SuprisalSwitch,
}
}
'
Initial state
x <- seq(0.1, 1.1, by=0.1)
word.number <- vector(length=length(x))
word.same <- vector(length=length(x))
word.switch <- vector(length=length(x))
word.sameSurp <- vector(length=length(x))
word.switchSurp <- vector(length=length(x))
for (i in 1:length(x)) {
dataToModel <- data.frame(data2o=0.5,
data2s=x[i],
bias=0.3,
toleranceO=0.2,
toleranceS=0.2,
myfreq=1
)
posterior <- webppl(Model,
data = dataToModel,
data_var = "dataFromR",
inference_opts = list(method = "MCMC", samples = 10000),
model_var = "Learning",
output_format = "webppl")
word.number[i] <- mean(posterior[["value.Number"]])
word.same[i] <- mean(posterior[["value.Same"]])
word.switch[i] <- mean(posterior[["value.Switch"]])
word.sameSurp[i] <- mean(posterior[["value.SurpSame"]])
word.switchSurp[i] <- mean(posterior[["value.SurpSwitch"]])
}
data <- data.frame(sound2=x, word.number, word.same, word.switch,word.sameSurp, word.switchSurp)
ggplot(data, aes(x=sound2, y=word.number)) +
geom_point() +
geom_smooth(se = FALSE)+
xlab("Perceptual distance between labels") +
ylab("Expectation Number of concepts") +
scale_y_continuous(limits = c(1, 2))+
theme_bw()
surprisal <- data %>%
select(sound2, word.sameSurp, word.switchSurp) %>%
rename(same=word.sameSurp,
switch=word.switchSurp) %>%
gather(type, surprisal, same, switch)
ggplot(surprisal, aes(x=sound2, y=surprisal, col=type)) +
geom_point() +
geom_smooth(se = FALSE)+
xlab("Perceptual distance between labels") +
ylab("Surprisal") +
theme_bw()
Increase the meaning distance of the referents
x <- seq(0.1, 1.1, by=0.1)
word.number <- vector(length=length(x))
word.same <- vector(length=length(x))
word.switch <- vector(length=length(x))
word.sameSurp <- vector(length=length(x))
word.switchSurp <- vector(length=length(x))
for (i in 1:length(x)) {
dataToModel <- data.frame(data2o=0.8,
data2s=x[i],
bias=0.3,
toleranceO=0.2,
toleranceS=0.2,
myfreq=1
)
posterior <- webppl(Model,
data = dataToModel,
data_var = "dataFromR",
inference_opts = list(method = "MCMC", samples = 10000),
model_var = "Learning",
output_format = "webppl")
word.number[i] <- mean(posterior[["value.Number"]])
word.same[i] <- mean(posterior[["value.Same"]])
word.switch[i] <- mean(posterior[["value.Switch"]])
word.sameSurp[i] <- mean(posterior[["value.SurpSame"]])
word.switchSurp[i] <- mean(posterior[["value.SurpSwitch"]])
}
data2 <- data.frame(sound2=x, word.number, word.same, word.switch, word.sameSurp, word.switchSurp)
ggplot(data2, aes(x=sound2, y=word.number)) +
geom_point() +
geom_smooth(se = FALSE)+
xlab("Perceptual distance between labels") +
ylab("Expectation Number of concepts") +
theme_bw()
surprisal <- data2 %>%
select(sound2, word.sameSurp, word.switchSurp) %>%
rename(same=word.sameSurp,
switch=word.switchSurp) %>%
gather(type, surprisal, same, switch)
ggplot(surprisal, aes(x=sound2, y=surprisal, col=type)) +
geom_point() +
geom_smooth(se = FALSE)+
xlab("Perceptual distance between labels") +
ylab("Surprisal") +
theme_bw()
Simulate the switch effect with various parameters
x <- seq(0.1, 1.1, by=0.1)
word.sameSurp <- vector(length=length(x))
word.switchSurp <- vector(length=length(x))
word.effect <- vector(length=length(x))
for (i in 1:length(x)) {
dataToModel <- data.frame(data2o=x[i],
data2s=0.5,
bias=0.3,
toleranceO=0.2,
toleranceS=0.2,
myfreq=1
)
posterior <- webppl(Model,
data = dataToModel,
data_var = "dataFromR",
inference_opts = list(method = "MCMC", samples = 10000),
model_var = "Learning",
output_format = "webppl")
word.sameSurp[i] <- mean(posterior[["value.SurpSame"]])
word.switchSurp[i] <- mean(posterior[["value.SurpSwitch"]])
word.effect[i]=word.switchSurp[i]-word.sameSurp[i]
}
data3 <- data.frame(sound2=x, word.effect)
ggplot(data3, aes(x=sound2, y=word.effect)) +
geom_point() +
geom_smooth(se = FALSE)+
xlab("Semantic distance") +
ylab("Switch-Same")+
theme_bw()
Simulate the switch effect with various parameters
x <- seq(1,5)
word.sameSurp <- vector(length=length(x))
word.switchSurp <- vector(length=length(x))
word.effect <- vector(length=length(x))
for (i in 1:length(x)) {
dataToModel <- data.frame(data2o=0.5,
data2s=0.5,
bias=0.3,
toleranceO=0.2,
toleranceS=0.2,
myfreq=x[i]
)
posterior <- webppl(Model,
data = dataToModel,
data_var = "dataFromR",
inference_opts = list(method = "MCMC", samples = 10000),
model_var = "Learning",
output_format = "webppl")
word.sameSurp[i] <- mean(posterior[["value.SurpSame"]])
word.switchSurp[i] <- mean(posterior[["value.SurpSwitch"]])
word.effect[i]=word.switchSurp[i]-word.sameSurp[i]
}
data4 <- data.frame(sound2=x, word.effect)
ggplot(data4, aes(x=sound2, y=word.effect)) +
geom_point() +
geom_smooth(se = FALSE)+
xlab("Frequency") +
ylab("Switch-Same")+
theme_bw()
Simulate the switch effect with various parameters
x <- seq(0.2, 5, by=0.5)
word.sameSurp <- vector(length=length(x))
word.switchSurp <- vector(length=length(x))
word.effect <- vector(length=length(x))
for (i in 1:length(x)) {
dataToModel <- data.frame(data2o=0.5,
data2s=0.5,
bias=x[i],
toleranceO=0.2,
toleranceS=0.2,
myfreq=1
)
posterior <- webppl(Model,
data = dataToModel,
data_var = "dataFromR",
inference_opts = list(method = "MCMC", samples = 10000),
model_var = "Learning",
output_format = "webppl")
word.sameSurp[i] <- mean(posterior[["value.SurpSame"]])
word.switchSurp[i] <- mean(posterior[["value.SurpSwitch"]])
word.effect[i]=word.switchSurp[i]-word.sameSurp[i]
}
data5 <- data.frame(sound2=x, word.effect)
ggplot(data5, aes(x=sound2, y=word.effect)) +
geom_point() +
geom_smooth(se = FALSE)+
xlab("Bias (for creating new concepts)") +
ylab("Switch-Same")+
theme_bw()
Simulate the switch effect with various parameters
x <- seq(0.1, 0.3, by=0.03)
word.sameSurp <- vector(length=length(x))
word.switchSurp <- vector(length=length(x))
word.effect <- vector(length=length(x))
for (i in 1:length(x)) {
dataToModel <- data.frame(data2o=0.5,
data2s=0.5,
bias=0.3,
toleranceO=x[i],
toleranceS=x[i],
myfreq=1
)
posterior <- webppl(Model,
data = dataToModel,
data_var = "dataFromR",
inference_opts = list(method = "MCMC", samples = 10000),
model_var = "Learning",
output_format = "webppl")
word.sameSurp[i] <- mean(posterior[["value.SurpSame"]])
word.switchSurp[i] <- mean(posterior[["value.SurpSwitch"]])
word.effect[i]=word.switchSurp[i]-word.sameSurp[i]
}
data6 <- data.frame(sound2=x, word.effect)
ggplot(data6, aes(x=sound2, y=word.effect)) +
geom_point() +
geom_smooth(se = FALSE)+
xlab("Tolerance for variation") +
ylab("Switch-Same")+
theme_bw()