Discrete probability simulation: suppose that a basketball player has a 60% chance of making a shot, and he keeps taking shots until he misses two in a row. Also assume his shots are independent (so that each shot has 60% probability of success, no matter what happened before).
prob_of_scoring<-.6
max_shots<-50
consec_miss<-0
for(q in 1:max_shots){
outcome<-rbinom(1,1,prob_of_scoring)
consec_miss<-ifelse(outcome==0,consec_miss +1,0)
if (consec_miss==2)break
}
q
## [1] 2
prob_of_scoring<-.6
max_shots<-50
consec_miss<-0
bballsims1<-1000
results<-rep(NA,bballsims1)
for(b in 1:bballsims1){
for(q in 1:max_shots){
outcome<-rbinom(1,1,prob_of_scoring)
consec_miss<-ifelse(outcome==0,consec_miss +1,0)
if (consec_miss==2)break
}
q
results[b]<-q
}
hist(results)
prob_score<-.6
max_shots<-100
simbball1<-1000
trials<-rep(NA,simbball1)
number_of_scored_shots<-rep(NA,simbball1)
prob_of_succ_shots<-rep(NA,simbball1)
for (a in 1:simbball1){
s<-0
for(c in 1:max_shots){
outcome<-rbinom(1,1,prob_score)
s<-ifelse(outcome==1,s+1,s)
consec_miss<-ifelse(outcome==0,consec_miss+1,0)
if(consec_miss==2)break
}
trials[a]<-c
number_of_scored_shots[a]<-s
prob_of_succ_shots[a]<-s/c
}
plot(trials,prob_of_succ_shots)
mean.savings<-5
se.savings<-4
size.market<-40000
se.market<-1000
sims1<-1000
savings.total<-rep(NA,sims1)
for(r in 1:sims1){
market<-rnorm(n=1,mean=size.market,sd=se.market)
savings<-rnorm(n=1,mean=mean.savings,sd=se.savings)
savings.total[r]<-market*savings
}
hist(savings.total)
require(arm)
## Loading required package: arm
## Loading required package: MASS
## Loading required package: Matrix
## Loading required package: lme4
##
## arm (Version 1.13-1, built: 2022-8-25)
## Working directory is C:/Users/brand/Desktop/UCR/PhD CCN/PSYC213_Spring2023/Lab/Week 5
require(ggplot2)
## Loading required package: ggplot2
require(foreign)
## Loading required package: foreign
prof <- read.csv("http://www.stat.columbia.edu/~gelman/arm/examples/beauty/ProfEvaltnsBeautyPublic.csv")
# convert into factors
prof$profnumber <- as.factor(prof$profnumber)
prof$female <- as.factor(prof$female)
# convert dummy `class*` variables into a factor
dummies <- prof[, 18:47]
prof$class <- factor(apply(dummies, FUN=function(r) r %*% 1:30, MARGIN=1))
# remove dummy variables
prof <- prof[-c(18:47)]
# normalise and centre professor evaluation (all other predictors are binary)
prof$c.profevaluation <- prof$profevaluation - mean(prof$profevaluation) / (2 * sd(prof$profevaluation))
# fit linear model
m1 <- lm(courseevaluation ~ female + onecredit + c.profevaluation*nonenglish, data=prof)
display(m1)
## lm(formula = courseevaluation ~ female + onecredit + c.profevaluation *
## nonenglish, data = prof)
## coef.est coef.se
## (Intercept) 3.69 0.01
## female1 -0.03 0.02
## onecredit 0.10 0.04
## c.profevaluation 0.94 0.02
## nonenglish -0.08 0.04
## c.profevaluation:nonenglish -0.17 0.09
## ---
## n = 463, k = 6
## residual sd = 0.19, R-Squared = 0.88
nsims1<-1000
sim1<-sim(m1,nsims1)
# Instructor A: 50-year-old woman who is a native English speaker and has a beauty score of −1
sim.outcome <- coef(sim1)[, 1] + 1 * coef(sim1)[, 2] +
rep(c(0,1), 500) * coef(sim1)[, 3] + -1 *coef(sim1)[, 4] +
0 * coef(sim1)[, 5] + -1 * 0 * coef(sim1)[, 6]
# Instructor B: 60-year-old man who is a native English speaker and has a beauty score of −0.5
sim.outcome2 <- coef(sim1)[, 1] + 0 * coef(sim1)[, 2] +
rep(c(0,1), 500) * coef(sim1)[, 3] + -.5 * coef(sim1)[, 4] +
0 * coef(sim1)[, 5] + -.5 * 0 * coef(sim1)[, 6]
ggplot(data=data.frame(x1=sim.outcome, x2=sim.outcome2)) +
geom_histogram(aes(x=x1), fill="red", alpha=.35) +
geom_histogram(aes(x=x2), fill="green", alpha=.35)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
sim.diff <- sim.outcome - sim.outcome2
ggplot(data=data.frame(x=sim.diff)) + geom_histogram(aes(x=x), fill="blue", alpha=.35) +
labs(title="Difference in final score of courses ", x="difference between course A and B scores")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
The probability the course A will have a higher score is zero
mean(sim.diff>=0)
## [1] 0
prop.female <- .48
men.log.weight <- 5.13
men.log.sd <- .17
women.log.weight <- 4.96
women.log.sd <- .20
n <- 10
n.sims <- 1000
capacity <- 1750
breaks <- rep(NA, n.sims)
weights <- rep(NA, n)
for (t in 1:n.sims) {
weights <- rep(NA, n)
sexes <- rbinom(n=n, size=1, prob=prop.female) # 1 if female, 0 if male
for (i in 1:n) {
weights[i] <- exp(ifelse(sexes[i]==1, rnorm(n=1, mean=women.log.weight, sd=women.log.sd),
rnorm(n=1, mean=men.log.weight, sd=men.log.sd)))
}
breaks[t] <- ifelse(capacity< sum(weights), 1,0)
}
sum(breaks)/n.sims
## [1] 0.061
I prefer to use my own data. You are welcome to use mine as well but encouraged to use your own data.
# make sure I am in the right directory where the data is stored
getwd()
## [1] "C:/Users/brand/Desktop/UCR/PhD CCN/PSYC213_Spring2023/Lab/Week 5"
#setwd("C:/Usersbrand/Desktop/UCR/PhD CCN/PSYC213_Spring2023/Lab/Week 5")
# Read in the data and take a peek
TLE3<- read.csv("TLE3_Reaction_Times_PSYC213C_02.csv", header=TRUE, colClasses=c('numeric','numeric','numeric','numeric','numeric','numeric'))
## Warning in read.table(file = file, header = header, sep = sep, quote = quote, :
## cols = 3 != length(data) = 6
TLE3<-data.frame(TLE3)
colnames(TLE3) <- c('Move', 'Target', 'Reaction.Time', 'Trial', 'Participant', 'Teamsize')
# Create lists for Individuals and Dyads
Individuals<-c(4,5,6,7,8,9,10,11,12,13,15,19,21,22,24,25,26,27,28,30,31,33,35,36,37,38,39,40,41,42,44,46,48,54,56,57,59,61,63,64,67,69,71,72,74,78,80,81,84,85,86,87)
Dyads<-c(2,3,14,16,17,18,20,23,29,32,34,43,45,47,49,50,51,52,53,55,58,60,62,65,66,68,70,73,75,76,77,79,82,83)
filtered_TLE3<-subset(TLE3,Move >= 2 & Move <=19)
# Generate Trials
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
##
## select
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
for (t in 1:nrow(filtered_TLE3)){
filtered_TLE3$Trial[t]<-((t-1)%/%18)%%22+1
}
# Generate Participant
count<-as.double(2)
filtered_TLE3$Participant[1] <- count
for (q in 2:nrow(filtered_TLE3)) {
if ( (filtered_TLE3$Trial[q-1] != 1) & (filtered_TLE3$Trial[q] == 1) ) {
count <- count + 1
filtered_TLE3$Participant[q] <- count
} else {
filtered_TLE3$Participant[q] <- count
}
}
# Generate Teamsize
for (t in 1:nrow(filtered_TLE3)){
if (filtered_TLE3$Participant[t] %in% Individuals) {
filtered_TLE3$Teamsize[t]<- 1
} else if (filtered_TLE3$Participant[t] %in% Dyads) {
filtered_TLE3$Teamsize[t]<- 2
} else {
filtered_TLE3$Teamsize[t]<- -99
}
}
library(ggplot2)
n<-nrow(filtered_TLE3)
mu<-mean(filtered_TLE3$Reaction.Time)
theta<-sd(filtered_TLE3$Reaction.Time)
fTLE3sim1<-rep(NA,n)
Exp<-rep(NA,n)
for (si in 1:n){
Exp[si]<-ifelse(si<=.5*n,1,2)
}
fTLE3sim1=rnorm(n,mu,theta)
fTLE3sim2<-mutate(filtered_TLE3,sim.times=fTLE3sim1)
#fTLE3sim2=c(filtered_TLE3,fTLE3sim1)
# TS<-filtered_TLE3$Teamsize
# MN<-filtered_TLE3$Move
# TN<-filtered_TLE3$Trial
#FTLE3_sim<-data.frame(Exp,TS,MN,TN,fTLE3sim1)
filtered_TLE3$Teamsize<-as.factor(filtered_TLE3$Teamsize)
# Now plot them for comparison
ggplot(
data = filtered_TLE3, mapping = aes(x = Trial, y= Reaction.Time)) +
geom_jitter(mapping = aes(color= Teamsize))
fTLE3sim2$Teamsize<-as.factor(fTLE3sim2$Teamsize)
ggplot(
data=fTLE3sim2, mapping = aes(x = Trial, y = sim.times))+
geom_jitter(mapping = aes(color = Teamsize))
How do the new simulated data look compared to the collected data? The simulated data don’t look much like the collected data. We have negative numbers for simulated reaction times and in the lines below I took the absolute value of those simulated times to get a slightly better representation for the simulated data.
Is there a way to make the simulated data more similar to the collected data?
for (si in 1:n)
fTLE3sim2$sim.times[si]<-abs(fTLE3sim2$sim.times[si])
ggplot(
data=fTLE3sim2, mapping = aes(x = Trial, y = sim.times))+
geom_jitter(mapping = aes(color = Teamsize))
t.test(filtered_TLE3$Reaction.Time,fTLE3sim2$sim.times)
##
## Welch Two Sample t-test
##
## data: filtered_TLE3$Reaction.Time and fTLE3sim2$sim.times
## t = -25.716, df = 64770, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.10968561 -0.09414993
## sample estimates:
## mean of x mean of y
## 0.5580634 0.6599812
Garbage tried follows:
# Increment Trial and Participant based on conditions
# for (p in 1:nrow(filtered_TLE3)) {
# if (filtered_TLE3$Trial[p] <- filtered_TLE3$Participant[i - 1] + 1
#filtered_TLE3
# In this example, filtered_TLE3 is a data frame with three columns: Move, Trial, and Participant.
#
# Move contains the values ranging from 2 to 19, repeated 18 times for each move number.
# Trial contains the values ranging from 1 to 22, repeating 18 times for each move number.
# Participant starts with a value of 1 and increases by 1 only when there is a shift in Trial from 22 to 1.
# The code iterates over the rows of the data frame, checking for the specified conditions. If Trial shifts from 22 to 1, both Trial and Participant are incremented by 1. If the value in Move increases from the previous row, only Trial is incremented by 1.
#
# The resulting filtered_TLE3 data frame will show the updated values in Trial and Participant based on the provided conditions.
# #Check for shift and increment Participant column
# for (i in 2:nrow(filtered_TLE3)){
# if (filtered_TLE3$Trial[i-1]==22 && filtered_TLE3$Trial[i]==1){
# filtered_TLE3$Participant[i]<-filtered_TLE3$Participant[i-1]+1
# }
# }
# library(dplyr)
# # Create a new column, MutatedParticipant to store the modified values of Participant column
# filtered_TLE3<- filtered_TLE3 %>%
# mutate(
# Participant = ifelse(
# Trial ==1 & lag(Trial)==22,
# lag(Participant)+1,
# Participant
# ))
#
# filtered_TLE3
# # Initialize increment variable
# increment<-0
#
# #Loop through each row
# for (p in 1:nrow(filtered_TLE3$)){
# #Check if the value in Move shifts from 22 to 1
# if (filtered_TLE3$Trial[p]==22 && filtered_TLE3$Trial[p+1]==1){
# # Increment Participant by +1 and update increment variable
# increment<-increment+1
# filtered_TLE3$Participant[p + 1]<- increment
# }
#}
# for (i in 2:19) {
# cycle <- floor((i-2)/18) # Calculate the cycle number
# start <- cycle * 18 + 1 # Calculate the starting value for each cycle
# end <- (cycle + 1) * 18 # Calculate the ending value for each cycle
# values_col2 <- rep(1:22, each = 18)[start:end]
# values_col3 <- ifelse(i == 2 || i == start, 1:(22 * (cycle + 1)), 1:(22 * cycle))
# df$column1[(i-2)*18 + 1:(i-1)*18] <- i
# df$column2[(i-2)*18 + 1:(i-1)*18] <- values_col2
# df$column3[(i-2)*18 + 1:(i-1)*18] <- values_col3
# }
# for (p in 1:highest_value_in_trial_column) {
# cycle <- floor((p-1)/22) # Calculate the cycle number
# start <- cycle * 22 + 1 # Calculate the starting value for each cycle
# end <- (cycle + 1) * 22 # Calculate the ending value for each cycle
# values <- ifelse(p == 1 || p == start, rep(cycle + 1, 22), rep(cycle, 22))
# TLE3$Participant[(p-1)*22 + 1:p*22] <- values
# TLE3$Trial[(p-1)*22 + 1:p*22] <- p
# }
# TLE3
# for (p in 1:nrow(TLE3)){
# if(!is.na(TLE3$Trial[p])){
# TLE3$Participant[p]<-TLE3$Participant[p]+1
# }
# }
# Increment Trial and Participant based on conditions
# for (p in 1:nrow(filtered_TLE3)) {
# if (filtered_TLE3$Trial[p] <- filtered_TLE3$Participant[i - 1] + 1
#filtered_TLE3
# In this example, filtered_TLE3 is a data frame with three columns: Move, Trial, and Participant.
#
# Move contains the values ranging from 2 to 19, repeated 18 times for each move number.
# Trial contains the values ranging from 1 to 22, repeating 18 times for each move number.
# Participant starts with a value of 1 and increases by 1 only when there is a shift in Trial from 22 to 1.
# The code iterates over the rows of the data frame, checking for the specified conditions. If Trial shifts from 22 to 1, both Trial and Participant are incremented by 1. If the value in Move increases from the previous row, only Trial is incremented by 1.
#
# The resulting filtered_TLE3 data frame will show the updated values in Trial and Participant based on the provided conditions.
# #Check for shift and increment Participant column
# for (i in 2:nrow(filtered_TLE3)){
# if (filtered_TLE3$Trial[i-1]==22 && filtered_TLE3$Trial[i]==1){
# filtered_TLE3$Participant[i]<-filtered_TLE3$Participant[i-1]+1
# }
# }
# library(dplyr)
# # Create a new column, MutatedParticipant to store the modified values of Participant column
# filtered_TLE3<- filtered_TLE3 %>%
# mutate(
# Participant = ifelse(
# Trial ==1 & lag(Trial)==22,
# lag(Participant)+1,
# Participant
# ))
#
# filtered_TLE3
# # Initialize increment variable
# increment<-0
#
# #Loop through each row
# for (p in 1:nrow(filtered_TLE3$)){
# #Check if the value in Move shifts from 22 to 1
# if (filtered_TLE3$Trial[p]==22 && filtered_TLE3$Trial[p+1]==1){
# # Increment Participant by +1 and update increment variable
# increment<-increment+1
# filtered_TLE3$Participant[p + 1]<- increment
# }
#}
# for (i in 2:19) {
# cycle <- floor((i-2)/18) # Calculate the cycle number
# start <- cycle * 18 + 1 # Calculate the starting value for each cycle
# end <- (cycle + 1) * 18 # Calculate the ending value for each cycle
# values_col2 <- rep(1:22, each = 18)[start:end]
# values_col3 <- ifelse(i == 2 || i == start, 1:(22 * (cycle + 1)), 1:(22 * cycle))
# df$column1[(i-2)*18 + 1:(i-1)*18] <- i
# df$column2[(i-2)*18 + 1:(i-1)*18] <- values_col2
# df$column3[(i-2)*18 + 1:(i-1)*18] <- values_col3
# }
# for (p in 1:highest_value_in_trial_column) {
# cycle <- floor((p-1)/22) # Calculate the cycle number
# start <- cycle * 22 + 1 # Calculate the starting value for each cycle
# end <- (cycle + 1) * 22 # Calculate the ending value for each cycle
# values <- ifelse(p == 1 || p == start, rep(cycle + 1, 22), rep(cycle, 22))
# TLE3$Participant[(p-1)*22 + 1:p*22] <- values
# TLE3$Trial[(p-1)*22 + 1:p*22] <- p
# }
# TLE3
# for (p in 1:nrow(TLE3)){
# if(!is.na(TLE3$Trial[p])){
# TLE3$Participant[p]<-TLE3$Participant[p]+1
# }
# }
# index <- 1
#
# for (i in 1:nrow(TLE3)) {
# if (TLE3$Trial[i] == 1) {
# index <- index + 1
# }
# TLE3$Participant[i] <- index
# }
# increment <- FALSE
# index <- 1
#
# for (i in 1:nrow(TLE3)) {
# if (TLE3$column7[i] == 1) {
# increment <- TRUE
# index <- 1
# }
#
# if (TLE3$Trial[i] == 1) {
# increment <- TRUE
# TLE3$Participant[i] <- index
# index <- index + 1
# } else {
# increment <- FALSE
# }
#
# if (increment) {
# TLE3$Participant[i] <- index - 1
# }
#
# TLE3$column6[i] <- index
#
# for (t in 1:nrow(TLE3)){
# if ((t-1)%%18==0){
# if(TLE3$Trial[t]>22){
# TLE3$Trial[t]<-1
# }
# TLE3$Trial[t:(t+17)]<-TLE3$Trial[t]
# }
# if(TLE3$Trial[t]==1){
# TLE3$Participant[t]<-Participant.index
# Participant.index<-Participant.index +1
# }
# TLE3$Trial[t]<-Trial.index
# Trial.index<-Trial.index+1
# }