#install.packages("googlesheets4")
#https://github.com/tidyverse/googlesheets4
library(googlesheets4)
library(ggplot2)
# Load the survey responses from the Google Sheet into `beans`.
beans <- read_sheet(
  ss = "https://docs.google.com/spreadsheets/d/1uet87MtwnXE16_ldijFgtu4aSv2vJ5qxN4luQxyw_p4/edit#gid=1780467196"
)
## Registered S3 method overwritten by 'openssl':
## method from
## print.bytes Rcpp
## Using an auto-discovered, cached token.
## To suppress this message, modify your code or options to clearly consent to the use of a cached token.
## See gargle's "Non-interactive auth" vignette for more details:
## https://gargle.r-lib.org/articles/non-interactive-auth.html
## The googlesheets4 package is using a cached token for tiagoandremarques@gmail.com.
## Reading from 'Untitled spreadsheet'
##
# True number of beans in the jar; the reference value that the guesses are
# compared against in the plots and in the error calculation.
truebeans <- 2620
The actual estimates, in the order they came in:
# Show the raw guesses in submission order.
print(beans$`What is your best guess for the number of beans in the jar?`)
## [1] 1200 3375 672 996 2419 2312 3333 150 800 850 1500 897 6200 400
## [15] 330 3000 1500 789 1500 1000 2000 953 3999 540 897 3060 1136 1000
## [29] 282 324 1745 1089 2000 5000 301 2500 1500 1000 1369 1800 989 3200
## [43] 650 2200
and sorted for easier reading
# Show the guesses in ascending order; the original call repeated the unsorted
# print even though the text promises a sorted listing.
sort(beans$`What is your best guess for the number of beans in the jar?`)
## [1] 150 282 301 324 330 400 540 650 672 789 800 850 897 897
## [15] 953 989 996 1000 1000 1000 1089 1136 1200 1369 1500 1500 1500 1500
## [29] 1745 1800 2000 2000 2200 2312 2419 2500 3000 3060 3200 3333 3375 3999
## [43] 5000 6200
# Keep the guesses in a short-named vector for convenience.
ests <- beans[["What is your best guess for the number of beans in the jar?"]]
The minimum estimate was 150 beans, the maximum estimate was 6200 beans, and the median estimate was 1168 beans (the mean estimate was about 1654 beans). The true number of beans in the jar was 2620 beans.
# Base-graphics histogram of the guesses in 250-bean bins, with dashed vertical
# lines for the true count (green) and the mean guess (red).
# The breaks cover at least 0-7000 and stretch further if a larger guess
# arrives, because hist() stops with an error when a value falls outside them.
hist_breaks <- seq(
  0,
  max(7000, 250 * ceiling(max(ests, na.rm = TRUE) / 250)),
  250
)
hist(
  ests,
  main = "Beans: You've bean estimated",
  xlab = "Estimate",
  breaks = hist_breaks
)
abline(v = truebeans, lty = 2, col = "green")
abline(v = mean(ests, na.rm = TRUE), lty = 2, col = "red")
# Build the label from `truebeans` so the legend cannot drift from the line.
legend(
  "topright",
  legend = c(paste0("Truth=", truebeans), "Estimated mean"),
  lty = 2,
  col = c("green", "red")
)
# ggplot2 histogram of the guesses; the dotted blue line marks the true count.
# The line position comes from `truebeans` rather than a literal 2620, so this
# figure stays in step with the other plots if the constant changes.
ggplot(
  data = beans,
  mapping = aes(
    x = `What is your best guess for the number of beans in the jar?`
  )
) +
  geom_histogram() +
  geom_vline(
    xintercept = truebeans,
    linetype = "dotted",
    color = "blue",
    size = 1.5
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Now by sex
# Box plots of the guesses grouped by the answer to the gender question; the
# dotted blue line marks the true bean count.
p <- ggplot(
  data = beans,
  mapping = aes(
    x = `What is your best guess for the number of beans in the jar?`,
    y = `What is your gender?`,
    colour = `What is your gender?`
  )
)
p <- p +
  geom_boxplot() +
  geom_vline(
    xintercept = truebeans,
    linetype = "dotted",
    colour = "blue",
    size = 1.5
  )
p
About ISEC
# Box plots of the guesses grouped by ISEC conference attendance; the dotted
# blue line marks the true bean count.
p <- ggplot(
  data = beans,
  mapping = aes(
    x = `What is your best guess for the number of beans in the jar?`,
    y = `Have you ever attended an ISEC conference?`,
    colour = `Have you ever attended an ISEC conference?`
  )
)
p <- p +
  geom_boxplot() +
  geom_vline(
    xintercept = truebeans,
    linetype = "dotted",
    colour = "blue",
    size = 1.5
  )
p
About country
# Box plots of the guesses grouped by country of residence; the dotted blue
# line marks the true bean count.
p <- ggplot(
  data = beans,
  mapping = aes(
    x = `What is your best guess for the number of beans in the jar?`,
    y = `Which country do you live in?`,
    colour = `Which country do you live in?`
  )
)
p <- p +
  geom_boxplot() +
  geom_vline(
    xintercept = truebeans,
    linetype = "dotted",
    colour = "blue",
    size = 1.5
  )
p
About children
# Box plots of the guesses grouped by the answer to the children question; the
# dotted blue line marks the true bean count.
p <- ggplot(
  data = beans,
  mapping = aes(
    x = `What is your best guess for the number of beans in the jar?`,
    y = `Do you have children?`,
    colour = `Do you have children?`
  )
)
p <- p +
  geom_boxplot() +
  geom_vline(
    xintercept = truebeans,
    linetype = "dotted",
    colour = "blue",
    size = 1.5
  )
p
Age vs estimate
# Scatter plot of each guess against the respondent's age. Explicit axis
# labels replace the default deparsed `beans$...` expressions.
plot(
  beans$`How old are you?`,
  beans$`What is your best guess for the number of beans in the jar?`,
  xlab = "Age",
  ylab = "Estimate"
)
Influence of age on error?
# Signed error of each guess (guess minus truth); negative values are
# underestimates. Stored as a new `error` column on `beans`.
beans$error <-
  beans$`What is your best guess for the number of beans in the jar?` -
  truebeans
# Scatter plot of error against age, with explicit axis labels instead of the
# default deparsed `beans$...` expressions.
plot(
  beans$`How old are you?`,
  beans$error,
  xlab = "Age",
  ylab = "Error (estimate - truth)"
)
Errors:
# List the signed errors from the largest underestimate to the largest
# overestimate.
sort(beans[["error"]])
## [1] -2470 -2338 -2319 -2296 -2290 -2220 -2080 -1970 -1948 -1831 -1820
## [12] -1770 -1723 -1723 -1667 -1631 -1624 -1620 -1620 -1620 -1531 -1484
## [23] -1420 -1251 -1120 -1120 -1120 -1120 -875 -820 -620 -620 -420
## [34] -308 -201 -120 380 440 580 713 755 1379 2380 3580