This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Two fair dice: enumerate all 36 equally likely (die 1, die 2) outcomes once
# and count the outcomes that produce each requested sum, instead of typing
# the hand-counted numerators.
dice_sums <- outer(1:6, 1:6, "+")
# a) P(sum = 1) = 0, since the smallest possible sum of two dice is 2.
sum(dice_sums == 1) / length(dice_sums)
## [1] 0
# b) sum = 5 arises from (1,4), (4,1), (2,3), (3,2): 4 outcomes, so P = 4/36.
sum(dice_sums == 5) / length(dice_sums)
## [1] 0.1111111
# c) sum = 12 arises only from (6,6): 1 outcome, so P = 1/36.
sum(dice_sums == 12) / length(dice_sums)
## [1] 0.02777778
# a) No. The two events are not disjoint: they can occur together, and 4.2%
#    fall into both categories.
# b) Venn diagram of the two events. All figures below are percentages.
library(VennDiagram)
## Warning: package 'VennDiagram' was built under R version 3.4.3
## Loading required package: grid
## Loading required package: futile.logger
## Warning: package 'futile.logger' was built under R version 3.4.3
both <- 4.2          # in poverty AND foreign language
poverty <- 14.6      # in poverty (either language group)
forlanguage <- 20.7  # foreign language (either poverty status)
# Parts of each circle that lie outside the overlap.
povEng <- poverty - both
forlanguageOnly <- forlanguage - both
# ind = FALSE returns the grobs without drawing them; grid.draw() renders them.
venn.plot <- draw.pairwise.venn(
  area1 = poverty,
  area2 = forlanguage,
  cross.area = both,
  category = c("Poverty", "Foreign language"),
  fill = c("yellow", "lightblue"),
  cat.dist = -0.08,
  ind = FALSE
)
grid.draw(venn.plot)
# c) Percent in poverty who are English only (poverty minus the overlap).
povEng
## [1] 10.4
# d) General addition rule: P(poverty or foreign language)
#    = P(poverty) + P(foreign language) - P(both).
forlanguage + poverty - both
## [1] 31.1
# e) English only above poverty =
#    (all English only) - (English only in poverty).
100 - forlanguage - povEng
## [1] 68.9
# f) The events are not independent: the product of the marginals,
#    P(poverty) * P(foreign language), is not equal to the observed joint
#    probability P(poverty and foreign language).
# Product of the marginal probabilities:
poverty / 100 * forlanguage / 100
## [1] 0.030222
# Observed joint probability:
both / 100
## [1] 0.042
# Counts used below, named once so each formula reads as a statement about
# the table instead of a string of bare numbers.
n_couples <- 204
n_male_blue <- 114
n_female_blue <- 108
n_both_blue <- 78
n_male_brown <- 54
n_male_green <- 36
n_female_blue_male_brown <- 19
n_female_blue_male_green <- 11
# a) P(male blue or female blue)
#    = P(male blue) + P(female blue) - P(both blue)
n_male_blue / n_couples + n_female_blue / n_couples - n_both_blue / n_couples
## [1] 0.7058824
# b) P(female blue | male blue), reading the question as a conditional
#    probability.
n_both_blue / n_male_blue
## [1] 0.6842105
# c) P(female blue | male brown), again read as a conditional probability.
n_female_blue_male_brown / n_male_brown
## [1] 0.3518519
#    P(female blue | male green), again read as a conditional probability.
n_female_blue_male_green / n_male_green
## [1] 0.3055556
# d) Proportionately, the table suggests that males tend to have partners
#    with the same eye color: P(female blue | male blue) is far higher than
#    P(female blue | male brown) or P(female blue | male green). Therefore
#    the eye colors of male and female partners are not independent.
# 2.30
# Book counts by type (rows: fiction, non-fiction) and format
# (columns: hardcover, paperback, row total).
hc <- c(13, 15)
pb <- c(59, 8)
total <- c(72, 23)
df <- data.frame(hc, pb, total)
# Append the column totals as a final row. colSums() is used rather than a
# variable named `sum`, which would shadow the base function.
df <- rbind(df, unname(colSums(df)))
row.names(df) <- c("fic", "non-fic", "total")
df
## hc pb total
## fic 13 59 72
## non-fic 15 8 23
## total 28 67 95
n_books <- df["total", "total"]
n_hc <- df["total", "hc"]
n_fic <- df["fic", "total"]
# a) P(hardcover first) * P(paperback fiction second) -- without replacement.
#    A hardcover drawn first never removes a paperback fiction book, so all
#    59 are still available among the remaining 94.
p_hc_then_pb_fic <- (n_hc / n_books) * (df["fic", "pb"] / (n_books - 1))
p_hc_then_pb_fic
## [1] 0.1849944
# b) P(fiction first, hardcover second) -- without replacement.
#    The number of hardcovers left depends on the first book, so split by
#    its format:
#      fiction hardcover first -> 27 hardcovers remain out of 94
#      fiction paperback first -> 28 hardcovers remain out of 94
p_fic_then_hc <-
  (df["fic", "hc"] / n_books) * ((n_hc - 1) / (n_books - 1)) +
  (df["fic", "pb"] / n_books) * (n_hc / (n_books - 1))
p_fic_then_hc
## [1] 0.2243001
# c) P(fiction first) * P(hardcover second) -- with replacement, so the two
#    draws are independent.
p_fic_then_hc_repl <- (n_fic / n_books) * (n_hc / n_books)
p_fic_then_hc_repl
## [1] 0.2233795
# d) The answers to b) and c) are close (0.2243 vs 0.2234) because removing a
#    single book from 95 barely changes the make-up of the shelf (94 vs 95
#    in the denominator). The more books are drawn without replacement, the
#    larger the difference becomes.
# a) Revenue per passenger, modelled as a discrete random variable.
bags <- c(0, 1, 2)
prob <- c(0.54, 0.34, 0.12)   # P(0 bags), P(1 bag), P(2 bags)
fees <- c(0, 25, 25 + 35)     # fee paid for 0, 1 and 2 bags
# Mean: E[X] = sum of fee * probability.
exp.value <- prob * fees
avg.rev.pp <- sum(exp.value)
# Variance: Var[X] = sum of (fee - mean)^2 * probability.
diff.mean <- fees - avg.rev.pp
diff.mean.sqr <- diff.mean^2
diff.mean.sqrTimesProb <- diff.mean.sqr * prob
var.rev.pp <- sum(diff.mean.sqrTimesProb)
# Working table: one row per intermediate quantity, one column per bag count.
df <- rbind(bags, prob, fees, exp.value, diff.mean, diff.mean.sqr, diff.mean.sqrTimesProb)
std.rev.pp <- sqrt(var.rev.pp)
# average revenue per passenger
avg.rev.pp
## [1] 15.7
# corresponding standard deviation
std.rev.pp
## [1] 19.95019
# b) Total revenue from 120 passengers, assuming passengers check bags
#    independently of one another. Means add and VARIANCES add, so
#      E[total]  = 120 * E[X]
#      SD[total] = sqrt(120 * Var[X]) = sqrt(120) * SD[X]
#    (the single-passenger SD must not be added to the 120-passenger total).
n.passengers <- 120
exp.rev.total <- avg.rev.pp * n.passengers
std.rev.total <- sqrt(n.passengers * var.rev.pp)
# One standard deviation either side of the expected total.
revenue.passenger.upper <- exp.rev.total + std.rev.total
revenue.passenger.lower <- exp.rev.total - std.rev.total
exp.rev.total
## [1] 1884
std.rev.total
## [1] 218.5434
revenue.passenger.upper
## [1] 2102.543
revenue.passenger.lower
## [1] 1665.457
# Expected revenue is about $1884 with a standard deviation of about $218.54,
# i.e. roughly [$1665 to $2103] within one standard deviation.
# Income distribution table: one row per income bracket.
# The sixth label ends at $64,999 so that it agrees with its bin width
# (14999) and with the next bracket starting at $65,000.
income <- c("$1 - $9,999 or loss",
            "$10,000 to $14,999",
            "$15,000 to $24,999",
            "$25,000 to $34,999",
            "$35,000 to $49,999",
            "$50,000 to $64,999",
            "$65,000 to $74,999",
            "$75,000 to $99,999",
            "$100,000 or more")
# Lower bound and width of each bracket. The last bracket is open-ended, so
# its width (99999) is a choice made for plotting, not a value from the data.
bounds <- c(1, 10000, 15000, 25000, 35000, 50000, 65000, 75000, 100000)
size <- c(9999, 4999, 9999, 9999, 14999, 14999, 9999, 24999, 99999)
# Midpoint of each bracket.
center <- bounds + (size / 2)
# Relative frequency of each bracket (the values sum to 1).
total <- c(0.022, 0.047, 0.158, 0.183, 0.212, 0.139, 0.058, 0.084, 0.097)
df2 <- data.frame(income, center, total)
df2
## income center total
## 1 $1 - $9,999 or loss 5000.5 0.022
## 2 $10,000 to $14,999 12499.5 0.047
## 3 $15,000 to $24,999 19999.5 0.158
## 4 $25,000 to $34,999 29999.5 0.183
## 5 $35,000 to $49,999 42499.5 0.212
## 6 $50,000 to $64,999 57499.5 0.139
## 7 $65,000 to $74,999 69999.5 0.058
## 8 $75,000 to $99,999 87499.5 0.084
## 9 $100,000 or more 149999.5 0.097
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.3
# Variable-width bar chart of the income distribution: each bracket is drawn
# as a rectangle spanning its own income range (center -/+ half its width),
# with height equal to its relative frequency.
# The rectangle edges are built into a data frame and drawn with geom_rect(),
# so every aesthetic comes from the plotted data. Mapping `width = size`
# inside geom_bar()'s aes() pulled `size` from the global environment and
# triggered "Ignoring unknown aesthetics: width".
income_bars <- data.frame(
  xmin = df2$center - size / 2,
  xmax = df2$center + size / 2,
  freq = df2$total
)
graph <- ggplot(data = income_bars) +
  geom_rect(aes(xmin = xmin, xmax = xmax, ymin = 0, ymax = freq)) +
  labs(x = "Income ($)",
       y = "Relative Frequency",
       title = "Survey for 2005-2009")
# The bar heights peak around $35K to $50K, with a second peak for $100K+.
graph
# b) P(less than 50k): sum of the relative frequencies of the first five
#    income brackets.
less_50k_prob <- sum(df2$total[1:5])
less_50k_prob
## [1] 0.622
# c) P(less than 50k and female) IF the two events are independent:
#    P(less than 50k) * P(female).
f_prob <- 0.41
joint_if_independent <- less_50k_prob * f_prob
joint_if_independent
## [1] 0.25502
# d) P(less than 50k and female) from the conditional probability:
#    P(less than 50k | female) * P(female). The value differs noticeably
#    from c).
f_prob_data_less50k <- 0.718
joint_from_data <- f_prob_data_less50k * f_prob
joint_from_data
## [1] 0.29438
# Independence would require
#   P(less than 50k) * P(female) = P(less than 50k | female) * P(female),
# i.e. P(less than 50k) = P(less than 50k | female). Compare with a numeric
# tolerance rather than `==`, since both sides are floating point.
isTRUE(all.equal(joint_if_independent, joint_from_data))
## [1] FALSE
# Since less_50k_prob (0.622) is not equal to f_prob_data_less50k (0.718),
# making less than 50k and being female are not independent events.