# NOTE(review): 36 is presumably the number of equally likely outcomes of
# rolling two dice (6 * 6) - confirm against the exercise text.
# 4 favourable outcomes out of 36
4 / 36
## [1] 0.1111111
# 1 favourable outcome out of 36
1 / 36
## [1] 0.02777778
#install.packages('VennDiagram')
library(VennDiagram)
# Shares of the two groups, in percent.
below_poverty <- 14.6
foreign_language <- 20.7
# Share that belongs to both groups at once (the overlap of the two circles).
cross <- 4.2
# Build the two-set Venn diagram. `ind = FALSE` stops draw.pairwise.venn()
# from drawing the diagram itself, so the single grid.draw() call below does
# not paint the same grobs on top of an already drawn copy.
venn <- draw.pairwise.venn(
  area1 = below_poverty,
  area2 = foreign_language,
  cross.area = cross,
  category = c("BelowPoverty", "ForeignLanguage"),
  ind = FALSE
)
grid.draw(venn)
# P(below poverty line or speak foreign language) = P(bp) + P(fl) - P(both)
bp_or_fl <- below_poverty + foreign_language - cross
bp_or_fl
## [1] 31.1
# P(neither bp nor fl) = 100% - P(bp or fl); all values here are in percent
100 - bp_or_fl
## [1] 68.9
# Independence check 1: under independence P(both) would equal P(bp) * P(fl).
# Dividing by 100 keeps the product in percent so it can be compared directly
# with `cross`.
(below_poverty * foreign_language) / 100
## [1] 3.0222
# Independence check 2: P(bp | fl) = P(both) / P(fl), scaled by 100 so it is
# in percent like the marginal P(bp) printed right after it.
cross / foreign_language * 100
## [1] 20.28986
below_poverty
## [1] 14.6
# Events: A = Female Blue, B = Male Blue
# Counts the probabilities below are built from
both_blue <- 78
male_blue <- 114
female_blue <- 108
total <- 204
# P(A): female blue
p_fb <- female_blue / total
# P(B): male blue
p_mb <- male_blue / total
# P(A and B): male and female both blue
p_bnb <- both_blue / total
# P(A or B) by the general addition rule: P(A) + P(B) - P(A and B)
p_bub <- p_fb + p_mb - p_bnb
# P(A | B) = P(A and B) / P(B)
p_bnb / p_mb
## [1] 0.6842105
# Same ratio as the P(Female Blue | Male Brown) line further down
19 / 54
## [1] 0.3518519
# NOTE(review): presumably P(Female Blue | Male Green) - confirm against the
# source table
11 / 36
## [1] 0.3055556
# P(Female Blue | Male Brown)
19 / 54
## [1] 0.3518519
# P(Female Blue), the unconditional value to compare the conditionals with
p_fb
## [1] 0.5294118
# Counts read off the fractions below: 95 books in total, 28 hardcover,
# 72 fiction, 59 paperback fiction.
# marginal probability for hard cover (first draw)
hc <- 28 / 95
# conditional probability of paperback fiction on the second draw, without
# replacement: a hardcover was removed, so all 59 remain among 94 books
pf <- 59 / 94
hc * pf
## [1] 0.1849944
# marginal probability for fiction
f <- 72 / 95
# P(fiction first, hardcover second) without replacement.
# The fiction book drawn first may itself be a hardcover, which leaves 27
# instead of 28 hardcovers among the remaining 94 books, so the first draw has
# to be split into its hardcover and paperback cases.
# Assumes every book is either hardcover or paperback, so that
# hardcover fiction = all fiction - paperback fiction = 72 - 59.
hc_fiction <- 72 - 59
p_hcf_first <- hc_fiction / 95
p_pbf_first <- 59 / 95
# P(hardcover second | paperback fiction first)
hf <- 28 / 94
# P(hardcover second | hardcover fiction first)
hf_after_hcf <- 27 / 94
f_then_hc <- p_hcf_first * hf_after_hcf + p_pbf_first * hf
f_then_hc
## [1] 0.2243001
f
## [1] 0.7578947
# with replacement the second draw is again from all 95 books, so the
# probability of a hardcover is the plain marginal 28/95
hf_rep <- 28 / 95
f * hf_rep
## [1] 0.2233795
# Number of checked bags and the fee for each case (25 for the first bag,
# a further 35 for the second).
luggages <- c("0", "1", "2")
price <- c(0, 25, 25 + 35)
# Probability of each case, in the same order as `luggages`.
p_pax <- c(0.54, 0.34, 0.12)
# Expected Value: probability-weighted sum of the fees
X <- price * p_pax
EV <- sum(X)
EV
## [1] 15.7
# SD: square root of the probability-weighted squared deviations from EV,
# computed from the vectors so it stays in step with `price` and `p_pax`
sd <- sqrt(sum(p_pax * (price - EV)^2))
sd
## [1] 19.95019
pax <- 120
# Expected total over `pax` passengers
pax * EV
## [1] 1884
# SD of the total: sqrt of pax * variance (treats passengers as independent).
# Uses the unrounded `sd`; a hand-copied 19.95 is off from the fourth decimal.
sd_total <- sqrt(pax * sd^2)
sd_total
## [1] 218.5434
library(ggplot2)
# Income brackets, listed from lowest to highest.
income <- c(
  "$1 to $9,999 or loss",
  "$10,000 to $14,999",
  "$15,000 to $24,999",
  "$25,000 to $34,999",
  "$35,000 to $49,999",
  # upper bound is $64,999: the next bracket starts at $65,000
  "$50,000 to $64,999",
  "$65,000 to $74,999",
  "$75,000 to $99,999",
  "$100,000 or more"
)
# Proportion in each bracket, in the same order as `income` (sums to 1).
# Note that this rebinds `total`, which held a scalar count earlier on.
total <- c(0.022, 0.047, 0.158, 0.183, 0.212, 0.139, 0.058, 0.084, 0.097)
# One row per bracket, with columns `income` and `total`
income_gender <- data.frame(income, total)
# Bar chart of the proportion in each income bracket. A character x variable
# is laid out in sorted order by default, which places "$100,000 or more"
# among the lowest brackets; pinning the axis limits to the row order of the
# data frame keeps the brackets running from lowest to highest income.
ggplot(income_gender, aes(income, total)) +
  geom_bar(stat = "identity") +
  scale_x_discrete(limits = as.character(income_gender$income)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# (a): Assuming there are outliers on the high end (expected from the nature
# of income data), the distribution is right skewed. The median lies in the
# $35,000 to $49,999 bracket, and the IQR is roughly $30,000.
# (b): 62.2% have an income below $50,000.
# Proportion with an income below $50,000: the first five brackets of
# `income_gender`, added up.
less_than_50 <- sum(income_gender[["total"]][seq_len(5)])
less_than_50
## [1] 0.622
# Scale that proportion by 0.41.
# NOTE(review): 0.41 is presumably the proportion of females in the sample,
# and the product then assumes income and gender are independent - confirm.
less_than_50 * 0.41
## [1] 0.25502