DATA 606 Week 2 Assignment

Question 2.8 - Poverty and Language

belowPL <- 0.146
nonEnglishSpeaking <- 0.207
bothCategories <- 0.042

grid.newpage()
draw.pairwise.venn(
                    area1 = belowPL, area2 = nonEnglishSpeaking, 
                    cross.area = bothCategories, category = c("Below Poverty Line", "Foreign Language Speakers")
                    , lty = rep("blank", 2), fill = c("light blue", "yellow"), alpha = rep(0.5, 2)
                    , cat.pos = c(0, 0), cat.dist = rep(0.025, 2)
                  )

## (polygon[GRID.polygon.1], polygon[GRID.polygon.2], polygon[GRID.polygon.3], polygon[GRID.polygon.4], text[GRID.text.5], text[GRID.text.6], text[GRID.text.7], text[GRID.text.8], text[GRID.text.9])

Question 2.20 - Assortative Mating

assortiveMatingStats <- read.csv("https://raw.githubusercontent.com/jbryer/DATA606Fall2016/master/Data/Data%20from%20openintro.org/Ch%202%20Exercise%20Data/assortive_mating.csv")
table(assortiveMatingStats)

##          partner_female
## self_male blue brown green
##     blue    78    23    13
##     brown   19    23    12
##     green   11     9    16

# (a) What is the probability that a randomly chosen male respondent or his partner has blue eyes?
# P(Male Blue or Female Blue) = P(Male Blue) + P(Female Blue) - ( P(Male Blue total) + P(Female Blue Total) )
((sum(assortiveMatingStats$self_male =="blue")/nrow(assortiveMatingStats)) + (sum(assortiveMatingStats$partner_female =="blue")/nrow(assortiveMatingStats)))

## [1] 1.088235

  - (sum(assortiveMatingStats$self_male =="blue" & assortiveMatingStats$partner_female=="blue")/nrow(assortiveMatingStats))

## [1] -0.3823529

# (b) What is the probability that a randomly chosen male respondent with blue eyes has a partner with blue eyes?
# P(Male Blue and Female Blue)
(sum(assortiveMatingStats$self_male =="blue" & assortiveMatingStats$partner_female=="blue")/nrow(assortiveMatingStats))

## [1] 0.3823529

# (c) What is the probability that a randomly chosen male respondent with brown eyes has a partner with blue eyes? 
# P(male Brown and Female Blue) = P(Male Brown) + P(Female Blue)
( sum(assortiveMatingStats$self_male =="brown" & assortiveMatingStats$partner_female == "blue") / nrow(assortiveMatingStats) )/( sum(assortiveMatingStats$self_male =="brown") / nrow(assortiveMatingStats) )

## [1] 0.3518519

# What about the probability of a randomly chosen male respondent with green eyes having a partner with blue eyes?
( sum(assortiveMatingStats$self_male =="green" & assortiveMatingStats$partner_female == "blue") / nrow(assortiveMatingStats) )/( sum(assortiveMatingStats$self_male =="green") / nrow(assortiveMatingStats) )

## [1] 0.3055556

# (d) Does it appear that the eye colors of male respondents and their partners are independent? Explain your reasoning.
(sum(assortiveMatingStats$self_male =="blue" & assortiveMatingStats$partner_female=="blue")/nrow(assortiveMatingStats)) # Blue Male with Blue Female

## [1] 0.3823529

(sum(assortiveMatingStats$self_male =="blue")/nrow(assortiveMatingStats)) # Blue Male ratio

## [1] 0.5588235

# No, they are not independent. Probability of men with blue eyes are not equal to the female partners.

Question 2.30 - Books on a bookshelf

books <- read.csv("https://raw.githubusercontent.com/jbryer/DATA606Fall2016/master/Data/Data%20from%20openintro.org/Ch%202%20Exercise%20Data/books.csv")
table(books)

##             format
## type         hardcover paperback
##   fiction           13        59
##   nonfiction        15         8

# (a) Find the probability of drawing a hardcover book first then a paperback fiction book second when drawing without replacement.
(sum(books$format == "hardcover")/nrow(books)) * (sum(books$format == "paperback" & books$type == "fiction")/(nrow(books)-1) )

## [1] 0.1849944

# (b) Determine the probability of drawing a fiction book first and then a hardcover book second, when drawing without replacement.
(sum(books$type == "fiction")/nrow(books)) * (sum(books$format == "hardcover")/(nrow(books)-1) )

## [1] 0.2257559

# (c) Calculate the probability of the scenario in part (b), except this time complete the calculations 
# under the scenario where the first book is placed back on the bookcase before randomly drawing the second book.
(sum(books$type == "fiction")/nrow(books)) * (sum(books$format == "hardcover")/nrow(books) )

## [1] 0.2233795

# (d) The final answers to parts (b) and (c) are very similar. Explain why this is the case.
# Because the difference in the number of books is only one which is very small number compared to the total books size 95

Question 2.38 - Baggage fees

baggageCharge <- c(0, 25, 70)
probBags <- c(0.54, 0.34, 0.12)
baggageDF <- data.frame(baggageCharge, probBags)
names(baggageDF) <- c("Charge", "Probability")
baggageDF

##   Charge Probability
## 1      0        0.54
## 2     25        0.34
## 3     70        0.12

# (a) Build a probability model, compute the average revenue per passenger, and compute the corresponding standard deviation.
# Average Revenue per Passenger
averageRevenue <- (sum((baggageDF$Charge * baggageDF$Probability)))
averageRevenue

## [1] 16.9

Question 2.44 - Income and gender

# 2.44 The relative frequency table below displays the distribution of annual total personal income 
# (in 2009 inflation-adjusted dollars) for a representative sample of 96,420,486 Americans. 
# These data come from the American Community Survey for 2005-2009. 
# This sample is comprised of 59% males and 41% females.

# (a) Describe the distribution of total personal income.

annualIncome <- c(
                  "$1 to $9,999"
                  ,"$10,000 to $14,999"
                  ,"$15,000 to $24,999"
                  ,"$25,000 to $34,999"
                  ,"$35,000 to $49,999"
                  ,"$50,000 to $64,999"
                  ,"$65,000 to $74,999"
                  ,"$75,000 to $99,999"
                  ,"$100,000 or more"
                )
populationTotal <- c(
                    2.2
                    ,4.7
                    ,15.8
                    ,18.3
                    ,21.2
                    ,13.9
                    ,5.8
                    ,8.4
                    ,9.7
                  )

incomePopulationDF <- data.frame(annualIncome, populationTotal)
incomePopulationDF

##         annualIncome populationTotal
## 1       $1 to $9,999             2.2
## 2 $10,000 to $14,999             4.7
## 3 $15,000 to $24,999            15.8
## 4 $25,000 to $34,999            18.3
## 5 $35,000 to $49,999            21.2
## 6 $50,000 to $64,999            13.9
## 7 $65,000 to $74,999             5.8
## 8 $75,000 to $99,999             8.4
## 9   $100,000 or more             9.7

barplot(incomePopulationDF$populationTotal, main="Income Distribution", xlab="% of Population")

# Probability that a randomly chosen US resident makes less than $50,000 per year
(2.2 + 4.7 + 15.8 + 18.3 + 21.2) / 100

## [1] 0.622

DATA 606 Week 2 Assignment - Probability

Kalyanaraman Parthasarathy

Question 2.8 - Poverty and Language

Question 2.20 - Assortative Mating

Question 2.30 - Books on a bookshelf

Question 2.38 - Baggage fees

Question 2.44 - Income and gender