Practice Set 5.1

library(tidyverse)

Warning: package 'tidyverse' was built under R version 4.3.3

Warning: package 'ggplot2' was built under R version 4.3.3

Warning: package 'tibble' was built under R version 4.3.3

Warning: package 'tidyr' was built under R version 4.3.3

Warning: package 'readr' was built under R version 4.3.3

Warning: package 'purrr' was built under R version 4.3.3

Warning: package 'dplyr' was built under R version 4.3.3

Warning: package 'stringr' was built under R version 4.3.3

Warning: package 'forcats' was built under R version 4.3.3

Warning: package 'lubridate' was built under R version 4.3.3

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(purrrfect)


Attaching package: 'purrrfect'

The following objects are masked from 'package:base':

    replicate, tabulate

Question 6

Part C: F(3,4)

# Joint CDF value F(3, 4): sum the joint pmf
#   p(x, y) = dpois(x, 2) * dbinom(y, x, 1/2)
# over every (x, y) row that parameters() builds from 0:3 and 0:4.
# dpois() and dbinom() are vectorized, so the pmf is computed column-wise.
dfQ61 <- parameters(~x, ~y, 0:3, 0:4) %>%
  mutate(pxy = dpois(x, 2) * dbinom(y, x, 1 / 2)) %>%
  summarize("F(3,4)" = sum(pxy))
dfQ61

# A tibble: 1 × 1
  `F(3,4)`
     <dbl>
1    0.857

Part C: F(4,2)

# Joint CDF value F(4, 2): sum the joint pmf
#   p(x, y) = dpois(x, 2) * dbinom(y, x, 1/2)
# over every (x, y) row that parameters() builds from 0:4 and 0:2.
# The result column is labelled "F(4,2)" to match the grid being summed
# (it was previously mislabelled "F(3,4)").
dfQ62 <- parameters(~x, ~y, 0:4, 0:2) %>%
  mutate(
    pxy = pmap_dbl(list(x, y), \(x, y) dpois(x, 2) * dbinom(y, x, 1 / 2))
  ) %>%
  summarize("F(4,2)" = sum(pxy))
dfQ62

# A tibble: 1 × 1
  `F(3,4)`
     <dbl>
1    0.897

Question 7

Part A

Setting up the data frame

# Simulate three coin flips and return c(x, y) as a double vector, where
#   x = number of heads among the three flips, and
#   y = position (1, 2 or 3) of the first head, or -1 when no head occurs.
# Takes no arguments; the result depends on the current RNG state.
xy <- \() {
  coins <- sample(c("H", "T"), 3, replace = TRUE)
  is_head <- coins == "H"
  x <- sum(is_head)
  # Scalar decision, so a plain if/else is used instead of vectorized
  # case_when(); as.double() keeps both branches the same type.
  y <- if (x == 0) -1 else as.double(which(is_head)[1])
  c(x, y)
}

# Run xy() 10,000 times. Each trial's c(x, y) result is stored in the
# list-column `coins`; the two components are then pulled out by position
# into numeric columns `x` and `y`.
df2 <- replicate(10000, xy(), .as = coins) %>%
  mutate(
    x = map_dbl(coins, 1),
    y = map_dbl(coins, 2)
  )
df2

# A tibble: 10,000 × 4
   .trial coins         x     y
    <dbl> <list>    <dbl> <dbl>
 1      1 <dbl [2]>     2     1
 2      2 <dbl [2]>     1     2
 3      3 <dbl [2]>     1     2
 4      4 <dbl [2]>     1     3
 5      5 <dbl [2]>     2     2
 6      6 <dbl [2]>     1     3
 7      7 <dbl [2]>     3     1
 8      8 <dbl [2]>     3     1
 9      9 <dbl [2]>     0    -1
10     10 <dbl [2]>     3     1
# ℹ 9,990 more rows

Making Table

# Empirical joint pmf of (x, y) laid out as a table: one row per y value,
# one column per x value ("x=0", "x=1", ...). Cells for (x, y) pairs that
# never occurred are NA.
# The relative frequency divides by sum(count), i.e. the number of simulated
# trials in df2, so the table stays a valid pmf if the trial count changes.
df2 %>%
  summarize(count = n(), .by = c(x, y)) %>%
  mutate(pxy = count / sum(count)) %>%
  arrange(y) %>%
  pivot_wider(
    id_cols = y,
    names_from = x,
    names_prefix = "x=",
    names_sort = TRUE,
    values_from = pxy
  )

# A tibble: 4 × 5
      y  `x=0`  `x=1`  `x=2`  `x=3`
  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
1    -1  0.126 NA     NA     NA    
2     1 NA      0.122  0.247  0.130
3     2 NA      0.127  0.121 NA    
4     3 NA      0.126 NA     NA

Part B

Making jittered plot

# Jittered scatter of the simulated (x, y) pairs; the jitter spreads the
# overplotted points so the density at each pair is visible.
ggplot(df2, aes(x = x, y = y)) +
  geom_jitter(width = .15, height = .35, size = .4, alpha = .8) +
  scale_x_continuous(breaks = 0:3) +
  scale_y_continuous(breaks = -1:3) +
  theme_classic(base_size = 20)

Part C

Finding \(p_{X|Y}(x|1)\)

# Empirical conditional pmf of x given y == 1: keep only the trials with
# y == 1, count each x value, and divide by the number of retained trials.
df2 %>%
  filter(y == 1) %>%
  count(x, y, name = "count") %>%
  mutate(pxy = count / sum(count)) %>%
  arrange(x)

# A tibble: 3 × 4
      x     y count   pxy
  <dbl> <dbl> <int> <dbl>
1     1     1  1224 0.245
2     2     1  2467 0.494
3     3     1  1305 0.261

Question 8

Part A

Setting up the data frame

# Draw 3 items without replacement from a pool of 9 labels (4 "m", 3 "nm",
# 2 "d") and return c(x, y) as an integer vector, where x is the number of
# "m" labels drawn and y is the number of "nm" labels drawn.
# Takes no arguments; the result depends on the current RNG state.
x_y <- \() {
  pool <- rep(c("m", "nm", "d"), times = c(4, 3, 2))
  drawn <- sample(pool, size = 3, replace = FALSE)
  c(sum(drawn == "m"), sum(drawn == "nm"))
}

# Run x_y() 10,000 times. Each trial's c(x, y) result is stored in the
# list-column `group`; the two components are then pulled out by position
# into numeric columns `x` and `y`.
df3 <- replicate(10000, x_y(), .as = group) %>%
  mutate(
    x = map_dbl(group, 1),
    y = map_dbl(group, 2)
  )
df3

# A tibble: 10,000 × 4
   .trial group         x     y
    <dbl> <list>    <dbl> <dbl>
 1      1 <int [2]>     1     1
 2      2 <int [2]>     1     2
 3      3 <int [2]>     2     0
 4      4 <int [2]>     3     0
 5      5 <int [2]>     0     2
 6      6 <int [2]>     1     0
 7      7 <int [2]>     1     1
 8      8 <int [2]>     1     1
 9      9 <int [2]>     3     0
10     10 <int [2]>     0     3
# ℹ 9,990 more rows

Creating Pivot Table

# Empirical joint pmf of (x, y) laid out as a table: one row per y value,
# one column per x value ("x=0", "x=1", ...). Cells for (x, y) pairs that
# never occurred are NA.
# The relative frequency divides by sum(count), i.e. the number of simulated
# trials in df3, so the table stays a valid pmf if the trial count changes.
df3 %>%
  summarize(count = n(), .by = c(x, y)) %>%
  mutate(pxy = count / sum(count)) %>%
  arrange(y) %>%
  pivot_wider(
    id_cols = y,
    names_from = x,
    names_prefix = "x=",
    names_sort = TRUE,
    values_from = pxy
  )

# A tibble: 4 × 5
      y   `x=0`   `x=1`  `x=2`   `x=3`
  <dbl>   <dbl>   <dbl>  <dbl>   <dbl>
1     0 NA       0.0427  0.142  0.0446
2     1  0.0388  0.294   0.210 NA     
3     2  0.0763  0.139  NA     NA     
4     3  0.0121 NA      NA     NA

Part B

Making jittered plot

# Jittered scatter of the simulated (x, y) pairs; the jitter spreads the
# overplotted points so the density at each pair is visible.
ggplot(df3, aes(x = x, y = y)) +
  geom_jitter(width = .15, height = .35, size = .4, alpha = .8) +
  scale_x_continuous(breaks = 0:3) +
  scale_y_continuous(breaks = 0:3) +
  theme_classic(base_size = 20)

Part C

Finding \(p_{X|Y}(x|2)\)

# Empirical conditional pmf of x given y == 2: keep only the trials with
# y == 2, count each x value, and divide by the number of retained trials.
df3 %>%
  filter(y == 2) %>%
  count(x, y, name = "count") %>%
  mutate(pxy = count / sum(count)) %>%
  arrange(x)

# A tibble: 2 × 4
      x     y count   pxy
  <dbl> <dbl> <int> <dbl>
1     0     2   763 0.354
2     1     2  1391 0.646