Problem 1:

According to Kantar Media (March 13, 2020), the top four primetime television shows in the Philippines were Ang Probinsyano (Prob), Make It With You (MIWY), Prima Donnas (PD), and Descendants of the Sun Philippine Adaptation (DS). Data indicating the preferred shows for a sample of 50 viewers follow. (15 points)

Prob, PD, MIWY, MIWY, Prob, MIWY, DS, PD, Prob, Prob, MIWY, Prob, Prob, MIWY, PD, Prob, PD, Prob, MIWY, PD, PD, PD, DS, PD, Prob, DS, Prob, Prob, PD, Prob, Prob, MIWY, Prob, DS, PD, Prob, Prob, PD, Prob, DS, DS, Prob, DS, Prob, DS, MIWY, MIWY, Prob, MIWY, PD

\(~\)

  1. Construct a frequency, relative frequency, and percent frequency distribution for the sample.
shows <- c("Prob","PD", "MIWY", "MIWY", "Prob", "MIWY", "DS", "PD", "Prob", "Prob", "MIWY", "Prob", "Prob", "MIWY", "PD", "Prob", "PD", "Prob", "MIWY", "PD", "PD", "PD", "DS", "PD", "Prob", "DS", "Prob", "Prob", "PD", "Prob", "Prob", "MIWY", "Prob", "DS", "PD", "Prob", "Prob", "PD", "Prob", "DS", "DS", "Prob", "DS", "Prob", "DS", "MIWY", "MIWY", "Prob", "MIWY", "PD")

data.freq <- table(shows)
data.relfreq <- data.freq/sum(data.freq)
data.pctfreq <- data.relfreq*100
freq.dist <- cbind(data.freq, data.relfreq, data.pctfreq)
colnames(freq.dist) <- c("Frequency", "Relative Frequency", "Percent Frequency")
pander(freq.dist)
  Frequency Relative Frequency Percent Frequency
DS 8 0.16 16
MIWY 10 0.2 20
PD 12 0.24 24
Prob 20 0.4 40

\(~\)

  1. Construct a bar chart and a pie chart for the sample.
library(tidyverse)
library(forcats)
shows <- as.data.frame(shows)
bar <- ggplot(shows, aes(x=shows)) + geom_bar(width = 0.5) + ggtitle("Primetime Television Shows")
bar

bar1 <- ggplot(mutate(shows, Shows = fct_infreq(shows))) + geom_bar(aes(x = Shows), width = 0.5) + ggtitle("Primetime Television Shows")
bar1

freq <- c(8, 10, 12, 20)
labels <- c("DS", "MIWY", "PD", "Prob")
percents <- round(freq/sum(freq)*100, 1)
labels <- paste(labels, percents)
labels <- paste(labels, "%", sep = " ")
piechart <- pie(freq, labels = labels, col = rainbow (length(labels)), main = "Pie Chart of Primetime Television Shows")

piechart
## NULL

\(~\)

  1. On the basis of the sample, which television show has the largest viewing audience? Which is second?

The television show with the largest viewing audience is Ang Probinsyano, with 40% audience share, followed by Prima Donnas, with 24% audience share.

\(~\)

Problem 2:

The data below shows the time in days required to complete year-end audits for a sample of 20 clients of Sanderson and Clifford, a small public accounting firm. Construct a dot plot for the sample. (5 points)

\(~\)

    Year-end Audit Time (in days)     

Data: 12, 20, 14, 15, 21, 18, 22, 18, 17, 13, 15, 22, 14, 27, 18, 19, 33, 16, 23, 28

time <- c(12, 20, 14, 15, 21, 18, 22, 18, 17, 13, 15, 22, 14, 27, 18, 19, 33, 16, 23, 28)
ID <- 1:20

data <- data.frame(ID, time)
dotplot <- ggplot(data, aes(time)) + geom_dotplot(binwidth = 0.8) + ggtitle("Dotplot of Year-end Audit Time (in days)")
dotplot

\(~\)

Problem 3:

Use the file salaries.csv to construct a crosstabulation of the following pairs of variables: (15 points)

\(~\)

  1. Rank (row variable) vs. Discipline (column variable)
library(readr)
library(summarytools)
library(pander)
data <- read.csv("salaries.csv")
crosstab1 <- ctable(x = data$rank, y = data$discipline)
pander(crosstab1)
crosstab1
  Cross-Tabulation, Row Proportions  
  rank * discipline  
  Data Frame: data  
  
  ----------- ------------ ------------- ------------- --------------
                discipline             A             B          Total
         rank                                                        
    AssocProf                 26 (40.6%)    38 (59.4%)    64 (100.0%)
     AsstProf                 24 (35.8%)    43 (64.2%)    67 (100.0%)
         Prof                131 (49.2%)   135 (50.8%)   266 (100.0%)
        Total                181 (45.6%)   216 (54.4%)   397 (100.0%)
  ----------- ------------ ------------- ------------- --------------

\(~\)

  1. Rank (row variable) vs. Years of Service (column variable, grouped by 10s)
breaks <- seq(0, 70, by = 10)
data$yrsint <- cut(data$yrs.service, breaks, right = FALSE)
crosstab2 <- ctable(x = data$rank, y = data$yrsint)
pander(crosstab2)
crosstab2
  Cross-Tabulation, Row Proportions  
  rank * yrsint  
  Data Frame: data  
  
  ----------- -------- -------------- ------------ ------------ ------------ ----------- ---------- ---------- --------------
                yrsint         [0,10)      [10,20)      [20,30)      [30,40)     [40,50)    [50,60)    [60,70)          Total
         rank                                                                                                                
    AssocProf             41 ( 64.1%)   13 (20.3%)    6 ( 9.4%)    2 ( 3.1%)    1 (1.6%)   1 (1.6%)   0 (0.0%)    64 (100.0%)
     AsstProf             67 (100.0%)    0 ( 0.0%)    0 ( 0.0%)    0 ( 0.0%)    0 (0.0%)   0 (0.0%)   0 (0.0%)    67 (100.0%)
         Prof             36 ( 13.5%)   83 (31.2%)   72 (27.1%)   51 (19.2%)   21 (7.9%)   2 (0.8%)   1 (0.4%)   266 (100.0%)
        Total            144 ( 36.3%)   96 (24.2%)   78 (19.6%)   53 (13.4%)   22 (5.5%)   3 (0.8%)   1 (0.3%)   397 (100.0%)
  ----------- -------- -------------- ------------ ------------ ------------ ----------- ---------- ---------- --------------

\(~\)

  1. Rank (row variable) vs. Salary (column variable, grouped by $25000s)
breaks <- seq(50000, 250000, by = 25000)
data$SalaryInt <- cut(data$salary, breaks, right = FALSE, dig.lab = 7)
crosstab3 <- ctable(x = data$rank, y = data$SalaryInt)
pander(crosstab3)
crosstab3
  Cross-Tabulation, Row Proportions  
  rank * SalaryInt  
  Data Frame: data  
  
  ----------- ----------- --------------- ---------------- ----------------- ----------------- ----------------- ----------------- ----------------- ----------------- --------------
                SalaryInt   [50000,75000)   [75000,100000)   [100000,125000)   [125000,150000)   [150000,175000)   [175000,200000)   [200000,225000)   [225000,250000)          Total
         rank                                                                                                                                                                        
    AssocProf                   8 (12.5%)       28 (43.8%)        27 (42.2%)         1 ( 1.6%)         0 ( 0.0%)          0 (0.0%)          0 (0.0%)          0 (0.0%)    64 (100.0%)
     AsstProf                  21 (31.3%)       46 (68.7%)         0 ( 0.0%)         0 ( 0.0%)         0 ( 0.0%)          0 (0.0%)          0 (0.0%)          0 (0.0%)    67 (100.0%)
         Prof                   3 ( 1.1%)       34 (12.8%)       101 (38.0%)        73 (27.4%)        41 (15.4%)         11 (4.1%)          2 (0.8%)          1 (0.4%)   266 (100.0%)
        Total                  32 ( 8.1%)      108 (27.2%)       128 (32.2%)        74 (18.6%)        41 (10.3%)         11 (2.8%)          2 (0.5%)          1 (0.3%)   397 (100.0%)
  ----------- ----------- --------------- ---------------- ----------------- ----------------- ----------------- ----------------- ----------------- ----------------- --------------