STAT50 MIDTERM EXAM

# Load the survey workbook into `Luna` and print it.
library("readxl")
Luna <- read_excel(path = "E:/STAT50/MTE/Alliah1.xlsx")
Luna

## # A tibble: 158 × 38
##    Responde…¹ School   Age Sex   Salar…² Marit…³ Year …⁴ Posit…⁵ Highe…⁶ `S-C 1`
##         <dbl> <chr>  <dbl> <chr> <chr>   <chr>   <chr>   <chr>   <chr>     <dbl>
##  1          1 VCCS      30 Male  25,001… Married 2       T-I     Colleg…       4
##  2          2 VCCS      38 Fema… 27,001… Married 2       T-II    Colleg…       3
##  3          3 VCCS      40 Fema… 27,001… Married 2       T-II    Colleg…       5
##  4          4 VCCS      40 Male  25,001… Married 4       T-I     Colleg…       5
##  5          5 VCCS      54 Fema… 25,001… Married 2       T-I     Colleg…       4
##  6          6 VCCS      41 Fema… 25,001… Married 2       T-I     Colleg…       5
##  7          7 VCCS      40 Fema… 25,001… Married 3       T-I     Colleg…       5
##  8          8 VCCS      63 Fema… 31,001… Married 3       T-III   MAT           5
##  9          9 VCCS      53 Fema… 29,001… Married 1       T-III   Colleg…       4
## 10         10 VCCS      46 Male  25,001… Married 6       T-I     Colleg…       4
## # … with 148 more rows, 28 more variables: `S-C 2` <dbl>, `S-C 3` <dbl>,
## #   `S-C 4` <dbl>, `S-C 5` <dbl>, SCTotal <dbl>, AveSC <dbl>, `FK 1` <dbl>,
## #   `FK 2` <dbl>, `FK 3` <dbl>, `FK 4` <dbl>, `FK 5` <dbl>, FKTotal <dbl>,
## #   AveFK <dbl>, `PI 1` <dbl>, `PI 2` <dbl>, `PI 3` <dbl>, `PI 4` <dbl>,
## #   `PI 5` <dbl>, PITotal <dbl>, AvePI <dbl>, `RR 1` <dbl>, `RR 2` <dbl>,
## #   `RR 3` <dbl>, `RR 4` <dbl>, `RR 5` <dbl>, Rrtotal <dbl>, AveRR <dbl>,
## #   `Investment Alternatives` <chr>, and abbreviated variable names …

Question 1

library(dplyr)

# Frequency and within-school percentage of each AveFK rating band.
Luna %>%
  mutate(
    AveFKFrequency = recode(
      AveFK,
      # Lookup of average score -> band label, spliced in as named
      # replacements.
      !!!c(
        "2.0" = "Below Average",
        "2.6" = "Average",
        "2.8" = "Average",
        "3.0" = "Average",
        "3.2" = "Average",
        "3.4" = "Average",
        "3.6" = "Above Average",
        "3.8" = "Above Average",
        "4.0" = "Above Average",
        "4.2" = "Above Average",
        "4.4" = "Above Average",
        "4.6" = "Excellent",
        "4.8" = "Excellent",
        "5.0" = "Excellent"
      )
    )
  ) %>%
  group_by(School, AveFKFrequency) %>%
  summarise(Frequency = n()) %>%
  # summarise() leaves the result grouped by School, so sum(Frequency) is the
  # per-school total.
  mutate(Percentage = round(Frequency / sum(Frequency) * 100, 2))

## `summarise()` has grouped output by 'School'. You can override using the
## `.groups` argument.

## # A tibble: 7 × 4
## # Groups:   School [2]
##   School AveFKFrequency Frequency Percentage
##   <chr>  <chr>              <int>      <dbl>
## 1 VCCS   Above Average         41      66.1 
## 2 VCCS   Average                7      11.3 
## 3 VCCS   Excellent             14      22.6 
## 4 VNHS   Above Average         68      70.8 
## 5 VNHS   Average                7       7.29
## 6 VNHS   Below Average          2       2.08
## 7 VNHS   Excellent             19      19.8

Question 2

library(dplyr)

# Frequency and within-school percentage of each AveSC rating band.
Luna %>%
  mutate(
    AveSCFrequency = recode(
      AveSC,
      # Lookup of average score -> band label, spliced in as named
      # replacements.
      !!!c(
        "2.4" = "Below Average",
        "2.6" = "Average",
        "2.8" = "Average",
        "3.0" = "Average",
        "3.2" = "Average",
        "3.4" = "Average",
        "3.6" = "Above Average",
        "3.8" = "Above Average",
        "4.0" = "Above Average",
        "4.2" = "Above Average",
        "4.4" = "Above Average",
        "4.6" = "Excellent",
        "4.8" = "Excellent",
        "5.0" = "Excellent"
      )
    )
  ) %>%
  group_by(School, AveSCFrequency) %>%
  summarise(Frequency = n()) %>%
  # summarise() leaves the result grouped by School, so sum(Frequency) is the
  # per-school total.
  mutate(Percentage = round(Frequency / sum(Frequency) * 100, 2))

## `summarise()` has grouped output by 'School'. You can override using the
## `.groups` argument.

## # A tibble: 8 × 4
## # Groups:   School [2]
##   School AveSCFrequency Frequency Percentage
##   <chr>  <chr>              <int>      <dbl>
## 1 VCCS   Above Average         38      61.3 
## 2 VCCS   Average               14      22.6 
## 3 VCCS   Below Average          1       1.61
## 4 VCCS   Excellent              9      14.5 
## 5 VNHS   Above Average         58      60.4 
## 6 VNHS   Average               24      25   
## 7 VNHS   Below Average          1       1.04
## 8 VNHS   Excellent             13      13.5

Question 3

library(dplyr)

# Frequency and within-school percentage of each AvePI rating band.
Luna %>%
  mutate(
    AvePIFrequency = recode(
      AvePI,
      # Lookup of average score -> band label, spliced in as named
      # replacements. Labels must match exactly: a stray leading space
      # (" Below Average") would be counted as a separate band.
      !!!c(
        "0.0" = "Very Poor",
        "1.6" = "Below Average",
        "2.0" = "Below Average",
        "2.2" = "Below Average",
        "2.4" = "Below Average",
        "2.6" = "Average",
        "2.8" = "Average",
        "3.0" = "Average",
        "3.2" = "Average",
        "3.4" = "Average",
        "3.6" = "Above Average",
        "3.8" = "Above Average",
        "4.0" = "Above Average",
        "4.2" = "Above Average",
        "4.4" = "Above Average",
        "4.6" = "Excellent",
        "4.8" = "Excellent",
        "5.0" = "Excellent"
      )
    )
  ) %>%
  group_by(School, AvePIFrequency) %>%
  summarise(Frequency = n()) %>%
  # summarise() leaves the result grouped by School, so sum(Frequency) is the
  # per-school total.
  mutate(Percentage = round(Frequency / sum(Frequency) * 100, 2))

## `summarise()` has grouped output by 'School'. You can override using the
## `.groups` argument.

## # A tibble: 9 × 4
## # Groups:   School [2]
##   School AvePIFrequency Frequency Percentage
##   <chr>  <chr>              <int>      <dbl>
## 1 VCCS   Above Average         39      62.9 
## 2 VCCS   Average               15      24.2 
## 3 VCCS   Below Average          1       1.61
## 4 VCCS   Excellent              7      11.3 
## 5 VNHS   Above Average         49      51.0 
## 6 VNHS   Average               34      35.4 
## 7 VNHS   Below Average          7       7.29
## 8 VNHS   Excellent              5       5.21
## 9 VNHS   Very Poor              1       1.04

Question 4

library(dplyr)

# Frequency and within-school percentage of each AveRR rating band.
Luna %>%
  mutate(
    AveRRFrequency = recode(
      AveRR,
      # Lookup of average score -> band label, spliced in as named
      # replacements.
      # NOTE(review): "2.6" is labelled "Below Average" here but "Average" in
      # the AveFK/AveSC/AvePI tables -- confirm which cut-off is intended.
      !!!c(
        "2.0" = "Below Average",
        "2.4" = "Below Average",
        "2.6" = "Below Average",
        "2.8" = "Average",
        "3.0" = "Average",
        "3.2" = "Average",
        "3.4" = "Average",
        "3.6" = "Above Average",
        "3.8" = "Above Average",
        "4.0" = "Above Average",
        "4.2" = "Above Average",
        "4.4" = "Above Average",
        "4.6" = "Excellent",
        "4.8" = "Excellent",
        "5.0" = "Excellent"
      )
    )
  ) %>%
  group_by(School, AveRRFrequency) %>%
  summarise(Frequency = n()) %>%
  # summarise() leaves the result grouped by School, so sum(Frequency) is the
  # per-school total. Rounded to 2 decimals like the other band tables.
  mutate(Percentage = round(Frequency / sum(Frequency) * 100, 2))

## `summarise()` has grouped output by 'School'. You can override using the
## `.groups` argument.

## # A tibble: 8 × 4
## # Groups:   School [2]
##   School AveRRFrequency Frequency Percentage
##   <chr>  <chr>              <int>      <dbl>
## 1 VCCS   Above Average         43      69.4 
## 2 VCCS   Average               10      16.1 
## 3 VCCS   Below Average          1       1.61
## 4 VCCS   Excellent              8      12.9 
## 5 VNHS   Above Average         67      69.8 
## 6 VNHS   Average               19      19.8 
## 7 VNHS   Below Average          5       5.21
## 8 VNHS   Excellent              5       5.21

# Load the second workbook into `Kyro`; readxl auto-names the header-less
# columns (`...6` to `...12`), as reported in the message below.
library("readxl")
Kyro <- read_excel(path = "E:/STAT50/MTE/Alliah2.xlsx")

## New names:
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`

Kyro

## # A tibble: 64 × 12
##    PhaseIn…¹ Tempe…² Adlay…³ Adlay…⁴ Adlay…⁵ ...6  ...7  ...8  ...9  ...10 ...11
##    <chr>     <chr>     <dbl>   <dbl>   <dbl> <lgl> <lgl> <chr> <chr> <chr> <chr>
##  1 3- day    25°C        759     220     819 NA    NA    <NA>  <NA>  <NA>  <NA> 
##  2 3- day    25°C        736     812     763 NA    NA    Conc… dip   Day   Weig…
##  3 3- day    25°C        891     316     457 NA    NA    1     15    4     <NA> 
##  4 3- day    30°C        573      96     356 NA    NA    1     15    4     <NA> 
##  5 3- day    30°C        584     137     247 NA    NA    1     15    4     <NA> 
##  6 3- day    30°C        482      NA     298 NA    NA    1     15    4     <NA> 
##  7 3- day    35°C        384      79     346 NA    NA    1     30    4     <NA> 
##  8 3- day    35°C        264     934     823 NA    NA    1     30    4     <NA> 
##  9 3- day    35°C        208     556     536 NA    NA    1     30    4     <NA> 
## 10 5-day     25°C        888     267     879 NA    NA    1     30    4     <NA> 
## # … with 54 more rows, 1 more variable: ...12 <chr>, and abbreviated variable
## #   names ¹PhaseInterval, ²Temperature, ³`Adlay with wash`, ⁴`Adlay with milk`,
## #   ⁵`Adlay with milk and molasses`

library(rstatix)

# Stack the three "Adlay ..." measurement columns into long form: the column
# name goes to `Phase` (converted to a factor) and the value to `CFUcount`.
Phase <- convert_as_factor(
  gather(
    Kyro,
    key = "Phase",
    value = "CFUcount",
    `Adlay with wash`,
    `Adlay with milk`,
    `Adlay with milk and molasses`
  ),
  Phase
)
Phase

## # A tibble: 192 × 11
##    PhaseInterval Tempe…¹ ...6  ...7  ...8  ...9  ...10 ...11 ...12 Phase CFUco…²
##    <chr>         <chr>   <lgl> <lgl> <chr> <chr> <chr> <chr> <chr> <fct>   <dbl>
##  1 3- day        25°C    NA    NA    <NA>  <NA>  <NA>  <NA>  <NA>  Adla…     759
##  2 3- day        25°C    NA    NA    Conc… dip   Day   Weig… Color Adla…     736
##  3 3- day        25°C    NA    NA    1     15    4     <NA>  <NA>  Adla…     891
##  4 3- day        30°C    NA    NA    1     15    4     <NA>  <NA>  Adla…     573
##  5 3- day        30°C    NA    NA    1     15    4     <NA>  <NA>  Adla…     584
##  6 3- day        30°C    NA    NA    1     15    4     <NA>  <NA>  Adla…     482
##  7 3- day        35°C    NA    NA    1     30    4     <NA>  <NA>  Adla…     384
##  8 3- day        35°C    NA    NA    1     30    4     <NA>  <NA>  Adla…     264
##  9 3- day        35°C    NA    NA    1     30    4     <NA>  <NA>  Adla…     208
## 10 5-day         25°C    NA    NA    1     30    4     <NA>  <NA>  Adla…     888
## # … with 182 more rows, and abbreviated variable names ¹Temperature, ²CFUcount

Question 5

# Mean and SD of CFUcount for each PhaseInterval.
Sky <- get_summary_stats(
  group_by(Phase, PhaseInterval),
  CFUcount,
  type = "mean_sd"
)
Sky

## # A tibble: 3 × 5
##   PhaseInterval variable     n  mean    sd
##   <chr>         <fct>    <dbl> <dbl> <dbl>
## 1 3- day        CFUcount    26  488.  264.
## 2 5-day         CFUcount    27  284.  269.
## 3 7-day         CFUcount    27  182.  271.

Question 6

# Mean and SD of CFUcount for each Temperature.
Rain <- get_summary_stats(
  group_by(Phase, Temperature),
  CFUcount,
  type = "mean_sd"
)
Rain

## # A tibble: 3 × 5
##   Temperature variable     n  mean    sd
##   <chr>       <fct>    <dbl> <dbl> <dbl>
## 1 25°C        CFUcount    27  480.  353.
## 2 30°C        CFUcount    26  235.  194.
## 3 35°C        CFUcount    27  229.  243.

Question 7

# Mean and SD of CFUcount for each level of the Phase factor.
Cloud <- get_summary_stats(
  group_by(Phase, Phase),
  CFUcount,
  type = "mean_sd"
)
Cloud

## # A tibble: 3 × 5
##   Phase                        variable     n  mean    sd
##   <fct>                        <fct>    <dbl> <dbl> <dbl>
## 1 Adlay with milk              CFUcount    26  190   232.
## 2 Adlay with milk and molasses CFUcount    27  293.  261.
## 3 Adlay with wash              CFUcount    27  459.  323.

Question 8

# Mean and SD of CFUcount for each Temperature x PhaseInterval combination.
Thunder <- get_summary_stats(
  group_by(Phase, Temperature, PhaseInterval),
  CFUcount,
  type = "mean_sd"
)
Thunder

## # A tibble: 9 × 6
##   PhaseInterval Temperature variable     n  mean    sd
##   <chr>         <chr>       <fct>    <dbl> <dbl> <dbl>
## 1 3- day        25°C        CFUcount     9 641.  244. 
## 2 5-day         25°C        CFUcount     9 460.  346. 
## 3 7-day         25°C        CFUcount     9 340.  416. 
## 4 3- day        30°C        CFUcount     8 347.  187. 
## 5 5-day         30°C        CFUcount     9 250.  218. 
## 6 7-day         30°C        CFUcount     9 120.  108. 
## 7 3- day        35°C        CFUcount     9 459.  282. 
## 8 5-day         35°C        CFUcount     9 142.   98.7
## 9 7-day         35°C        CFUcount     9  85.4 104.

Question 9

# Mean and SD of CFUcount for each Temperature x Phase combination.
Fogs <- get_summary_stats(
  group_by(Phase, Temperature, Phase),
  CFUcount,
  type = "mean_sd"
)
Fogs

## # A tibble: 9 × 6
##   Temperature Phase                        variable     n  mean    sd
##   <chr>       <fct>                        <fct>    <dbl> <dbl> <dbl>
## 1 25°C        Adlay with milk              CFUcount     9  243. 230. 
## 2 25°C        Adlay with milk and molasses CFUcount     9  416. 330. 
## 3 25°C        Adlay with wash              CFUcount     9  783. 267. 
## 4 30°C        Adlay with milk              CFUcount     8   84   57.6
## 5 30°C        Adlay with milk and molasses CFUcount     9  193.  98.9
## 6 30°C        Adlay with wash              CFUcount     9  410. 213. 
## 7 35°C        Adlay with milk              CFUcount     9  232. 309. 
## 8 35°C        Adlay with milk and molasses CFUcount     9  269. 271. 
## 9 35°C        Adlay with wash              CFUcount     9  186. 135.

Question 10

Data 5 (summary statistics from Question 5)

# Histograms of the n, mean and sd columns of `Sky`, one plot per column.
# `Sky` has one row per PhaseInterval, so each histogram is drawn from only
# three values.
hist(Sky$n, col="blue")

hist(Sky$mean, col="red")

hist(Sky$sd, col="green")

Data 7 (summary statistics from Question 7)

# Scatter plot of the `sd` column against the `mean` column of `Cloud`
# (mean on the x-axis, sd on the y-axis).
with(Cloud, plot(mean, sd))

STAT50 MIDTERM EXAM

VANESSA LARAZO BANA

2023-03-23