library(readxl)

Warning: package 'readxl' was built under R version 4.2.3

library(dplyr)


Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

library(rstatix)

Warning: package 'rstatix' was built under R version 4.2.3


Attaching package: 'rstatix'

The following object is masked from 'package:stats':

    filter

library(ggpubr)

Warning: package 'ggpubr' was built under R version 4.2.3

Loading required package: ggplot2

#For questions 1 to 4 refer to the data “Dataques1to4.”

library(readxl)
# Read the workbook used for questions 1 to 4 and display it.
# NOTE(review): the path is absolute and machine-specific.
Jorielyn <- read_excel(path = "E:/JORIELYN/STAT MIDTERM/Dataques1to4.xlsx")
Jorielyn

# A tibble: 158 × 38
   Responde…¹ School   Age Sex   Salar…² Marit…³ Year …⁴ Posit…⁵ Highe…⁶ `S-C 1`
        <dbl> <chr>  <dbl> <chr> <chr>   <chr>   <chr>   <chr>   <chr>     <dbl>
 1          1 VCCS      30 Male  25,001… Married 2       T-I     Colleg…       4
 2          2 VCCS      38 Fema… 27,001… Married 2       T-II    Colleg…       3
 3          3 VCCS      40 Fema… 27,001… Married 2       T-II    Colleg…       5
 4          4 VCCS      40 Male  25,001… Married 4       T-I     Colleg…       5
 5          5 VCCS      54 Fema… 25,001… Married 2       T-I     Colleg…       4
 6          6 VCCS      41 Fema… 25,001… Married 2       T-I     Colleg…       5
 7          7 VCCS      40 Fema… 25,001… Married 3       T-I     Colleg…       5
 8          8 VCCS      63 Fema… 31,001… Married 3       T-III   MAT           5
 9          9 VCCS      53 Fema… 29,001… Married 1       T-III   Colleg…       4
10         10 VCCS      46 Male  25,001… Married 6       T-I     Colleg…       4
# … with 148 more rows, 28 more variables: `S-C 2` <dbl>, `S-C 3` <dbl>,
#   `S-C 4` <dbl>, `S-C 5` <dbl>, SCTotal <dbl>, AveSC <dbl>, `FK 1` <dbl>,
#   `FK 2` <dbl>, `FK 3` <dbl>, `FK 4` <dbl>, `FK 5` <dbl>, FKTotal <dbl>,
#   AveFK <dbl>, `PI 1` <dbl>, `PI 2` <dbl>, `PI 3` <dbl>, `PI 4` <dbl>,
#   `PI 5` <dbl>, PITotal <dbl>, AvePI <dbl>, `RR 1` <dbl>, `RR 2` <dbl>,
#   `RR 3` <dbl>, `RR 4` <dbl>, `RR 5` <dbl>, Rrtotal <dbl>, AveRR <dbl>,
#   `Investment Alternatives` <chr>, and abbreviated variable names …

Question 1.

# Question 1: frequency and percentage of each AveFK rating category,
# computed separately for every school.
# Rows with a missing AveFK are removed with is.na() instead of comparing the
# numeric column against the string "NA".
Q1 <- Jorielyn %>%
  filter(!is.na(AveFK)) %>%
  mutate(
    # Conditions are tested top to bottom, so each row gets the first
    # category whose upper bound it does not exceed.
    AveFKFrequency = case_when(
      AveFK <= 1.49 ~ "Very Poor",
      AveFK <= 2.49 ~ "Below Average",
      AveFK <= 3.49 ~ "Average",
      AveFK <= 4.49 ~ "Above Average",
      TRUE ~ "Excellent"
    )
  ) %>%
  group_by(School, AveFKFrequency) %>%
  # "drop_last" keeps the result grouped by School, so the percentage below is
  # taken within each school; stating it also avoids the summarise() message.
  summarise(Frequency = n(), .groups = "drop_last") %>%
  mutate(Percentage = round(Frequency / sum(Frequency) * 100, 2))

Q1

# A tibble: 7 × 4
# Groups:   School [2]
  School AveFKFrequency Frequency Percentage
  <chr>  <chr>              <int>      <dbl>
1 VCCS   Above Average         41      66.1 
2 VCCS   Average                7      11.3 
3 VCCS   Excellent             14      22.6 
4 VNHS   Above Average         68      70.8 
5 VNHS   Average                7       7.29
6 VNHS   Below Average          2       2.08
7 VNHS   Excellent             19      19.8

Question 2.

# Question 2: frequency and percentage of each AveSC rating category,
# computed separately for every school.
# Rows with a missing AveSC are removed with is.na() instead of comparing the
# numeric column against the string "NA".
Q2 <- Jorielyn %>%
  filter(!is.na(AveSC)) %>%
  mutate(
    # Conditions are tested top to bottom, so each row gets the first
    # category whose upper bound it does not exceed.
    AveSCFrequency = case_when(
      AveSC <= 1.49 ~ "Very Poor",
      AveSC <= 2.49 ~ "Below Average",
      AveSC <= 3.49 ~ "Average",
      AveSC <= 4.49 ~ "Above Average",
      TRUE ~ "Excellent"
    )
  ) %>%
  group_by(School, AveSCFrequency) %>%
  # "drop_last" keeps the result grouped by School, so the percentage below is
  # taken within each school; stating it also avoids the summarise() message.
  summarise(Frequency = n(), .groups = "drop_last") %>%
  mutate(Percentage = round(Frequency / sum(Frequency) * 100, 2))

Q2

# A tibble: 8 × 4
# Groups:   School [2]
  School AveSCFrequency Frequency Percentage
  <chr>  <chr>              <int>      <dbl>
1 VCCS   Above Average         38      61.3 
2 VCCS   Average               14      22.6 
3 VCCS   Below Average          1       1.61
4 VCCS   Excellent              9      14.5 
5 VNHS   Above Average         58      60.4 
6 VNHS   Average               24      25   
7 VNHS   Below Average          1       1.04
8 VNHS   Excellent             13      13.5

Question 3.

# Question 3: frequency and percentage of each AvePI rating category,
# computed separately for every school.
# The missing-value filter is applied to AvePI, the column being classified
# (it previously tested AveFK, so a missing AvePI would have slipped through
# and formed its own NA category).
Q3 <- Jorielyn %>%
  filter(!is.na(AvePI)) %>%
  mutate(
    # Conditions are tested top to bottom, so each row gets the first
    # category whose upper bound it does not exceed.
    AvePIFrequency = case_when(
      AvePI <= 1.49 ~ "Very Poor",
      AvePI <= 2.49 ~ "Below Average",
      AvePI <= 3.49 ~ "Average",
      AvePI <= 4.49 ~ "Above Average",
      TRUE ~ "Excellent"
    )
  ) %>%
  group_by(School, AvePIFrequency) %>%
  # "drop_last" keeps the result grouped by School, so the percentage below is
  # taken within each school; stating it also avoids the summarise() message.
  summarise(Frequency = n(), .groups = "drop_last") %>%
  mutate(Percentage = round(Frequency / sum(Frequency) * 100, 2))

Q3

# A tibble: 9 × 4
# Groups:   School [2]
  School AvePIFrequency Frequency Percentage
  <chr>  <chr>              <int>      <dbl>
1 VCCS   Above Average         39      62.9 
2 VCCS   Average               15      24.2 
3 VCCS   Below Average          1       1.61
4 VCCS   Excellent              7      11.3 
5 VNHS   Above Average         49      51.0 
6 VNHS   Average               34      35.4 
7 VNHS   Below Average          7       7.29
8 VNHS   Excellent              5       5.21
9 VNHS   Very Poor              1       1.04

Question 4.

# Question 4: frequency and percentage of each AveRR rating category,
# computed separately for every school.
# The missing-value filter is applied to AveRR, the column being classified
# (it previously tested AveFK, so a missing AveRR would have slipped through
# and formed its own NA category).
Q4 <- Jorielyn %>%
  filter(!is.na(AveRR)) %>%
  mutate(
    # Conditions are tested top to bottom, so each row gets the first
    # category whose upper bound it does not exceed.
    AveRRFrequency = case_when(
      AveRR <= 1.49 ~ "Very Poor",
      AveRR <= 2.49 ~ "Below Average",
      AveRR <= 3.49 ~ "Average",
      AveRR <= 4.49 ~ "Above Average",
      TRUE ~ "Excellent"
    )
  ) %>%
  group_by(School, AveRRFrequency) %>%
  # "drop_last" keeps the result grouped by School, so the percentage below is
  # taken within each school; stating it also avoids the summarise() message.
  summarise(Frequency = n(), .groups = "drop_last") %>%
  mutate(Percentage = round(Frequency / sum(Frequency) * 100, 2))

Q4

# A tibble: 7 × 4
# Groups:   School [2]
  School AveRRFrequency Frequency Percentage
  <chr>  <chr>              <int>      <dbl>
1 VCCS   Above Average         43      69.4 
2 VCCS   Average               11      17.7 
3 VCCS   Excellent              8      12.9 
4 VNHS   Above Average         67      69.8 
5 VNHS   Average               21      21.9 
6 VNHS   Below Average          3       3.12
7 VNHS   Excellent              5       5.21

library(readxl)
# Rebind Jorielyn to the second workbook; from here on the name no longer
# refers to the data read from "Dataques1to4.xlsx".
Jorielyn <- read_excel(path = "E:/JORIELYN/STAT MIDTERM/Dataquest5to11.xlsx")

New names:
• `` -> `...6`
• `` -> `...7`
• `` -> `...8`
• `` -> `...9`
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`

Jorielyn

# A tibble: 64 × 12
   PhaseIn…¹ Tempe…² Adlay…³ Adlay…⁴ Adlay…⁵ ...6  ...7  ...8  ...9  ...10 ...11
   <chr>     <chr>     <dbl>   <dbl>   <dbl> <lgl> <lgl> <chr> <chr> <chr> <chr>
 1 3- day    25°C        759     220     819 NA    NA    <NA>  <NA>  <NA>  <NA> 
 2 3- day    25°C        736     812     763 NA    NA    Conc… dip   Day   Weig…
 3 3- day    25°C        891     316     457 NA    NA    1     15    4     <NA> 
 4 3- day    30°C        573      96     356 NA    NA    1     15    4     <NA> 
 5 3- day    30°C        584     137     247 NA    NA    1     15    4     <NA> 
 6 3- day    30°C        482      NA     298 NA    NA    1     15    4     <NA> 
 7 3- day    35°C        384      79     346 NA    NA    1     30    4     <NA> 
 8 3- day    35°C        264     934     823 NA    NA    1     30    4     <NA> 
 9 3- day    35°C        208     556     536 NA    NA    1     30    4     <NA> 
10 5-day     25°C        888     267     879 NA    NA    1     30    4     <NA> 
# … with 54 more rows, 1 more variable: ...12 <chr>, and abbreviated variable
#   names ¹PhaseInterval, ²Temperature, ³`Adlay with wash`, ⁴`Adlay with milk`,
#   ⁵`Adlay with milk and molasses`

Using the data “Dataquest5to11” for questions 5 to 10.

Create a new variable named “Phase” whose values are the names of the columns “Adlay with wash”, “Adlay with milk”, and “Adlay with milk and molasses”. Then create a variable named “CFUcount” that holds the corresponding measurement for each value of “Phase.”

library(rstatix)
# Reshape the three adlay columns into long format: the former column name is
# stored in Phase (converted to a factor) and its value in CFUcount. All other
# columns of Jorielyn are carried along unchanged.
Phase <- convert_as_factor(
  gather(
    Jorielyn,
    key = "Phase",
    value = "CFUcount",
    `Adlay with wash`,
    `Adlay with milk`,
    `Adlay with milk and molasses`
  ),
  Phase
)
Phase

# A tibble: 192 × 11
   PhaseInterval Tempe…¹ ...6  ...7  ...8  ...9  ...10 ...11 ...12 Phase CFUco…²
   <chr>         <chr>   <lgl> <lgl> <chr> <chr> <chr> <chr> <chr> <fct>   <dbl>
 1 3- day        25°C    NA    NA    <NA>  <NA>  <NA>  <NA>  <NA>  Adla…     759
 2 3- day        25°C    NA    NA    Conc… dip   Day   Weig… Color Adla…     736
 3 3- day        25°C    NA    NA    1     15    4     <NA>  <NA>  Adla…     891
 4 3- day        30°C    NA    NA    1     15    4     <NA>  <NA>  Adla…     573
 5 3- day        30°C    NA    NA    1     15    4     <NA>  <NA>  Adla…     584
 6 3- day        30°C    NA    NA    1     15    4     <NA>  <NA>  Adla…     482
 7 3- day        35°C    NA    NA    1     30    4     <NA>  <NA>  Adla…     384
 8 3- day        35°C    NA    NA    1     30    4     <NA>  <NA>  Adla…     264
 9 3- day        35°C    NA    NA    1     30    4     <NA>  <NA>  Adla…     208
10 5-day         25°C    NA    NA    1     30    4     <NA>  <NA>  Adla…     888
# … with 182 more rows, and abbreviated variable names ¹Temperature, ²CFUcount

Question 5.

# Question 5: mean and standard deviation of CFUcount per PhaseInterval.
Phase1 <- get_summary_stats(
  group_by(Phase, PhaseInterval),
  CFUcount,
  type = "mean_sd"
)
Phase1

# A tibble: 3 × 5
  PhaseInterval variable     n  mean    sd
  <chr>         <fct>    <dbl> <dbl> <dbl>
1 3- day        CFUcount    26  488.  264.
2 5-day         CFUcount    27  284.  269.
3 7-day         CFUcount    27  182.  271.

Question 6.

# Question 6: mean and standard deviation of CFUcount per Temperature.
Phase2 <- get_summary_stats(
  group_by(Phase, Temperature),
  CFUcount,
  type = "mean_sd"
)
Phase2

# A tibble: 3 × 5
  Temperature variable     n  mean    sd
  <chr>       <fct>    <dbl> <dbl> <dbl>
1 25°C        CFUcount    27  480.  353.
2 30°C        CFUcount    26  235.  194.
3 35°C        CFUcount    27  229.  243.

Question 7.

# Question 7: mean and standard deviation of CFUcount per Phase level.
# The data frame and its grouping column share the name Phase, so the data
# argument is named explicitly to keep the two apart.
Phase3 <- get_summary_stats(
  group_by(.data = Phase, Phase),
  CFUcount,
  type = "mean_sd"
)
Phase3

# A tibble: 3 × 5
  Phase                        variable     n  mean    sd
  <fct>                        <fct>    <dbl> <dbl> <dbl>
1 Adlay with milk              CFUcount    26  190   232.
2 Adlay with milk and molasses CFUcount    27  293.  261.
3 Adlay with wash              CFUcount    27  459.  323.

Question 8.

# Question 8: mean and standard deviation of CFUcount for every combination
# of PhaseInterval and Temperature.
Phase4 <- get_summary_stats(
  group_by(Phase, PhaseInterval, Temperature),
  CFUcount,
  type = "mean_sd"
)
Phase4

# A tibble: 9 × 6
  PhaseInterval Temperature variable     n  mean    sd
  <chr>         <chr>       <fct>    <dbl> <dbl> <dbl>
1 3- day        25°C        CFUcount     9 641.  244. 
2 3- day        30°C        CFUcount     8 347.  187. 
3 3- day        35°C        CFUcount     9 459.  282. 
4 5-day         25°C        CFUcount     9 460.  346. 
5 5-day         30°C        CFUcount     9 250.  218. 
6 5-day         35°C        CFUcount     9 142.   98.7
7 7-day         25°C        CFUcount     9 340.  416. 
8 7-day         30°C        CFUcount     9 120.  108. 
9 7-day         35°C        CFUcount     9  85.4 104.

Question 9.

# Question 9: mean and standard deviation of CFUcount for every combination
# of Temperature and Phase level.
Phase5 <- get_summary_stats(
  group_by(.data = Phase, Temperature, Phase),
  CFUcount,
  type = "mean_sd"
)
Phase5

# A tibble: 9 × 6
  Temperature Phase                        variable     n  mean    sd
  <chr>       <fct>                        <fct>    <dbl> <dbl> <dbl>
1 25°C        Adlay with milk              CFUcount     9  243. 230. 
2 25°C        Adlay with milk and molasses CFUcount     9  416. 330. 
3 25°C        Adlay with wash              CFUcount     9  783. 267. 
4 30°C        Adlay with milk              CFUcount     8   84   57.6
5 30°C        Adlay with milk and molasses CFUcount     9  193.  98.9
6 30°C        Adlay with wash              CFUcount     9  410. 213. 
7 35°C        Adlay with milk              CFUcount     9  232. 309. 
8 35°C        Adlay with milk and molasses CFUcount     9  269. 271. 
9 35°C        Adlay with wash              CFUcount     9  186. 135.

Question 10.

#Data1:

# Copy the Q2 frequency table into a plain data frame and append the counts
# again under the name NumberofResponses, which the bar chart uses as its
# y variable.
NumberofResponses <- Q2[["Frequency"]]
var1 <- data.frame(Q2, NumberofResponses)
var1

  School AveSCFrequency Frequency Percentage NumberofResponses
1   VCCS  Above Average        38      61.29                38
2   VCCS        Average        14      22.58                14
3   VCCS  Below Average         1       1.61                 1
4   VCCS      Excellent         9      14.52                 9
5   VNHS  Above Average        58      60.42                58
6   VNHS        Average        24      25.00                24
7   VNHS  Below Average         1       1.04                 1
8   VNHS      Excellent        13      13.54                13

library(ggplot2)
# Bar chart of the number of responses per AveSC rating category. var1 holds
# one row per school and category, so rows sharing a category are stacked
# into a single bar.
ggplot(data = var1, mapping = aes(x = AveSCFrequency, y = NumberofResponses)) +
  geom_col()

#Data 2:

# Pie chart of how many rows of Jorielyn were recorded at each temperature.

# One subset per temperature level; filter() keeps only rows where the
# comparison is TRUE, so rows with a missing Temperature are not counted.
A <- Jorielyn %>%
  filter(Temperature == "25°C")
B <- Jorielyn %>%
  filter(Temperature == "30°C")
C <- Jorielyn %>%
  filter(Temperature == "35°C")

# Row counts of the three subsets, in the same order as `labels`.
D <- nrow(A)
E <- nrow(B)
G <- nrow(C)

x <- c(D, E, G)
labels <- c("25°C", "30°C", "35°C")

# Compute the palette once so the slices and the legend are guaranteed to use
# the same colours.
slice_colors <- cm.colors(length(x))

# Pass `labels` explicitly: x is unnamed, so without it pie() would label the
# slices 1, 2, 3 instead of with the temperatures.
pie(x, labels = labels, main = "Temperature", col = slice_colors)
legend("topright", legend = labels, cex = 0.8, fill = slice_colors)

STAT 50 MIDTERM EXAM

JORIELYN S. MANLA

2023-03-22

Using the data “Dataquest5to11” for questions 5 to 10.