Midterm Exam

library(readxl)

## Warning: package 'readxl' was built under R version 4.2.3

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(rstatix)

## Warning: package 'rstatix' was built under R version 4.2.3

## 
## Attaching package: 'rstatix'

## The following object is masked from 'package:stats':
## 
##     filter

library(tidyverse)

## Warning: package 'tidyverse' was built under R version 4.2.3

## Warning: package 'tidyr' was built under R version 4.2.3

## Warning: package 'readr' was built under R version 4.2.3

## Warning: package 'purrr' was built under R version 4.2.3

## Warning: package 'forcats' was built under R version 4.2.3

## Warning: package 'lubridate' was built under R version 4.2.3

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.4
## ✔ ggplot2   3.4.1     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.1.8
## ✔ purrr     1.0.1     ✔ tidyr     1.3.0

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ rstatix::filter() masks dplyr::filter(), stats::filter()
## ✖ dplyr::lag()      masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(ggpubr)

## Warning: package 'ggpubr' was built under R version 4.2.3

# Load the respondent survey workbook used for Questions 1-4 and show a preview.
library(readxl)
DATA <- read_excel(path = "C:/StatMidterm 1to4/Dataques1to4.xlsx")
DATA

## # A tibble: 158 × 38
##    Responde…¹ School   Age Sex   Salar…² Marit…³ Year …⁴ Posit…⁵ Highe…⁶ `S-C 1`
##         <dbl> <chr>  <dbl> <chr> <chr>   <chr>   <chr>   <chr>   <chr>     <dbl>
##  1          1 VCCS      30 Male  25,001… Married 2       T-I     Colleg…       4
##  2          2 VCCS      38 Fema… 27,001… Married 2       T-II    Colleg…       3
##  3          3 VCCS      40 Fema… 27,001… Married 2       T-II    Colleg…       5
##  4          4 VCCS      40 Male  25,001… Married 4       T-I     Colleg…       5
##  5          5 VCCS      54 Fema… 25,001… Married 2       T-I     Colleg…       4
##  6          6 VCCS      41 Fema… 25,001… Married 2       T-I     Colleg…       5
##  7          7 VCCS      40 Fema… 25,001… Married 3       T-I     Colleg…       5
##  8          8 VCCS      63 Fema… 31,001… Married 3       T-III   MAT           5
##  9          9 VCCS      53 Fema… 29,001… Married 1       T-III   Colleg…       4
## 10         10 VCCS      46 Male  25,001… Married 6       T-I     Colleg…       4
## # … with 148 more rows, 28 more variables: `S-C 2` <dbl>, `S-C 3` <dbl>,
## #   `S-C 4` <dbl>, `S-C 5` <dbl>, SCTotal <dbl>, AveSC <dbl>, `FK 1` <dbl>,
## #   `FK 2` <dbl>, `FK 3` <dbl>, `FK 4` <dbl>, `FK 5` <dbl>, FKTotal <dbl>,
## #   AveFK <dbl>, `PI 1` <dbl>, `PI 2` <dbl>, `PI 3` <dbl>, `PI 4` <dbl>,
## #   `PI 5` <dbl>, PITotal <dbl>, AvePI <dbl>, `RR 1` <dbl>, `RR 2` <dbl>,
## #   `RR 3` <dbl>, `RR 4` <dbl>, `RR 5` <dbl>, Rrtotal <dbl>, AveRR <dbl>,
## #   `Investment Alternatives` <chr>, and abbreviated variable names …

Question # 1

# Question 1: number and within-school percentage of respondents falling in
# each rating band of the AveFK score.
Tatin1 <- DATA %>%
  # Drop respondents with a missing AveFK. is.na() is the proper missing-value
  # test; comparing the numeric column against the string "NA" only removed
  # these rows as a side effect of the comparison itself returning NA.
  filter(!is.na(AveFK)) %>%
  # Conditions are evaluated top to bottom, so each respondent receives the
  # first band whose upper cut-off the score does not exceed.
  mutate(
    AveFKFrequency = case_when(
      AveFK <= 1.49 ~ "Very Poor",
      AveFK <= 2.49 ~ "Below Average",
      AveFK <= 3.49 ~ "Average",
      AveFK <= 4.49 ~ "Above Average",
      TRUE ~ "Excellent"
    )
  ) %>%
  group_by(School, AveFKFrequency) %>%
  summarise(Frequency = n()) %>%
  # summarise() peels off only the last grouping level, so the table is still
  # grouped by School here and sum(Frequency) is the per-school total.
  mutate(Percentage = round(Frequency / sum(Frequency) * 100, 2))

## `summarise()` has grouped output by 'School'. You can override using the
## `.groups` argument.

Tatin1

## # A tibble: 7 × 4
## # Groups:   School [2]
##   School AveFKFrequency Frequency Percentage
##   <chr>  <chr>              <int>      <dbl>
## 1 VCCS   Above Average         41      66.1 
## 2 VCCS   Average                7      11.3 
## 3 VCCS   Excellent             14      22.6 
## 4 VNHS   Above Average         68      70.8 
## 5 VNHS   Average                7       7.29
## 6 VNHS   Below Average          2       2.08
## 7 VNHS   Excellent             19      19.8

Question # 2

# Question 2: number and within-school percentage of respondents falling in
# each rating band of the AveSC score.
Tatin2 <- DATA %>%
  # Drop respondents with a missing AveSC. is.na() is the proper missing-value
  # test; comparing the numeric column against the string "NA" only removed
  # these rows as a side effect of the comparison itself returning NA.
  filter(!is.na(AveSC)) %>%
  # Conditions are evaluated top to bottom, so each respondent receives the
  # first band whose upper cut-off the score does not exceed.
  mutate(
    AveSCFrequency = case_when(
      AveSC <= 1.49 ~ "Very Poor",
      AveSC <= 2.49 ~ "Below Average",
      AveSC <= 3.49 ~ "Average",
      AveSC <= 4.49 ~ "Above Average",
      TRUE ~ "Excellent"
    )
  ) %>%
  group_by(School, AveSCFrequency) %>%
  summarise(Frequency = n()) %>%
  # summarise() peels off only the last grouping level, so the table is still
  # grouped by School here and sum(Frequency) is the per-school total.
  mutate(Percentage = round(Frequency / sum(Frequency) * 100, 2))

## `summarise()` has grouped output by 'School'. You can override using the
## `.groups` argument.

Tatin2

## # A tibble: 8 × 4
## # Groups:   School [2]
##   School AveSCFrequency Frequency Percentage
##   <chr>  <chr>              <int>      <dbl>
## 1 VCCS   Above Average         38      61.3 
## 2 VCCS   Average               14      22.6 
## 3 VCCS   Below Average          1       1.61
## 4 VCCS   Excellent              9      14.5 
## 5 VNHS   Above Average         58      60.4 
## 6 VNHS   Average               24      25   
## 7 VNHS   Below Average          1       1.04
## 8 VNHS   Excellent             13      13.5

Question # 3

# Question 3: number and within-school percentage of respondents falling in
# each rating band of the AvePI score.
Tatin3 <- DATA %>%
  # Drop respondents with a missing AvePI. is.na() is the proper missing-value
  # test; comparing the numeric column against the string "NA" only removed
  # these rows as a side effect of the comparison itself returning NA.
  filter(!is.na(AvePI)) %>%
  # Conditions are evaluated top to bottom, so each respondent receives the
  # first band whose upper cut-off the score does not exceed.
  mutate(
    AvePIFrequency = case_when(
      AvePI <= 1.49 ~ "Very Poor",
      AvePI <= 2.49 ~ "Below Average",
      AvePI <= 3.49 ~ "Average",
      AvePI <= 4.49 ~ "Above Average",
      TRUE ~ "Excellent"
    )
  ) %>%
  group_by(School, AvePIFrequency) %>%
  summarise(Frequency = n()) %>%
  # summarise() peels off only the last grouping level, so the table is still
  # grouped by School here and sum(Frequency) is the per-school total.
  mutate(Percentage = round(Frequency / sum(Frequency) * 100, 2))

## `summarise()` has grouped output by 'School'. You can override using the
## `.groups` argument.

Tatin3

## # A tibble: 9 × 4
## # Groups:   School [2]
##   School AvePIFrequency Frequency Percentage
##   <chr>  <chr>              <int>      <dbl>
## 1 VCCS   Above Average         39      62.9 
## 2 VCCS   Average               15      24.2 
## 3 VCCS   Below Average          1       1.61
## 4 VCCS   Excellent              7      11.3 
## 5 VNHS   Above Average         49      51.0 
## 6 VNHS   Average               34      35.4 
## 7 VNHS   Below Average          7       7.29
## 8 VNHS   Excellent              5       5.21
## 9 VNHS   Very Poor              1       1.04

Question # 4

# Question 4: number and within-school percentage of respondents falling in
# each rating band of the AveRR score.
Tatin4 <- DATA %>%
  # Drop respondents with a missing AveRR. is.na() is the proper missing-value
  # test; comparing the numeric column against the string "NA" only removed
  # these rows as a side effect of the comparison itself returning NA.
  filter(!is.na(AveRR)) %>%
  # Conditions are evaluated top to bottom, so each respondent receives the
  # first band whose upper cut-off the score does not exceed.
  mutate(
    AveRRFrequency = case_when(
      AveRR <= 1.49 ~ "Very Poor",
      AveRR <= 2.49 ~ "Below Average",
      AveRR <= 3.49 ~ "Average",
      AveRR <= 4.49 ~ "Above Average",
      TRUE ~ "Excellent"
    )
  ) %>%
  group_by(School, AveRRFrequency) %>%
  summarise(Frequency = n()) %>%
  # summarise() peels off only the last grouping level, so the table is still
  # grouped by School here and sum(Frequency) is the per-school total.
  mutate(Percentage = round(Frequency / sum(Frequency) * 100, 2))

## `summarise()` has grouped output by 'School'. You can override using the
## `.groups` argument.

Tatin4

## # A tibble: 7 × 4
## # Groups:   School [2]
##   School AveRRFrequency Frequency Percentage
##   <chr>  <chr>              <int>      <dbl>
## 1 VCCS   Above Average         43      69.4 
## 2 VCCS   Average               11      17.7 
## 3 VCCS   Excellent              8      12.9 
## 4 VNHS   Above Average         67      69.8 
## 5 VNHS   Average               21      21.9 
## 6 VNHS   Below Average          3       3.12
## 7 VNHS   Excellent              5       5.21

library(ggplot2)
library(tibble)
library(tidyr)
library(readr)
library(purrr)
library(stringr)
library(forcats)

# Load the CFU-count workbook used for Questions 5-9. readxl assigns
# positional names (`...6` through `...12`) to the columns with blank headers.
library(readxl)
Jaswan <- read_excel(path = "C:/StatMidterm 1to4/Dataquest5to11.xlsx")

## New names:
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`

library(rstatix)
# Stack the three Adlay treatment columns into long format: the treatment name
# goes to `Phase` (stored as a factor) and the measurement to `CFUcount`.
# NOTE(review): gather() is superseded by tidyr::pivot_longer(); it is kept
# here because pivot_longer() orders rows differently, which would change the
# printed preview below.
Jaswan <- convert_as_factor(
  gather(
    Jaswan,
    key = "Phase",
    value = "CFUcount",
    "Adlay with wash",
    "Adlay with milk and molasses",
    "Adlay with milk"
  ),
  Phase
)
Jaswan

## # A tibble: 192 × 11
##    PhaseInterval Tempe…¹ ...6  ...7  ...8  ...9  ...10 ...11 ...12 Phase CFUco…²
##    <chr>         <chr>   <lgl> <lgl> <chr> <chr> <chr> <chr> <chr> <fct>   <dbl>
##  1 3- day        25°C    NA    NA    <NA>  <NA>  <NA>  <NA>  <NA>  Adla…     759
##  2 3- day        25°C    NA    NA    Conc… dip   Day   Weig… Color Adla…     736
##  3 3- day        25°C    NA    NA    1     15    4     <NA>  <NA>  Adla…     891
##  4 3- day        30°C    NA    NA    1     15    4     <NA>  <NA>  Adla…     573
##  5 3- day        30°C    NA    NA    1     15    4     <NA>  <NA>  Adla…     584
##  6 3- day        30°C    NA    NA    1     15    4     <NA>  <NA>  Adla…     482
##  7 3- day        35°C    NA    NA    1     30    4     <NA>  <NA>  Adla…     384
##  8 3- day        35°C    NA    NA    1     30    4     <NA>  <NA>  Adla…     264
##  9 3- day        35°C    NA    NA    1     30    4     <NA>  <NA>  Adla…     208
## 10 5-day         25°C    NA    NA    1     30    4     <NA>  <NA>  Adla…     888
## # … with 182 more rows, and abbreviated variable names ¹Temperature, ²CFUcount

Question # 5

# Question 5: mean and standard deviation of CFUcount for each PhaseInterval.
Jaswan1 <- get_summary_stats(
  group_by(Jaswan, PhaseInterval),
  CFUcount,
  type = "mean_sd"
)
Jaswan1

## # A tibble: 3 × 5
##   PhaseInterval variable     n  mean    sd
##   <chr>         <fct>    <dbl> <dbl> <dbl>
## 1 3- day        CFUcount    26  488.  264.
## 2 5-day         CFUcount    27  284.  269.
## 3 7-day         CFUcount    27  182.  271.

Question # 6

# Question 6: mean and standard deviation of CFUcount for each Temperature.
Jaswan2 <- get_summary_stats(
  group_by(Jaswan, Temperature),
  CFUcount,
  type = "mean_sd"
)
Jaswan2

## # A tibble: 3 × 5
##   Temperature variable     n  mean    sd
##   <chr>       <fct>    <dbl> <dbl> <dbl>
## 1 25°C        CFUcount    27  480.  353.
## 2 30°C        CFUcount    26  235.  194.
## 3 35°C        CFUcount    27  229.  243.

Question # 7

# Question 7: mean and standard deviation of CFUcount for each Phase
# (Adlay treatment).
Jaswan3 <- get_summary_stats(
  group_by(Jaswan, Phase),
  CFUcount,
  type = "mean_sd"
)
Jaswan3

## # A tibble: 3 × 5
##   Phase                        variable     n  mean    sd
##   <fct>                        <fct>    <dbl> <dbl> <dbl>
## 1 Adlay with milk              CFUcount    26  190   232.
## 2 Adlay with milk and molasses CFUcount    27  293.  261.
## 3 Adlay with wash              CFUcount    27  459.  323.

Question # 8

# Question 8: mean and standard deviation of CFUcount for every
# PhaseInterval x Temperature combination.
Jaswan4 <- get_summary_stats(
  group_by(Jaswan, PhaseInterval, Temperature),
  CFUcount,
  type = "mean_sd"
)
Jaswan4

## # A tibble: 9 × 6
##   PhaseInterval Temperature variable     n  mean    sd
##   <chr>         <chr>       <fct>    <dbl> <dbl> <dbl>
## 1 3- day        25°C        CFUcount     9 641.  244. 
## 2 3- day        30°C        CFUcount     8 347.  187. 
## 3 3- day        35°C        CFUcount     9 459.  282. 
## 4 5-day         25°C        CFUcount     9 460.  346. 
## 5 5-day         30°C        CFUcount     9 250.  218. 
## 6 5-day         35°C        CFUcount     9 142.   98.7
## 7 7-day         25°C        CFUcount     9 340.  416. 
## 8 7-day         30°C        CFUcount     9 120.  108. 
## 9 7-day         35°C        CFUcount     9  85.4 104.

Question # 9

# Question 9: mean and standard deviation of CFUcount for every
# Temperature x Phase combination.
Jaswan5 <- get_summary_stats(
  group_by(Jaswan, Temperature, Phase),
  CFUcount,
  type = "mean_sd"
)
Jaswan5

## # A tibble: 9 × 6
##   Temperature Phase                        variable     n  mean    sd
##   <chr>       <fct>                        <fct>    <dbl> <dbl> <dbl>
## 1 25°C        Adlay with milk              CFUcount     9  243. 230. 
## 2 25°C        Adlay with milk and molasses CFUcount     9  416. 330. 
## 3 25°C        Adlay with wash              CFUcount     9  783. 267. 
## 4 30°C        Adlay with milk              CFUcount     8   84   57.6
## 5 30°C        Adlay with milk and molasses CFUcount     9  193.  98.9
## 6 30°C        Adlay with wash              CFUcount     9  410. 213. 
## 7 35°C        Adlay with milk              CFUcount     9  232. 309. 
## 8 35°C        Adlay with milk and molasses CFUcount     9  269. 271. 
## 9 35°C        Adlay with wash              CFUcount     9  186. 135.

Question # 10

# Question 10: bar charts of respondent counts. This reads the same workbook
# that was loaded earlier as DATA.
library(readxl)
Justin <- read_excel(path = "C:/StatMidterm 1to4/Dataques1to4.xlsx")

# Number of respondents per school.
ggplot(data = Justin, mapping = aes(x = School)) +
  geom_bar() +
  labs(title = "Respondents in Different School", x = "School")

# Number of respondents per position; x-axis labels are tilted 30 degrees and
# right-aligned.
ggplot(data = Justin, mapping = aes(x = Position)) +
  geom_bar() +
  labs(title = "Different Position of the Respondents", x = "Position") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))

Midterm Exam

Justin Lian Caballero

2023-04-23