library(readxl)
Warning: package 'readxl' was built under R version 4.2.3
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
library(rstatix)
Warning: package 'rstatix' was built under R version 4.2.3
Attaching package: 'rstatix'
The following object is masked from 'package:stats':
filter
library(ggpubr)
Warning: package 'ggpubr' was built under R version 4.2.3
Loading required package: ggplot2
# For questions 1 to 4, refer to the data “Dataques1to4.”
library(readxl)
# Load the workbook used for Questions 1 to 4 and display it.
Jorielyn <- read_excel(
  path = "E:/JORIELYN/STAT MIDTERM/Dataques1to4.xlsx"
)
Jorielyn
# A tibble: 158 × 38
Responde…¹ School Age Sex Salar…² Marit…³ Year …⁴ Posit…⁵ Highe…⁶ `S-C 1`
<dbl> <chr> <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
1 1 VCCS 30 Male 25,001… Married 2 T-I Colleg… 4
2 2 VCCS 38 Fema… 27,001… Married 2 T-II Colleg… 3
3 3 VCCS 40 Fema… 27,001… Married 2 T-II Colleg… 5
4 4 VCCS 40 Male 25,001… Married 4 T-I Colleg… 5
5 5 VCCS 54 Fema… 25,001… Married 2 T-I Colleg… 4
6 6 VCCS 41 Fema… 25,001… Married 2 T-I Colleg… 5
7 7 VCCS 40 Fema… 25,001… Married 3 T-I Colleg… 5
8 8 VCCS 63 Fema… 31,001… Married 3 T-III MAT 5
9 9 VCCS 53 Fema… 29,001… Married 1 T-III Colleg… 4
10 10 VCCS 46 Male 25,001… Married 6 T-I Colleg… 4
# … with 148 more rows, 28 more variables: `S-C 2` <dbl>, `S-C 3` <dbl>,
# `S-C 4` <dbl>, `S-C 5` <dbl>, SCTotal <dbl>, AveSC <dbl>, `FK 1` <dbl>,
# `FK 2` <dbl>, `FK 3` <dbl>, `FK 4` <dbl>, `FK 5` <dbl>, FKTotal <dbl>,
# AveFK <dbl>, `PI 1` <dbl>, `PI 2` <dbl>, `PI 3` <dbl>, `PI 4` <dbl>,
# `PI 5` <dbl>, PITotal <dbl>, AvePI <dbl>, `RR 1` <dbl>, `RR 2` <dbl>,
# `RR 3` <dbl>, `RR 4` <dbl>, `RR 5` <dbl>, Rrtotal <dbl>, AveRR <dbl>,
# `Investment Alternatives` <chr>, and abbreviated variable names …
Question 1.
# Frequency and within-school percentage of each AveFK rating band.
Q1 <- Jorielyn %>%
  # Drop rows with a missing AveFK. is.na() is the proper NA test; the
  # earlier comparison against the string "NA" only worked because filter()
  # discards rows whose condition evaluates to NA.
  filter(!is.na(AveFK)) %>%
  # Bin the average into five bands. cut() intervals are closed on the
  # right by default, which matches the original `<=` thresholds; the
  # result is converted back to character so the column type is unchanged.
  mutate(
    AveFKFrequency = as.character(cut(
      AveFK,
      breaks = c(-Inf, 1.49, 2.49, 3.49, 4.49, Inf),
      labels = c(
        "Very Poor", "Below Average", "Average",
        "Above Average", "Excellent"
      )
    ))
  ) %>%
  group_by(School, AveFKFrequency) %>%
  # summarise() drops the last grouping level, so the result is still
  # grouped by School and the percentages below are computed per school.
  summarise(Frequency = n()) %>%
  mutate(Percentage = round(Frequency / sum(Frequency) * 100, 2))
`summarise()` has grouped output by 'School'. You can override using the
`.groups` argument.
# Display the Question 1 frequency table.
Q1
# A tibble: 7 × 4
# Groups: School [2]
School AveFKFrequency Frequency Percentage
<chr> <chr> <int> <dbl>
1 VCCS Above Average 41 66.1
2 VCCS Average 7 11.3
3 VCCS Excellent 14 22.6
4 VNHS Above Average 68 70.8
5 VNHS Average 7 7.29
6 VNHS Below Average 2 2.08
7 VNHS Excellent 19 19.8
Question 2.
# Frequency and within-school percentage of each AveSC rating band.
Q2 <- Jorielyn %>%
  # Drop rows with a missing AveSC. is.na() is the proper NA test; the
  # earlier comparison against the string "NA" only worked because filter()
  # discards rows whose condition evaluates to NA.
  filter(!is.na(AveSC)) %>%
  # Bin the average into five bands. cut() intervals are closed on the
  # right by default, which matches the original `<=` thresholds; the
  # result is converted back to character so the column type is unchanged.
  mutate(
    AveSCFrequency = as.character(cut(
      AveSC,
      breaks = c(-Inf, 1.49, 2.49, 3.49, 4.49, Inf),
      labels = c(
        "Very Poor", "Below Average", "Average",
        "Above Average", "Excellent"
      )
    ))
  ) %>%
  group_by(School, AveSCFrequency) %>%
  # summarise() drops the last grouping level, so the result is still
  # grouped by School and the percentages below are computed per school.
  summarise(Frequency = n()) %>%
  mutate(Percentage = round(Frequency / sum(Frequency) * 100, 2))
`summarise()` has grouped output by 'School'. You can override using the
`.groups` argument.
# Display the Question 2 frequency table.
Q2
# A tibble: 8 × 4
# Groups: School [2]
School AveSCFrequency Frequency Percentage
<chr> <chr> <int> <dbl>
1 VCCS Above Average 38 61.3
2 VCCS Average 14 22.6
3 VCCS Below Average 1 1.61
4 VCCS Excellent 9 14.5
5 VNHS Above Average 58 60.4
6 VNHS Average 24 25
7 VNHS Below Average 1 1.04
8 VNHS Excellent 13 13.5
Question 3.
# Frequency and within-school percentage of each AvePI rating band.
Q3 <- Jorielyn %>%
  # Drop rows with a missing AvePI. The filter previously tested AveFK (a
  # copy-paste slip), so a row with AveFK present but AvePI missing would
  # have been counted under an NA rating.
  filter(!is.na(AvePI)) %>%
  # Bin the average into five bands. cut() intervals are closed on the
  # right by default, which matches the original `<=` thresholds; the
  # result is converted back to character so the column type is unchanged.
  mutate(
    AvePIFrequency = as.character(cut(
      AvePI,
      breaks = c(-Inf, 1.49, 2.49, 3.49, 4.49, Inf),
      labels = c(
        "Very Poor", "Below Average", "Average",
        "Above Average", "Excellent"
      )
    ))
  ) %>%
  group_by(School, AvePIFrequency) %>%
  # summarise() drops the last grouping level, so the result is still
  # grouped by School and the percentages below are computed per school.
  summarise(Frequency = n()) %>%
  mutate(Percentage = round(Frequency / sum(Frequency) * 100, 2))
`summarise()` has grouped output by 'School'. You can override using the
`.groups` argument.
# Display the Question 3 frequency table.
Q3
# A tibble: 9 × 4
# Groups: School [2]
School AvePIFrequency Frequency Percentage
<chr> <chr> <int> <dbl>
1 VCCS Above Average 39 62.9
2 VCCS Average 15 24.2
3 VCCS Below Average 1 1.61
4 VCCS Excellent 7 11.3
5 VNHS Above Average 49 51.0
6 VNHS Average 34 35.4
7 VNHS Below Average 7 7.29
8 VNHS Excellent 5 5.21
9 VNHS Very Poor 1 1.04
Question 4.
# Frequency and within-school percentage of each AveRR rating band.
Q4 <- Jorielyn %>%
  # Drop rows with a missing AveRR. The filter previously tested AveFK (a
  # copy-paste slip), so a row with AveFK present but AveRR missing would
  # have been counted under an NA rating.
  filter(!is.na(AveRR)) %>%
  # Bin the average into five bands. cut() intervals are closed on the
  # right by default, which matches the original `<=` thresholds; the
  # result is converted back to character so the column type is unchanged.
  mutate(
    AveRRFrequency = as.character(cut(
      AveRR,
      breaks = c(-Inf, 1.49, 2.49, 3.49, 4.49, Inf),
      labels = c(
        "Very Poor", "Below Average", "Average",
        "Above Average", "Excellent"
      )
    ))
  ) %>%
  group_by(School, AveRRFrequency) %>%
  # summarise() drops the last grouping level, so the result is still
  # grouped by School and the percentages below are computed per school.
  summarise(Frequency = n()) %>%
  mutate(Percentage = round(Frequency / sum(Frequency) * 100, 2))
`summarise()` has grouped output by 'School'. You can override using the
`.groups` argument.
# Display the Question 4 frequency table.
Q4
# A tibble: 7 × 4
# Groups: School [2]
School AveRRFrequency Frequency Percentage
<chr> <chr> <int> <dbl>
1 VCCS Above Average 43 69.4
2 VCCS Average 11 17.7
3 VCCS Excellent 8 12.9
4 VNHS Above Average 67 69.8
5 VNHS Average 21 21.9
6 VNHS Below Average 3 3.12
7 VNHS Excellent 5 5.21
library(readxl)
# Load the second workbook; this replaces the earlier contents of `Jorielyn`.
Jorielyn <- read_excel(
  path = "E:/JORIELYN/STAT MIDTERM/Dataquest5to11.xlsx"
)
New names:
• `` -> `...6`
• `` -> `...7`
• `` -> `...8`
• `` -> `...9`
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
# Display the data just read from Dataquest5to11.xlsx.
Jorielyn
# A tibble: 64 × 12
PhaseIn…¹ Tempe…² Adlay…³ Adlay…⁴ Adlay…⁵ ...6 ...7 ...8 ...9 ...10 ...11
<chr> <chr> <dbl> <dbl> <dbl> <lgl> <lgl> <chr> <chr> <chr> <chr>
1 3- day 25°C 759 220 819 NA NA <NA> <NA> <NA> <NA>
2 3- day 25°C 736 812 763 NA NA Conc… dip Day Weig…
3 3- day 25°C 891 316 457 NA NA 1 15 4 <NA>
4 3- day 30°C 573 96 356 NA NA 1 15 4 <NA>
5 3- day 30°C 584 137 247 NA NA 1 15 4 <NA>
6 3- day 30°C 482 NA 298 NA NA 1 15 4 <NA>
7 3- day 35°C 384 79 346 NA NA 1 30 4 <NA>
8 3- day 35°C 264 934 823 NA NA 1 30 4 <NA>
9 3- day 35°C 208 556 536 NA NA 1 30 4 <NA>
10 5-day 25°C 888 267 879 NA NA 1 30 4 <NA>
# … with 54 more rows, 1 more variable: ...12 <chr>, and abbreviated variable
# names ¹PhaseInterval, ²Temperature, ³`Adlay with wash`, ⁴`Adlay with milk`,
# ⁵`Adlay with milk and molasses`
Create a new variable named “Phase” with the variables “Adlay with wash”, “Adlay with milk”, and “Adlay with milk and molasses” as the responses. With this, make a variable “CFUcount” for the responses of the variable “Phase.”
library(rstatix)
# Stack the three Adlay treatment columns into long format: the source
# column name is stored in `Phase` and the measured value in `CFUcount`.
Phase <- gather(
  Jorielyn,
  key = "Phase",
  value = "CFUcount",
  `Adlay with wash`,
  `Adlay with milk`,
  `Adlay with milk and molasses`
)
# Store the treatment label as a factor.
Phase <- convert_as_factor(Phase, Phase)
Phase
# A tibble: 192 × 11
PhaseInterval Tempe…¹ ...6 ...7 ...8 ...9 ...10 ...11 ...12 Phase CFUco…²
<chr> <chr> <lgl> <lgl> <chr> <chr> <chr> <chr> <chr> <fct> <dbl>
1 3- day 25°C NA NA <NA> <NA> <NA> <NA> <NA> Adla… 759
2 3- day 25°C NA NA Conc… dip Day Weig… Color Adla… 736
3 3- day 25°C NA NA 1 15 4 <NA> <NA> Adla… 891
4 3- day 30°C NA NA 1 15 4 <NA> <NA> Adla… 573
5 3- day 30°C NA NA 1 15 4 <NA> <NA> Adla… 584
6 3- day 30°C NA NA 1 15 4 <NA> <NA> Adla… 482
7 3- day 35°C NA NA 1 30 4 <NA> <NA> Adla… 384
8 3- day 35°C NA NA 1 30 4 <NA> <NA> Adla… 264
9 3- day 35°C NA NA 1 30 4 <NA> <NA> Adla… 208
10 5-day 25°C NA NA 1 30 4 <NA> <NA> Adla… 888
# … with 182 more rows, and abbreviated variable names ¹Temperature, ²CFUcount
Question 5.
# Mean and standard deviation of CFUcount for each phase interval.
Phase1 <- get_summary_stats(
  group_by(Phase, PhaseInterval),
  CFUcount,
  type = "mean_sd"
)
Phase1
# A tibble: 3 × 5
PhaseInterval variable n mean sd
<chr> <fct> <dbl> <dbl> <dbl>
1 3- day CFUcount 26 488. 264.
2 5-day CFUcount 27 284. 269.
3 7-day CFUcount 27 182. 271.
Question 6.
# Mean and standard deviation of CFUcount for each temperature.
Phase2 <- get_summary_stats(
  group_by(Phase, Temperature),
  CFUcount,
  type = "mean_sd"
)
Phase2
# A tibble: 3 × 5
Temperature variable n mean sd
<chr> <fct> <dbl> <dbl> <dbl>
1 25°C CFUcount 27 480. 353.
2 30°C CFUcount 26 235. 194.
3 35°C CFUcount 27 229. 243.
Question 7.
# Mean and standard deviation of CFUcount for each treatment (the `Phase`
# column of the `Phase` data frame).
Phase3 <- get_summary_stats(
  group_by(Phase, Phase),
  CFUcount,
  type = "mean_sd"
)
Phase3
# A tibble: 3 × 5
Phase variable n mean sd
<fct> <fct> <dbl> <dbl> <dbl>
1 Adlay with milk CFUcount 26 190 232.
2 Adlay with milk and molasses CFUcount 27 293. 261.
3 Adlay with wash CFUcount 27 459. 323.
Question 8.
# Mean and standard deviation of CFUcount for each combination of phase
# interval and temperature.
Phase4 <- get_summary_stats(
  group_by(Phase, PhaseInterval, Temperature),
  CFUcount,
  type = "mean_sd"
)
Phase4
# A tibble: 9 × 6
PhaseInterval Temperature variable n mean sd
<chr> <chr> <fct> <dbl> <dbl> <dbl>
1 3- day 25°C CFUcount 9 641. 244.
2 3- day 30°C CFUcount 8 347. 187.
3 3- day 35°C CFUcount 9 459. 282.
4 5-day 25°C CFUcount 9 460. 346.
5 5-day 30°C CFUcount 9 250. 218.
6 5-day 35°C CFUcount 9 142. 98.7
7 7-day 25°C CFUcount 9 340. 416.
8 7-day 30°C CFUcount 9 120. 108.
9 7-day 35°C CFUcount 9 85.4 104.
Question 9.
# Mean and standard deviation of CFUcount for each combination of
# temperature and treatment.
Phase5 <- get_summary_stats(
  group_by(Phase, Temperature, Phase),
  CFUcount,
  type = "mean_sd"
)
Phase5
# A tibble: 9 × 6
Temperature Phase variable n mean sd
<chr> <fct> <fct> <dbl> <dbl> <dbl>
1 25°C Adlay with milk CFUcount 9 243. 230.
2 25°C Adlay with milk and molasses CFUcount 9 416. 330.
3 25°C Adlay with wash CFUcount 9 783. 267.
4 30°C Adlay with milk CFUcount 8 84 57.6
5 30°C Adlay with milk and molasses CFUcount 9 193. 98.9
6 30°C Adlay with wash CFUcount 9 410. 213.
7 35°C Adlay with milk CFUcount 9 232. 309.
8 35°C Adlay with milk and molasses CFUcount 9 269. 271.
9 35°C Adlay with wash CFUcount 9 186. 135.
Question 10.
#Data1:
# Copy the Q2 counts into a plain data frame, with the frequency repeated
# under the column name used for the bar chart's y axis.
NumberofResponses <- Q2[["Frequency"]]
var1 <- data.frame(Q2, NumberofResponses = NumberofResponses)
var1
School AveSCFrequency Frequency Percentage NumberofResponses
1 VCCS Above Average 38 61.29 38
2 VCCS Average 14 22.58 14
3 VCCS Below Average 1 1.61 1
4 VCCS Excellent 9 14.52 9
5 VNHS Above Average 58 60.42 58
6 VNHS Average 24 25.00 24
7 VNHS Below Average 1 1.04 1
8 VNHS Excellent 13 13.54 13
library(ggplot2)
# Bar chart of the number of responses per AveSC rating band; bar heights
# are taken directly from NumberofResponses.
ggplot(
  data = var1,
  mapping = aes(x = AveSCFrequency, y = NumberofResponses)
) +
  geom_col()
#Data 2:
# Pie chart of the number of rows in `Jorielyn` recorded at each temperature.
labels <- c("25°C", "30°C", "35°C")

# Subset the data by temperature.
A <- Jorielyn %>%
  filter(Temperature == "25°C")
B <- Jorielyn %>%
  filter(Temperature == "30°C")
C <- Jorielyn %>%
  filter(Temperature == "35°C")

# Row counts per temperature, in the same order as `labels`.
D <- nrow(A)
E <- nrow(B)
G <- nrow(C)
x <- c(D, E, G)

# Build the palette once so the slices and the legend share the same colours.
slice_colors <- cm.colors(length(x))

# `labels` was previously defined but never passed to pie(), so the slices
# were labelled 1, 2, 3 instead of with the temperatures.
pie(x, labels = labels, main = "Temperature", col = slice_colors)
legend("topright", labels, cex = 0.8, fill = slice_colors)