# Packages used by this script: readxl for read_excel(), rmarkdown for
# paged_table(), dplyr for the data verbs and the %>% pipe.
library(readxl)
library(rmarkdown)
library(dplyr)

# Read the Excel workbook into `Data`.
# NOTE(review): this is an absolute, machine-specific path; the script will
# only run where this exact folder exists -- consider a project-relative path.
Data <- read_excel("D:/MARV BS MATH/Marv 3rd year 2nd sem/Statistical Software/Data.xlsx")

# View() opens a data viewer window, which is only meaningful in an
# interactive session; skip it when the script is run non-interactively
# (e.g. via Rscript or when rendering a report).
if (interactive()) {
  View(Data)
}

# Show the raw data as a paged table.
paged_table(Data)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Add In1.1 and In2.1: text labels for the numeric response codes stored in
# In1 and In2. The same code-to-label map is applied to both columns, and the
# new columns are appended after the existing ones.
# NOTE(review): the map is not monotonic (5 = "Agree", 6 = "Strongly Agree",
# 7 = "Moderately Agree") -- confirm it against the questionnaire's scale.
Data <- Data %>%
  mutate(
    across(
      c(In1, In2),
      ~ recode(
        .x,
        "1" = "Strongly Disagree",
        "2" = "Disagree",
        "3" = "Moderately Disagree",
        "4" = "Neutral",
        "5" = "Agree",
        "6" = "Strongly Agree",
        "7" = "Moderately Agree"
      ),
      .names = "{.col}.1"
    )
  )
# Coerce the five In* items to numeric, then build the composite score.
# Despite its name, InAverage is the row-wise SUM of In1..In5 (it is not
# divided by 5); a missing value in any item makes the whole score NA.
Data <- Data %>%
  mutate(
    across(c(In1, In2, In3, In4, In5), as.numeric),
    InAverage = In1 + In2 + In3 + In4 + In5
  )

# Print the tibble to check the new column.
Data
## # A tibble: 163 x 36
##      Age Gender `Course Taken`     In1   In2   In3   In4   In5   In6   In7   In8
##    <dbl> <chr>  <chr>            <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1    22 Female BS Mathematics       4     3     2     1     4     7     6     7
##  2    23 Female BS Biology           6     6     4     4     4     5     4     7
##  3    20 Female BSED English         5     5     3     3     2     6     5     7
##  4    22 Female BSED Biology         4     5     4     3     3     6     6     7
##  5    23 Male   BSED English         7     6     5     5     4     6     4     7
##  6    22 Female BSED Biology         6     6     6     6     6     7     7     7
##  7    20 Male   BS Civil Engine~     4     5     6     2     5     7     4     1
##  8    21 Female BS Electrical E~     5     6     5     6     5     7     6     7
##  9    21 Female BS Mathematics       6     7     5     5     5     7     7     7
## 10    22 Male   BS Biology           6     7     5     6     7     7     7     7
## # ... with 153 more rows, and 25 more variables: Ex1 <dbl>, Ex2 <dbl>,
## #   Ex3 <dbl>, Ex4 <dbl>, Ex5 <dbl>, Ex6 <dbl>, Ex7 <dbl>, Ex8 <dbl>,
## #   Ex9 <dbl>, Ex10 <dbl>, Ex11 <dbl>, TP1 <dbl>, TP2 <dbl>, TP3 <dbl>,
## #   TP4 <dbl>, TP5 <dbl>, T6 <dbl>, CP1 <dbl>, CP2 <dbl>, CP3 <dbl>, CP4 <dbl>,
## #   CP5 <dbl>, In1.1 <chr>, In2.1 <chr>, InAverage <dbl>
# Derive two grouping columns:
#   `Group 1` -- three bins of InAverage: >= 21, <= 19, and everything in
#                between ("Normal").
#   `Group 2` -- two bins of Age: <= 20 versus > 20.
# NOTE(review): `Group 1` is computed from InAverage but its labels talk about
# years of age, and the `Group 2` labels do not match the split (Age <= 20 is
# labelled "19 years old and below", Age > 20 is labelled "at most 20 years
# old"). Confirm the intended variable and labels before interpreting results.
Data <- Data %>%
  mutate(
    `Group 1` = ifelse(
      InAverage >= 21,
      "More than 20 years old",
      ifelse(InAverage <= 19, "Less than 20 years old", "Normal")
    ),
    `Group 2` = ifelse(
      Age > 20,
      "at most 20 years old",
      "19 years old and below"
    )
  )

# Five-number summary (plus mean) of respondent age.
summary(Data[["Age"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   20.00   21.00   22.00   21.56   22.00   25.00
library(dplyr)
# Count respondents per (course, age) pair. summarise() leaves the result
# grouped by `Course Taken`, so Percentage is each age's share within its
# course, expressed as a 0-1 proportion (not multiplied by 100).
Data %>%
  group_by(`Course Taken`, Age) %>%
  summarise(count = n()) %>%
  mutate(Percentage = prop.table(count))
## `summarise()` has grouped output by 'Course Taken'. You can override using the
## `.groups` argument.
## # A tibble: 23 x 4
## # Groups:   Course Taken [6]
##    `Course Taken`              Age count Percentage
##    <chr>                     <dbl> <int>      <dbl>
##  1 BS Biology                   20     3     0.0909
##  2 BS Biology                   21    10     0.303 
##  3 BS Biology                   22    18     0.545 
##  4 BS Biology                   23     2     0.0606
##  5 BS Civil Engineering         20     2     0.125 
##  6 BS Civil Engineering         21     5     0.312 
##  7 BS Civil Engineering         22     9     0.562 
##  8 BS Electrical Engineering    21     8     0.471 
##  9 BS Electrical Engineering    22     8     0.471 
## 10 BS Electrical Engineering    23     1     0.0588
## # ... with 13 more rows
# Number of respondents and mean age for each course.
Data %>%
  group_by(`Course Taken`) %>%
  summarise(
    Frequency = n(),
    `Mean Age` = mean(Age)
  )
## # A tibble: 6 x 3
##   `Course Taken`            Frequency `Mean Age`
##   <chr>                         <int>      <dbl>
## 1 BS Biology                       33       21.6
## 2 BS Civil Engineering             16       21.4
## 3 BS Electrical Engineering        17       21.6
## 4 BS Mathematics                   33       21.7
## 5 BSED Biology                     32       21.5
## 6 BSED English                     32       21.6
# Count respondents per (age, InAverage) pair. The summarised table stays
# grouped by Age, so Percentage is the share of each InAverage value within
# that age, as a 0-1 proportion.
Data %>%
  group_by(Age, InAverage) %>%
  summarise(count = n()) %>%
  mutate(Percentage = prop.table(count))
## `summarise()` has grouped output by 'Age'. You can override using the `.groups`
## argument.
## # A tibble: 61 x 4
## # Groups:   Age [5]
##      Age InAverage count Percentage
##    <dbl>     <dbl> <int>      <dbl>
##  1    20        12     1     0.0714
##  2    20        18     1     0.0714
##  3    20        19     1     0.0714
##  4    20        22     2     0.143 
##  5    20        24     1     0.0714
##  6    20        26     1     0.0714
##  7    20        27     3     0.214 
##  8    20        28     2     0.143 
##  9    20        33     1     0.0714
## 10    20        34     1     0.0714
## # ... with 51 more rows
library(dplyr)
# Per-course respondent count together with the mean of four selected
# columns: In5, Ex4, TP3 and CP2.
Data %>%
  group_by(`Course Taken`) %>%
  summarise(
    Frequency = n(),
    `Mean Intrinsic5` = mean(In5),
    `Mean Extrinsic4` = mean(Ex4),
    `Mean TP3` = mean(TP3),
    `Mean CP2` = mean(CP2)
  )
## # A tibble: 6 x 6
##   `Course Taken`          Frequency `Mean Intrinsi~` `Mean Extrinsi~` `Mean TP3`
##   <chr>                       <int>            <dbl>            <dbl>      <dbl>
## 1 BS Biology                     33             4.64             5.67       3.88
## 2 BS Civil Engineering           16             4.56             6.06       3.31
## 3 BS Electrical Engineer~        17             4.53             5.59       3.47
## 4 BS Mathematics                 33             4.52             5.64       3.55
## 5 BSED Biology                   32             4.47             5.56       3.25
## 6 BSED English                   32             4.41             5.56       3.91
## # ... with 1 more variable: `Mean CP2` <dbl>
# Same per-course table as the statement just before it (count plus means of
# In5, Ex4, TP3 and CP2); it is evaluated and printed a second time here.
Data %>%
  group_by(`Course Taken`) %>%
  summarise(
    Frequency = n(),
    `Mean Intrinsic5` = mean(In5),
    `Mean Extrinsic4` = mean(Ex4),
    `Mean TP3` = mean(TP3),
    `Mean CP2` = mean(CP2)
  )
## # A tibble: 6 x 6
##   `Course Taken`          Frequency `Mean Intrinsi~` `Mean Extrinsi~` `Mean TP3`
##   <chr>                       <int>            <dbl>            <dbl>      <dbl>
## 1 BS Biology                     33             4.64             5.67       3.88
## 2 BS Civil Engineering           16             4.56             6.06       3.31
## 3 BS Electrical Engineer~        17             4.53             5.59       3.47
## 4 BS Mathematics                 33             4.52             5.64       3.55
## 5 BSED Biology                   32             4.47             5.56       3.25
## 6 BSED English                   32             4.41             5.56       3.91
## # ... with 1 more variable: `Mean CP2` <dbl>
# Print the tibble again; at this point it also carries the derived columns
# In1.1, In2.1, InAverage, `Group 1` and `Group 2`.
Data
## # A tibble: 163 x 38
##      Age Gender `Course Taken`     In1   In2   In3   In4   In5   In6   In7   In8
##    <dbl> <chr>  <chr>            <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1    22 Female BS Mathematics       4     3     2     1     4     7     6     7
##  2    23 Female BS Biology           6     6     4     4     4     5     4     7
##  3    20 Female BSED English         5     5     3     3     2     6     5     7
##  4    22 Female BSED Biology         4     5     4     3     3     6     6     7
##  5    23 Male   BSED English         7     6     5     5     4     6     4     7
##  6    22 Female BSED Biology         6     6     6     6     6     7     7     7
##  7    20 Male   BS Civil Engine~     4     5     6     2     5     7     4     1
##  8    21 Female BS Electrical E~     5     6     5     6     5     7     6     7
##  9    21 Female BS Mathematics       6     7     5     5     5     7     7     7
## 10    22 Male   BS Biology           6     7     5     6     7     7     7     7
## # ... with 153 more rows, and 27 more variables: Ex1 <dbl>, Ex2 <dbl>,
## #   Ex3 <dbl>, Ex4 <dbl>, Ex5 <dbl>, Ex6 <dbl>, Ex7 <dbl>, Ex8 <dbl>,
## #   Ex9 <dbl>, Ex10 <dbl>, Ex11 <dbl>, TP1 <dbl>, TP2 <dbl>, TP3 <dbl>,
## #   TP4 <dbl>, TP5 <dbl>, T6 <dbl>, CP1 <dbl>, CP2 <dbl>, CP3 <dbl>, CP4 <dbl>,
## #   CP5 <dbl>, In1.1 <chr>, In2.1 <chr>, InAverage <dbl>, `Group 1` <chr>,
## #   `Group 2` <chr>
library(dplyr)
# Cross-tabulate the In1 and In2 responses. The summarised table remains
# grouped by In1, so Percentage is the share of each In2 answer among
# respondents with the same In1 answer (0-1 proportion).
Data %>%
  group_by(In1, In2) %>%
  summarise(count = n()) %>%
  mutate(Percentage = prop.table(count))
## `summarise()` has grouped output by 'In1'. You can override using the `.groups`
## argument.
## # A tibble: 27 x 4
## # Groups:   In1 [7]
##      In1   In2 count Percentage
##    <dbl> <dbl> <int>      <dbl>
##  1     1     1     2      0.4  
##  2     1     3     1      0.2  
##  3     1     4     1      0.2  
##  4     1     7     1      0.2  
##  5     2     2     1      0.25 
##  6     2     3     1      0.25 
##  7     2     5     2      0.5  
##  8     3     3     1      0.167
##  9     3     5     4      0.667
## 10     3     7     1      0.167
## # ... with 17 more rows
# Same In1 x In2 cross-tabulation as the statement just before it, evaluated
# and printed a second time. Percentage is the within-In1 proportion.
Data %>%
  group_by(In1, In2) %>%
  summarise(count = n()) %>%
  mutate(Percentage = prop.table(count))
## `summarise()` has grouped output by 'In1'. You can override using the `.groups`
## argument.
## # A tibble: 27 x 4
## # Groups:   In1 [7]
##      In1   In2 count Percentage
##    <dbl> <dbl> <int>      <dbl>
##  1     1     1     2      0.4  
##  2     1     3     1      0.2  
##  3     1     4     1      0.2  
##  4     1     7     1      0.2  
##  5     2     2     1      0.25 
##  6     2     3     1      0.25 
##  7     2     5     2      0.5  
##  8     3     3     1      0.167
##  9     3     5     4      0.667
## 10     3     7     1      0.167
## # ... with 17 more rows
# Collapse `Group 1` into a two-level flag: "Normal" when the row's `Group 1`
# is "Normal", otherwise "Non-normal".
# The comparison must be made against `Group 1`, the column that actually
# holds the "Normal" label; Age is numeric and can never equal the string
# "Normal", so testing Age would mark every row as "Non-normal".
Data <- Data %>%
  mutate(
    `Summary Scale` = ifelse(`Group 1` == "Normal", "Normal", "Non-normal")
  )
# Summary statistics: number of respondents at each InAverage value inside
# every `Group 1` bin. The result stays grouped by `Group 1`, so Percentage
# is the within-bin proportion (0-1).
Data %>%
  group_by(`Group 1`, InAverage) %>%
  summarise(count = n()) %>%
  mutate(Percentage = prop.table(count))
## `summarise()` has grouped output by 'Group 1'. You can override using the
## `.groups` argument.
## # A tibble: 26 x 4
## # Groups:   Group 1 [3]
##    `Group 1`              InAverage count Percentage
##    <chr>                      <dbl> <int>      <dbl>
##  1 Less than 20 years old         5     1     0.0345
##  2 Less than 20 years old         8     1     0.0345
##  3 Less than 20 years old        11     3     0.103 
##  4 Less than 20 years old        12     3     0.103 
##  5 Less than 20 years old        14     2     0.0690
##  6 Less than 20 years old        15     7     0.241 
##  7 Less than 20 years old        16     3     0.103 
##  8 Less than 20 years old        17     1     0.0345
##  9 Less than 20 years old        18     2     0.0690
## 10 Less than 20 years old        19     6     0.207 
## # ... with 16 more rows
# Summary statistics: number of respondents at each InAverage value inside
# every `Group 2` (age) bin. The result stays grouped by `Group 2`, so
# Percentage is the within-bin proportion (0-1).
Data %>%
  group_by(`Group 2`, InAverage) %>%
  summarise(count = n()) %>%
  mutate(Percentage = prop.table(count))
## `summarise()` has grouped output by 'Group 2'. You can override using the
## `.groups` argument.
## # A tibble: 35 x 4
## # Groups:   Group 2 [2]
##    `Group 2`              InAverage count Percentage
##    <chr>                      <dbl> <int>      <dbl>
##  1 19 years old and below        12     1     0.0714
##  2 19 years old and below        18     1     0.0714
##  3 19 years old and below        19     1     0.0714
##  4 19 years old and below        22     2     0.143 
##  5 19 years old and below        24     1     0.0714
##  6 19 years old and below        26     1     0.0714
##  7 19 years old and below        27     3     0.214 
##  8 19 years old and below        28     2     0.143 
##  9 19 years old and below        33     1     0.0714
## 10 19 years old and below        34     1     0.0714
## # ... with 25 more rows
# Summary statistics: cross-tabulation of the two grouping columns. The
# result stays grouped by `Group 1`, so Percentage is the share of each
# `Group 2` level within a `Group 1` bin (0-1 proportion).
Data %>%
  group_by(`Group 1`, `Group 2`) %>%
  summarise(count = n()) %>%
  mutate(Percentage = prop.table(count))
## `summarise()` has grouped output by 'Group 1'. You can override using the
## `.groups` argument.
## # A tibble: 5 x 4
## # Groups:   Group 1 [3]
##   `Group 1`              `Group 2`              count Percentage
##   <chr>                  <chr>                  <int>      <dbl>
## 1 Less than 20 years old 19 years old and below     3     0.103 
## 2 Less than 20 years old at most 20 years old      26     0.897 
## 3 More than 20 years old 19 years old and below    11     0.0866
## 4 More than 20 years old at most 20 years old     116     0.913 
## 5 Normal                 at most 20 years old       7     1
# Per-course descriptive statistics of the InAverage score: number of
# respondents, mean, and sample standard deviation.
Data %>%
  group_by(`Course Taken`) %>%
  summarise(
    Frequency = n(),
    Mean = mean(InAverage),
    `Standard Deviation` = sd(InAverage)
  )
## # A tibble: 6 x 4
##   `Course Taken`            Frequency  Mean `Standard Deviation`
##   <chr>                         <int> <dbl>                <dbl>
## 1 BS Biology                       33  24.9                 5.18
## 2 BS Civil Engineering             16  24.2                 4.81
## 3 BS Electrical Engineering        17  24.2                 3.86
## 4 BS Mathematics                   33  23.7                 5.85
## 5 BSED Biology                     32  23.3                 5.73
## 6 BSED English                     32  23.7                 6.57

# Research question: is there a significant difference between the two age
# groups in terms of the variable "InAverage"?
#
# Fits an ANOVA of InAverage on `Group 1` and `Group 2` as additive main
# effects (the formula uses `+`, so no interaction term is included) and
# stores the fit in `two.way`.
# NOTE(review): `Group 1` is itself a binning of InAverage, so its effect on
# InAverage is circular and will look highly significant by construction.
# If the question concerns only the two age groups, a model on `Group 2`
# alone may be what is intended -- confirm.

two.way <- aov(InAverage ~ `Group 1` + `Group 2`, data = Data)

# Print the ANOVA table (Df, sums of squares, F value and p-value per term).
summary(two.way)
##              Df Sum Sq Mean Sq F value Pr(>F)    
## `Group 1`     2   3196  1597.8 146.403 <2e-16 ***
## `Group 2`     1     15    14.8   1.359  0.245    
## Residuals   159   1735    10.9                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Research question: is there a significant difference among the courses
# taken in terms of the variable "InAverage"?
#
# Fits a one-factor ANOVA of InAverage on `Course Taken` and stores the fit
# in `one.way`.

one.way <- aov(InAverage ~ `Course Taken`, data = Data)

# Print the ANOVA table (Df, sums of squares, F value and p-value).
summary(one.way)
##                 Df Sum Sq Mean Sq F value Pr(>F)
## `Course Taken`   5     49   9.783   0.314  0.904
## Residuals      157   4897  31.189