#task 2

Importing data:

# Attach readxl, which provides read_xlsx() for reading Excel workbooks.
library(readxl)

# Read the workbook and convert the result to a base data.frame in one step.
business <- as.data.frame(read_xlsx("Business School.xlsx"))

Showing basic parameters:

# Print a per-column summary of every column in the imported data frame.
summary(business)
##    Student ID     Undergrad Degree   Undergrad Grade    MBA Grade    
##  Min.   :  1.00   Length:100         Min.   : 61.20   Min.   :58.14  
##  1st Qu.: 25.75   Class :character   1st Qu.: 71.47   1st Qu.:71.14  
##  Median : 50.50   Mode  :character   Median : 76.65   Median :76.38  
##  Mean   : 50.50                      Mean   : 76.90   Mean   :76.04  
##  3rd Qu.: 75.25                      3rd Qu.: 81.70   3rd Qu.:82.15  
##  Max.   :100.00                      Max.   :100.00   Max.   :95.00  
##  Work Experience    Employability (Before) Employability (After)
##  Length:100         Min.   :101.0          Min.   :119.0        
##  Class :character   1st Qu.:245.8          1st Qu.:312.0        
##  Mode  :character   Median :256.8          Median :435.6        
##                     Mean   :257.9          Mean   :422.7        
##                     3rd Qu.:261.0          3rd Qu.:529.0        
##                     Max.   :421.0          Max.   :631.0        
##     Status          Annual Salary   
##  Length:100         Min.   : 20000  
##  Class :character   1st Qu.: 87125  
##  Mode  :character   Median :103500  
##                     Mean   :109058  
##                     3rd Qu.:124000  
##                     Max.   :340000

Taking out variables that are not numeric (and Student ID, which is numeric but only an identifier):

# Summarise only the numeric measurement columns. They are selected by name
# rather than by position so the selection does not silently pick the wrong
# columns if the column order of the workbook changes.
# Student ID is numeric but is only an identifier, so it is left out as well.
summary(business[, c("Undergrad Grade", "MBA Grade",
                     "Employability (Before)", "Employability (After)",
                     "Annual Salary")])
##  Undergrad Grade    MBA Grade     Employability (Before) Employability (After)
##  Min.   : 61.20   Min.   :58.14   Min.   :101.0          Min.   :119.0        
##  1st Qu.: 71.47   1st Qu.:71.14   1st Qu.:245.8          1st Qu.:312.0        
##  Median : 76.65   Median :76.38   Median :256.8          Median :435.6        
##  Mean   : 76.90   Mean   :76.04   Mean   :257.9          Mean   :422.7        
##  3rd Qu.: 81.70   3rd Qu.:82.15   3rd Qu.:261.0          3rd Qu.:529.0        
##  Max.   :100.00   Max.   :95.00   Max.   :421.0          Max.   :631.0        
##  Annual Salary   
##  Min.   : 20000  
##  1st Qu.: 87125  
##  Median :103500  
##  Mean   :109058  
##  3rd Qu.:124000  
##  Max.   :340000
# Attach pastecs, which provides stat.desc() for descriptive statistics.
library(pastecs)

# Descriptive statistics for the numeric measurement columns. The columns are
# selected by name rather than by position so the call keeps working if the
# column order of the workbook changes.
stat.desc(business[, c("Undergrad Grade", "MBA Grade",
                       "Employability (Before)", "Employability (After)",
                       "Annual Salary")])
##              Undergrad Grade    MBA Grade Employability (Before)
## nbr.val          100.0000000  100.0000000             100.000000
## nbr.null           0.0000000    0.0000000               0.000000
## nbr.na             0.0000000    0.0000000               0.000000
## min               61.2000000   58.1400000             101.000000
## max              100.0000000   95.0000000             421.000000
## range             38.8000000   36.8600000             320.000000
## sum             7689.9000000 7604.0550000           25793.080510
## median            76.6500000   76.3800000             256.831735
## mean              76.8990000   76.0405500             257.930805
## SE.mean            0.7461856    0.7675114               5.934729
## CI.mean.0.95       1.4805941    1.5229091              11.775790
## var               55.6792919   58.9073727            3522.101083
## std.dev            7.4618558    7.6751139              59.347292
## coef.var           0.0970345    0.1009345               0.230090
##              Employability (After) Annual Salary
## nbr.val               1.000000e+02  1.000000e+02
## nbr.null              0.000000e+00  0.000000e+00
## nbr.na                0.000000e+00  0.000000e+00
## min                   1.190000e+02  2.000000e+04
## max                   6.310322e+02  3.400000e+05
## range                 5.120322e+02  3.200000e+05
## sum                   4.226906e+04  1.090580e+07
## median                4.356379e+02  1.035000e+05
## mean                  4.226906e+02  1.090580e+05
## SE.mean               1.292335e+01  4.150149e+03
## CI.mean.0.95          2.564273e+01  8.234796e+03
## var                   1.670130e+04  1.722373e+09
## std.dev               1.292335e+02  4.150149e+04
## coef.var              3.057402e-01  3.805451e-01
# Same descriptive statistics, rounded to whole numbers for readability.
# Columns are selected by name rather than by position so the call keeps
# working if the column order of the workbook changes.
round(stat.desc(business[, c("Undergrad Grade", "MBA Grade",
                             "Employability (Before)", "Employability (After)",
                             "Annual Salary")]))
##              Undergrad Grade MBA Grade Employability (Before)
## nbr.val                  100       100                    100
## nbr.null                   0         0                      0
## nbr.na                     0         0                      0
## min                       61        58                    101
## max                      100        95                    421
## range                     39        37                    320
## sum                     7690      7604                  25793
## median                    77        76                    257
## mean                      77        76                    258
## SE.mean                    1         1                      6
## CI.mean.0.95               1         2                     12
## var                       56        59                   3522
## std.dev                    7         8                     59
## coef.var                   0         0                      0
##              Employability (After) Annual Salary
## nbr.val                        100           100
## nbr.null                         0             0
## nbr.na                           0             0
## min                            119         20000
## max                            631        340000
## range                          512        320000
## sum                          42269      10905800
## median                         436        103500
## mean                           423        109058
## SE.mean                         13          4150
## CI.mean.0.95                    26          8235
## var                          16701    1722373475
## std.dev                        129         41501
## coef.var                         0             0

Calculating descriptive statistics.

# Descriptive statistics for the Annual Salary column only.
stat.desc(business[["Annual Salary"]])
##      nbr.val     nbr.null       nbr.na          min          max        range 
## 1.000000e+02 0.000000e+00 0.000000e+00 2.000000e+04 3.400000e+05 3.200000e+05 
##          sum       median         mean      SE.mean CI.mean.0.95          var 
## 1.090580e+07 1.035000e+05 1.090580e+05 4.150149e+03 8.234796e+03 1.722373e+09 
##      std.dev     coef.var 
## 4.150149e+04 3.805451e-01
# The same statistics for Annual Salary, rounded to one decimal place.
round(stat.desc(business[["Annual Salary"]]), digits = 1)
##      nbr.val     nbr.null       nbr.na          min          max        range 
##        100.0          0.0          0.0      20000.0     340000.0     320000.0 
##          sum       median         mean      SE.mean CI.mean.0.95          var 
##   10905800.0     103500.0     109058.0       4150.1       8234.8 1722373474.7 
##      std.dev     coef.var 
##      41501.5          0.4

Calculating frequency for each degree:

# Count how many students hold each undergraduate degree.
table(business[["Undergrad Degree"]])
## 
##              Art         Business Computer Science      Engineering 
##                6               35               25                9 
##          Finance 
##               25

Creating a new data frame (in a second window) so that the frequency data can be used in a chart:

# Turn the degree counts into a two-column data frame and name the columns
# "Diploma" (degree) and "Count" (number of students) in a single step.
freq <- setNames(
  as.data.frame(table(business$`Undergrad Degree`)),
  c("Diploma", "Count")
)

Making a bar chart using ggplot to show the frequency of undergraduate degrees among students.

# Attach ggplot2 for plotting.
library(ggplot2)

# Bar chart of the number of students per degree, with bars ordered from the
# most to the least frequent degree and the count printed above each bar.
ggplot(data = freq, mapping = aes(x = reorder(Diploma, -Count), y = Count)) +
  geom_col(fill = "steelblue") +
  geom_text(mapping = aes(label = Count), vjust = -0.3) +
  ggtitle("Frequency of Degrees") +
  xlab("Degree") +
  ylab("Amount of students") +
  theme_minimal()

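From the chart we are able to see that the data looks asymmetrical to the right, but this shape comes from sorting the bars by count: the degree is a nominal variable, so the chart shows an uneven distribution (Business is the most common degree with 35 students, Art the least common with 6) rather than skewness in the statistical sense.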

Continuing with t test for the average grade of MBA students.

Null hypothesis: The average grade in the population is equal to 74. Alternative hypothesis: The average grade in the population is different from 74.

Using t.test function to get the p value.

# Two-sided one-sample t-test of the mean MBA grade against the reference
# value of 74.
t.test(business$`MBA Grade`, alternative = "two.sided", mu = 74)
## 
##  One Sample t-test
## 
## data:  business$`MBA Grade`
## t = 2.6587, df = 99, p-value = 0.00915
## alternative hypothesis: true mean is not equal to 74
## 95 percent confidence interval:
##  74.51764 77.56346
## sample estimates:
## mean of x 
##  76.04055

If we take alpha to be 5%, we can see that the p-value (0.009) is smaller than 0.05, meaning we reject the null hypothesis in favour of the alternative hypothesis. This means the average grade is different from 74, i.e. it has changed from last year.

Calculating effect size:

# Attach effectsize, which provides cohens_d().
library(effectsize)

# Cohen's d for the MBA grades relative to the reference value of 74.
effectsize::cohens_d(business$`MBA Grade`, mu = 74)
## Cohen's d |       95% CI
## ------------------------
## 0.27      | [0.07, 0.46]
## 
## - Deviation from a difference of 74.

Interpreting the result:

library(effectsize)

# Classify the effect size d = 0.27 using Cohen's (1988) rules.
# The value must be written with a decimal point: `0,27` is parsed as two
# separate arguments, so the function would classify d = 0 instead of 0.27.
effectsize::interpret_cohens_d(0.27, rules = "cohen1988")
## [1] "small"
## (Rules: cohen1988)

Interpreting d = 0.27 under Cohen's (1988) rules, we are able to see that the effect size is small (it lies between 0.2 and 0.5), meaning that the change in the arithmetic mean from last year's students to this year's was small.

We know that the average grade has changed, but to check whether it increased or decreased, we have to calculate the arithmetic mean of our sample.

mean(business$`MBA Grade`)
## [1] 76.04055

Because the arithmetic mean of the sample (76.04) is larger than 74, we can conclude that the average grade increased.