# Data analysis of a salary data set for male and female employees: "Equal pay for equal work"
# Load the salary data set; "~" expands to the current user's home directory.
sal <- read.csv("~/Book1.csv")
# View() opens a GUI data viewer, so it is only called in an interactive
# session; this keeps the script runnable through Rscript or on a headless
# machine.
if (interactive()) {
  View(sal)
}
# Number of rows and columns in the raw data.
dim(sal)
## [1] 50 12
# Per-column summary statistics of the raw data.
summary(sal)
## ID Salary Compa.ratio Midpoint
## Min. : 1.00 Min. :21.80 Min. :0.8760 Min. :23.00
## 1st Qu.:13.25 1st Qu.:24.32 1st Qu.:0.9985 1st Qu.:23.00
## Median :25.50 Median :42.20 Median :1.0540 Median :40.00
## Mean :25.50 Mean :44.45 Mean :1.0503 Mean :41.76
## 3rd Qu.:37.75 3rd Qu.:61.73 3rd Qu.:1.1033 3rd Qu.:57.00
## Max. :50.00 Max. :79.10 Max. :1.1840 Max. :67.00
## Age Performance.Rating Service Gender
## Min. :22.00 Min. : 55.0 Min. : 1.00 Min. :0.0
## 1st Qu.:30.00 1st Qu.: 80.0 1st Qu.: 4.25 1st Qu.:0.0
## Median :35.00 Median : 90.0 Median : 8.00 Median :0.5
## Mean :35.72 Mean : 85.9 Mean : 8.96 Mean :0.5
## 3rd Qu.:42.00 3rd Qu.: 95.0 3rd Qu.:12.00 3rd Qu.:1.0
## Max. :52.00 Max. :100.0 Max. :22.00 Max. :1.0
## Raise Degree Gender1 Grade
## Min. :3.000 Min. :0.0 F:25 A:15
## 1st Qu.:4.300 1st Qu.:0.0 M:25 B: 7
## Median :4.900 Median :0.5 C: 5
## Mean :4.938 Mean :0.5 D: 5
## 3rd Qu.:5.600 3rd Qu.:1.0 E:12
## Max. :6.600 Max. :1.0 F: 6
# Drop the two categorical columns (Gender1 and Grade, columns 11 and 12 of
# the file) by name rather than by position, so the selection does not
# silently remove the wrong columns if the column order ever changes.
sal1 <- sal[, !(names(sal) %in% c("Gender1", "Grade")), drop = FALSE]
# Total number of missing values left in the analysis frame.
sum(is.na(sal1))
## [1] 0
# Spread and centre of salary across all employees.
# Variance:
var(sal1[["Salary"]])
## [1] 365.6005
# Standard deviation:
sd(sal1[["Salary"]])
## [1] 19.12068
# Average salary:
mean(sal1[["Salary"]])
## [1] 44.454
library(LaplacesDemon)
## Warning: package 'LaplacesDemon' was built under R version 3.6.2
# Mode of salary, taken from the analysis frame `sal1` like every other
# statistic in this script (its Salary column is the same as in `sal`).
# NOTE(review): the printed value is not one of the observed salaries, so
# Mode() appears to return an estimated mode for continuous data; confirm
# against the LaplacesDemon documentation.
Mode(sal1$Salary)
## [1] 26.00804
# Highest salary in the data set.
max(sal1$Salary)
## [1] 79.1
# Normal Q-Q plot of salary, used to judge how close it is to a normal
# distribution.
# dev.new() opens a fresh graphics device on any platform, whereas
# windows() is only available on Windows builds of R.
dev.new()
qqnorm(sal1$Salary)
# Reference line through the first and third quartiles; points far from it
# indicate departure from normality.
qqline(sal1$Salary)

# Female employees' details: rows of sal1 where Gender == 1.
# The former attach(sal1) call was dropped: every column in this script is
# accessed as `<data frame>$<column>`, so attaching the frame added nothing
# and only risked masking other objects on the search path.
f <- sal1[sal1$Gender == 1, ]
f
## ID Salary Compa.ratio Midpoint Age Performance.Rating Service Gender Raise
## 3 3 36.2 1.168 31 30 75 5 1 3.6
## 7 7 42.1 1.052 40 32 100 8 1 5.7
## 8 8 22.8 0.992 23 32 90 9 1 5.8
## 10 10 23.0 0.998 23 30 80 7 1 4.7
## 11 11 21.8 0.949 23 41 100 19 1 4.8
## 13 13 40.5 1.012 40 30 100 2 1 4.7
## 14 14 23.6 1.028 23 32 90 12 1 6.0
## 15 15 23.4 1.016 23 32 80 8 1 4.9
## 17 17 67.5 1.184 57 27 55 3 1 3.0
## 18 18 34.1 1.101 31 31 80 11 1 5.6
## 20 20 35.2 1.135 31 44 70 16 1 4.8
## 22 22 51.1 1.065 48 48 65 6 1 3.8
## 23 23 25.3 1.101 23 36 65 6 1 3.3
## 24 24 51.0 1.063 48 30 75 9 1 3.8
## 26 26 24.4 1.060 23 22 95 2 1 6.2
## 28 28 77.2 1.152 67 44 95 9 1 4.4
## 31 31 22.7 0.985 23 29 60 4 1 3.9
## 35 35 23.9 1.038 23 23 90 4 1 5.3
## 36 36 22.8 0.993 23 27 75 3 1 4.3
## 37 37 22.6 0.984 23 22 95 2 1 6.2
## 39 39 33.5 1.081 31 27 90 6 1 5.5
## 42 42 24.1 1.049 23 32 100 8 1 5.7
## 43 43 74.5 1.112 67 42 95 20 1 5.5
## 45 45 48.9 1.018 48 36 95 8 1 5.2
## 48 48 65.6 1.150 57 34 90 11 1 5.3
## Degree
## 3 1
## 7 1
## 8 1
## 10 1
## 11 1
## 13 0
## 14 1
## 15 1
## 17 1
## 18 0
## 20 0
## 22 1
## 23 0
## 24 0
## 26 0
## 28 0
## 31 1
## 35 0
## 36 0
## 37 0
## 39 0
## 42 1
## 43 0
## 45 1
## 48 1
# Shape of the female subset (rows, columns).
dim(f)
## [1] 25 10
# summary(f) => min, 1st quartile, median, mean, 3rd quartile and max per column
summary(f)
## ID Salary Compa.ratio Midpoint Age
## Min. : 3.0 Min. :21.80 Min. :0.949 Min. :23.00 Min. :22.00
## 1st Qu.:14.0 1st Qu.:23.40 1st Qu.:1.012 1st Qu.:23.00 1st Qu.:29.00
## Median :23.0 Median :33.50 Median :1.052 Median :31.00 Median :32.00
## Mean :24.6 Mean :37.51 Mean :1.059 Mean :34.88 Mean :32.52
## 3rd Qu.:36.0 3rd Qu.:48.90 3rd Qu.:1.101 3rd Qu.:48.00 3rd Qu.:36.00
## Max. :48.0 Max. :77.20 Max. :1.184 Max. :67.00 Max. :48.00
## Performance.Rating Service Gender Raise Degree
## Min. : 55.0 Min. : 2.00 Min. :1 Min. :3.00 Min. :0.00
## 1st Qu.: 75.0 1st Qu.: 4.00 1st Qu.:1 1st Qu.:4.30 1st Qu.:0.00
## Median : 90.0 Median : 8.00 Median :1 Median :4.90 Median :1.00
## Mean : 84.2 Mean : 7.92 Mean :1 Mean :4.88 Mean :0.52
## 3rd Qu.: 95.0 3rd Qu.: 9.00 3rd Qu.:1 3rd Qu.:5.60 3rd Qu.:1.00
## Max. :100.0 Max. :20.00 Max. :1 Max. :6.20 Max. :1.00
# Female salary: maximum, mean, range, variance and standard deviation.
max(f[["Salary"]])
## [1] 77.2
mean(f[["Salary"]])
## [1] 37.512
range(f[["Salary"]])
## [1] 21.8 77.2
var(f[["Salary"]])
## [1] 316.8136
sd(f[["Salary"]])
## [1] 17.79926
# Male employees' details: rows of sal1 where Gender == 0.
m <- sal1[sal1[["Gender"]] == 0, ]
print(m)
## ID Salary Compa.ratio Midpoint Age Performance.Rating Service Gender Raise
## 1 1 57.7 1.012 57 34 85 8 0 5.7
## 2 2 27.9 0.899 31 52 80 7 0 3.9
## 4 4 63.7 1.117 57 42 100 16 0 5.5
## 5 5 45.5 0.947 48 36 90 16 0 5.7
## 6 6 74.4 1.110 67 36 70 12 0 4.5
## 9 9 77.7 1.159 67 49 100 10 0 4.0
## 12 12 55.5 0.973 57 52 95 22 0 4.5
## 16 16 42.3 1.058 40 44 90 4 0 5.7
## 19 19 24.3 1.056 23 32 85 1 0 4.6
## 21 21 79.1 1.181 67 43 95 13 0 6.3
## 25 25 23.0 1.000 23 41 70 4 0 4.0
## 27 27 42.9 1.073 40 35 80 7 0 3.9
## 29 29 77.5 1.157 67 52 95 5 0 5.4
## 30 30 46.2 0.963 48 45 90 18 0 4.3
## 32 32 27.2 0.876 31 25 95 4 0 5.6
## 33 33 62.9 1.104 57 35 90 9 0 5.5
## 34 34 28.1 0.905 31 26 80 2 0 4.9
## 38 38 60.0 1.052 57 45 95 11 0 4.5
## 40 40 23.0 1.002 23 24 90 2 0 6.3
## 41 41 42.1 1.052 40 25 80 5 0 4.3
## 44 44 63.8 1.120 57 45 90 16 0 5.2
## 46 46 61.8 1.085 57 39 75 20 0 3.9
## 47 47 61.5 1.079 57 37 95 5 0 5.5
## 49 49 54.9 0.963 57 41 95 21 0 6.6
## 50 50 61.9 1.086 57 38 80 12 0 4.6
## Degree
## 1 0
## 2 0
## 4 1
## 5 1
## 6 1
## 9 1
## 12 0
## 16 0
## 19 1
## 21 1
## 25 0
## 27 1
## 29 0
## 30 0
## 32 0
## 33 1
## 34 1
## 38 0
## 40 0
## 41 0
## 44 1
## 46 1
## 47 1
## 49 0
## 50 0
# Shape of the male subset (rows, columns).
dim(m)
## [1] 25 10
# summary(m) => min, 1st quartile, median, mean, 3rd quartile and max per column
summary(m)
## ID Salary Compa.ratio Midpoint Age
## Min. : 1.0 Min. :23.0 Min. :0.876 Min. :23.00 Min. :24.00
## 1st Qu.:12.0 1st Qu.:42.1 1st Qu.:0.973 1st Qu.:40.00 1st Qu.:35.00
## Median :29.0 Median :55.5 Median :1.056 Median :57.00 Median :39.00
## Mean :26.4 Mean :51.4 Mean :1.041 Mean :48.64 Mean :38.92
## 3rd Qu.:40.0 3rd Qu.:62.9 3rd Qu.:1.104 3rd Qu.:57.00 3rd Qu.:45.00
## Max. :50.0 Max. :79.1 Max. :1.181 Max. :67.00 Max. :52.00
## Performance.Rating Service Gender Raise Degree
## Min. : 70.0 Min. : 1 Min. :0 Min. :3.900 Min. :0.00
## 1st Qu.: 80.0 1st Qu.: 5 1st Qu.:0 1st Qu.:4.300 1st Qu.:0.00
## Median : 90.0 Median : 9 Median :0 Median :4.900 Median :0.00
## Mean : 87.6 Mean :10 Mean :0 Mean :4.996 Mean :0.48
## 3rd Qu.: 95.0 3rd Qu.:16 3rd Qu.:0 3rd Qu.:5.600 3rd Qu.:1.00
## Max. :100.0 Max. :22 Max. :0 Max. :6.600 Max. :1.00
# Male salary: mean, range, variance, standard deviation and maximum.
mean(m[["Salary"]])
## [1] 51.396
range(m[["Salary"]])
## [1] 23.0 79.1
var(m[["Salary"]])
## [1] 329.2221
sd(m[["Salary"]])
## [1] 18.14448
max(m[["Salary"]])
## [1] 79.1
# Relative pay gap: (mean male salary - mean female salary) / mean male salary.
# local() keeps the two intermediate means out of the global environment.
sal_gap <- local({
  avg_m <- mean(m$Salary)
  avg_f <- mean(f$Salary)
  (avg_m - avg_f) / avg_m
})
sal_gap
## [1] 0.2701378