# Basic stats
# MBA programme 2018
# Load the MBA data set into the data frame `x`.
# NOTE: the absolute path below is specific to one machine; adjust it when
# running the script elsewhere.
x <- read.csv("C:/Users/EOJVD/Desktop/data science classes/data science classes/Data Science Classes Yogesh/mba.csv")
dim(x)
## [1] 773 3
# dimensions of the data set (number of rows, number of columns)
colnames(x)
## [1] "data.srno" "workex" "gmat"
# displays the column names
head(x, n = 12)
## data.srno workex gmat
## 1 1 21 720
## 2 2 107 640
## 3 3 57 740
## 4 4 99 690
## 5 5 208 710
## 6 6 136 660
## 7 7 70 660
## 8 8 103 710
## 9 9 79 700
## 10 10 22 730
## 11 11 69 700
## 12 12 41 740
# head() displays the first six records of the data set by default; passing 12 displays the first 12 records of x
tail(x, n = 3L)
## data.srno workex gmat
## 771 771 28 610
## 772 772 10 610
## 773 773 52 620
# tail() shows the last six records by default; n = 3 limits it to the last three
x[[2L]]
## [1] 21 107 57 99 208 136 70 103 79 22 69 41 72 69 20 21 19
## [18] 86 231 20 175 21 44 23 20 70 46 33 130 57 57 45 55 42
## [35] 34 55 44 79 45 38 44 83 118 45 89 77 91 61 47 69 59
## [52] 32 74 279 33 33 34 110 44 44 46 58 45 34 54 45 33 82
## [69] 72 77 43 54 92 90 66 38 25 57 31 58 55 68 79 71 68
## [86] 32 32 45 69 65 57 45 92 57 69 32 70 59 44 44 46 33
## [103] 46 82 53 33 34 37 55 52 68 51 69 47 56 45 56 48 34
## [120] 55 43 69 31 46 58 47 56 43 58 27 46 58 34 34 45 68
## [137] 34 46 45 56 33 33 44 33 69 82 64 126 80 80 45 45 68
## [154] 58 45 130 80 130 34 80 48 45 40 58 35 40 79 34 56 68
## [171] 32 57 53 57 45 46 77 44 68 45 54 58 59 38 94 46 96
## [188] 44 50 91 117 58 32 57 57 57 32 33 41 70 45 68 90 82
## [205] 33 46 45 45 79 109 53 44 33 70 44 51 85 57 66 44 50
## [222] 30 33 89 55 128 45 33 35 33 58 49 57 45 63 81 56 45
## [239] 86 45 69 43 45 70 33 43 34 47 57 70 93 92 32 80 33
## [256] 32 55 46 66 65 42 71 41 30 45 91 46 44 74 45 112 46
## [273] 69 32 69 44 69 45 81 82 44 34 46 34 43 69 34 32 45
## [290] 50 57 50 58 92 131 45 64 48 32 32 100 56 56 45 68 28
## [307] 57 44 58 46 142 47 64 125 55 32 51 94 53 57 53 56 88
## [324] 62 69 91 93 106 68 56 57 56 70 70 40 46 45 52 80 69
## [341] 56 82 58 68 39 92 34 42 69 33 268 70 54 44 42 44 57
## [358] 81 103 69 34 33 69 57 66 34 69 45 45 42 47 38 82 89
## [375] 45 34 53 69 48 90 22 43 41 82 56 80 37 33 81 131 43
## [392] 69 72 34 58 69 81 116 77 45 57 56 57 109 44 37 45 46
## [409] 68 78 63 45 67 33 69 45 176 82 55 32 45 57 69 32 93
## [426] 131 49 39 37 111 66 45 44 35 67 33 57 52 49 29 31 71
## [443] 34 46 46 122 34 29 70 43 116 69 43 44 57 70 55 44 35
## [460] 58 40 33 79 46 32 81 29 87 69 24 68 49 58 69 38 45
## [477] 43 57 69 34 33 38 73 34 29 57 40 93 29 98 33 45 36
## [494] 46 40 54 45 54 57 52 141 81 44 45 57 54 59 62 33 58
## [511] 46 28 68 143 39 44 91 84 69 57 33 33 92 44 45 25 33
## [528] 45 86 46 105 33 41 45 44 34 37 124 32 58 43 44 93 34
## [545] 75 92 42 43 70 60 36 36 68 35 44 70 34 54 70 34 36
## [562] 69 45 132 81 78 34 43 58 82 34 34 62 50 45 70 59 33
## [579] 99 57 30 46 48 35 46 57 43 58 58 34 44 45 46 56 33
## [596] 92 31 33 69 53 37 119 30 57 33 55 32 43 78 70 53 69
## [613] 56 45 9 38 82 68 44 74 33 72 57 44 29 44 79 30 34
## [630] 46 45 81 45 83 44 28 33 58 61 72 58 45 66 44 45 81
## [647] 33 45 31 82 30 34 81 47 93 22 40 58 57 24 39 40 46
## [664] 57 44 50 34 92 43 106 69 43 69 57 43 101 33 33 130 58
## [681] 33 56 30 44 58 57 44 81 94 58 56 57 45 79 33 57 58
## [698] 43 42 68 34 22 27 41 42 79 57 85 33 59 57 40 81 58
## [715] 82 52 56 34 71 41 53 32 45 69 44 45 118 45 46 69 81
## [732] 44 34 52 55 69 83 39 69 31 33 56 32 94 105 95 43 89
## [749] 68 46 69 93 60 69 68 46 23 50 33 39 34 69 34 31 29
## [766] 46 44 38 88 132 28 10 52
# displays the values of column 2 (inside the square brackets, x[rows, columns] selects which rows and columns to display)
# Extract the gmat column explicitly instead of calling attach(x): attach()
# places every column of x on the search path, where the names can silently
# mask (or be masked by) other objects.
gmat <- x[["gmat"]]
mean(gmat)
## [1] 711.1643
# mean of the gmat column (equivalent to mean(x$gmat))
median(gmat)
## [1] 710
# median of the gmat column
sd(gmat)
## [1] 29.33971
# standard deviation of the gmat column
var(gmat)
## [1] 860.8188
# variance of the gmat column
# Spread between the largest and smallest score. Stored as gmat_range so the
# name of the base range() function is not reused for a variable.
gmat_range <- diff(range(gmat))
gmat_range
## [1] 180
# install.packages("e1071")  # run once if the package is not installed
library("e1071")
# e1071 supplies the skewness() and kurtosis() functions used below
skewness(x = gmat)
## [1] -0.5931675
kurtosis(x = gmat)
## [1] 1.141141
# windows()
# (when uncommented, opens a new blank graphics window)
hist(x = gmat)

barplot(height = gmat)

boxplot(x = gmat)
