\(\Large \sf To \ clear \ all \ objects \ from \ the \ environment\)

rm(list = ls(all.names = TRUE))   # remove all objects, including hidden ones, from the workspace


\(\Large \sf Defining \ a \ Vector\)

vec <- c(20,34,56,23,45,22,60,23,56,78,23,45)


\(\Large \sf Mean \ of \ the \ vector\)

\(\Large \frac{\sum_{i=1}^{n} x_i}{n}\)

mean(vec)
## [1] 40.41667


\(\Large \sf Median\ of \ the \ vector\)

\(\Large \frac{(n+1)}{2} \normalsize \sf \ ranked \ value\)

\(\normalsize \sf \text{(for an even number of observations, as here, the median is the mean of the two middle ranked values)}\)

median(vec)
## [1] 39.5


\(\Large \sf Harmonic\ Mean\ of\ a\ vector\)

\(\Large \frac{n}{\sum_{i=1}^{n} \frac{1}{x_i}}\)

hm_ <- function(x) {
  # harmonic mean: n divided by the sum of the reciprocals
  return(length(x)/sum(1/x))
}
hm_(vec)
## [1] 32.88149
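
Equivalently, the harmonic mean is the reciprocal of the arithmetic mean of the reciprocals, so the same value can be obtained with a base-R one-liner (no extra packages assumed):

1/mean(1/vec)   # reciprocal of the mean of the reciprocals; same value as hm_(vec)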


\(\Large \sf Geometric\ Mean\ of\ a\ vector\)

\(\large \bar X_g = (X_1 \times X_2 \times \cdots \times X_n)^\frac{1}{n}\)

gm_ <- function(x) {
  # geometric mean: nth root of the product of the values
  return(prod(x)^(1/length(x)))
}
gm_(vec)
## [1] 36.39962
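
For long vectors the running product in prod() can overflow, so the geometric mean is often computed on the log scale instead. A minimal sketch in base R (the helper name gm_log_ is just illustrative):

gm_log_ <- function(x) {
  # exp of the mean log is algebraically the same as the nth root of the product,
  # but avoids overflow for long vectors
  return(exp(mean(log(x))))
}
gm_log_(vec)   # agrees with gm_(vec) above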



\(\sf \huge Variation\ and\ shape\)

\(\sf \Large Range\ of\ a\ vector\)

\(\sf \normalsize Range = (\large x_{max}-\large x_{min})\)

range_ <- function(x) {
  # range: the difference between the largest and smallest value
  return(max(x) - min(x))
}
range_(vec)
## [1] 58
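
Note that base R's own range() returns the minimum and maximum rather than their difference, so the same value can also be obtained with:

diff(range(vec))   # max minus min, using only base R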


\(\sf \Large Variance\ of\ a\ vector\)

\(\Large \sigma^2 = \frac{\sum_{i=1}^{n} (x_i-\bar X)^2}{n}\)

variance_ <- function(x){
    # population variance: the average squared deviation from the mean
    m_ <- mean(x)
    var_ <- 0
    for (ele in 1:length(x)){
      var_ <- var_ + (x[ele] - m_)^2
    }
    return (var_/length(x))
}
variance_(vec)
## [1] 335.9097
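
For comparison, base R's var() divides by n - 1 (the sample variance); rescaling it reproduces the population variance used above:

n <- length(vec)
var(vec)               # sample variance: divides by n - 1
var(vec) * (n - 1)/n   # rescaled to the population variance, matching variance_(vec)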


\(\sf \Large Standard\ deviation\ of\ a\ vector\)

\(\Large \sigma = \sqrt{\frac{\sum_{i=1}^{n} (x_i-\bar X)^2}{n}}\)

stddev_ <- function(x){
  # population standard deviation: square root of the population variance
  return (variance_(x) ^ 0.5)
}
stddev_(vec)
## [1] 18.32784


\(\sf \Large Coefficient\ of\ Variation\)

It measures the scatter in the data relative to the mean, expressed as a percentage of the mean

\(\Large \left(\frac {\sigma}{\mu}\right) \times 100\)

CoeffVar_ <- function(x){
  # coefficient of variation: the standard deviation as a percentage of the mean
  return ((stddev_(x)/mean(x)) * 100)
}
CoeffVar_(vec)
## [1] 45.34723


\(\sf \Large Z-Score\)

\(\Large Z = \Large(\frac{x-\mu}{\sigma})\)

zscore <- function(x){
  # z-score: how many (population) standard deviations each value lies from the mean
  m_ <- mean(x)
  s_ <- stddev_(x)
  temp_ <- c()
  for (ele in 1:length(x)){
    temp_ <- append(temp_, (x[ele] - m_)/s_)
  }
  return (temp_)
}
zscore(vec)
##  [1] -1.1139701 -0.3501049  0.8502548 -0.9502847  0.2500749 -1.0048465
##  [7]  1.0685020 -0.9502847  0.8502548  2.0506144 -0.9502847  0.2500749
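
Base R's scale() also standardises a vector, but it divides by sd(), which uses the sample (n - 1) standard deviation, so its values differ slightly from the ones above:

as.vector(scale(vec))   # z-scores based on the sample standard deviation sd(vec)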


\(\Large \sf Skewness\)
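
A common moment-based definition (used here as a sketch; the helper name skew_ is just illustrative): skewness measures the asymmetry of the data around the mean, with zero indicating symmetry, positive values a longer right tail and negative values a longer left tail.

\(\normalsize Skewness = \frac{\sum_{i=1}^{n}(x_i-\bar X)^3}{n\sigma^3}\)

skew_ <- function(x) {
  # moment coefficient of skewness: mean of the cubed z-scores,
  # using the population standard deviation stddev_() defined above
  return(mean(((x - mean(x))/stddev_(x))^3))
}
skew_(vec)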


\(\Large \sf Kurtosis\)
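
A common moment-based definition (again a sketch; the helper name kurt_ is illustrative): kurtosis measures how heavy the tails are relative to a Normal distribution, whose kurtosis is 3, so the excess kurtosis subtracts 3.

\(\normalsize Kurtosis = \frac{\sum_{i=1}^{n}(x_i-\bar X)^4}{n\sigma^4}\)

kurt_ <- function(x) {
  # moment coefficient of kurtosis: mean of the z-scores raised to the 4th power,
  # using the population standard deviation stddev_() defined above
  return(mean(((x - mean(x))/stddev_(x))^4))
}
kurt_(vec)       # kurtosis
kurt_(vec) - 3   # excess kurtosis relative to the Normal distribution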

\(\Large \sf Quartiles\)

\(\normalsize \sf Quartiles\ split\ the\ data\ into\ 4\ equal\ parts\)

\(\normalsize Q_1 = \frac {n+1}{4} \ \ \ \ \ \ \ \ Q_2 = 2\left(\frac {n+1}{4}\right)\)
\(\normalsize Q_3 = 3\left(\frac {n+1}{4}\right) \ \ \ \ Q_4 = 4\left(\frac {n+1}{4}\right)\)
\(\normalsize \sf \text{(these give the ranked positions of the quartiles, in the same sense as the median rank above)}\)

\(\Large \sf Percentiles\)

\(\normalsize \sf \text{Percentiles divide the data into 100 equal parts}\)

\(\Large \sf \text{The Interquartile Range (IQR)}\)

\(\sf \normalsize \text{The interquartile range is the difference between the third quartile }\textbf{(Q3) }\text{and the first quartile }\textbf{(Q1)}\)

\(\sf \ \ \ \ \ \ \ \ \ \ \ \textbf{IQR} = \textbf{Q3 - Q1}\)
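
Base R covers all three of these with quantile() and IQR(). Note that quantile() interpolates by default (type = 7) rather than using the (n+1)/4 ranked-position rule above (that rule roughly corresponds to type = 6), so the two approaches can differ slightly on small samples:

quantile(vec, probs = c(0.25, 0.50, 0.75))   # Q1, Q2 (the median), Q3
quantile(vec, probs = 0.90)                  # 90th percentile
IQR(vec)                                     # interquartile range, Q3 - Q1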

\(\Large \sf \text{The Empirical rule}\)

\(\sf \normalsize \text{The Empirical rule states that, in a Normal distribution:}\)

\(\ \ \ \ \ \sf \normalsize \text{about 68\% of the values fall within 1 standard deviation of the mean,}\)
\(\ \ \ \ \ \sf \normalsize \text{about 95\% fall within 2 standard deviations, and}\)
\(\ \ \ \ \ \sf \normalsize \text{about 99.7\% fall within 3 standard deviations.}\)
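
A quick way to see the rule is to simulate a large Normal sample and count how many values fall within 1, 2 and 3 standard deviations of the mean (the sample size and seed below are arbitrary choices for this sketch):

set.seed(42)                    # arbitrary seed, for reproducibility
x <- rnorm(1e5)                 # a large standard Normal sample
z <- abs(x - mean(x))/sd(x)     # distance from the mean, in standard deviations
mean(z <= 1) * 100              # close to 68%
mean(z <= 2) * 100              # close to 95%
mean(z <= 3) * 100              # close to 99.7%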

\(\sf \Large \text{Chebyshev's theorem}\)

\(\sf \normalsize \text{For heavily skewed datasets that do not appear to be normally distributed, use Chebyshev's theorem:}\)

\(\sf \normalsize \text{Regardless of the shape of the distribution, the percentage of values that fall within } k \text{ standard deviations of the mean (for } k > 1 \text{) must be at least}\)

\(\Large \left(1-\frac{1}{k^2}\right) \times 100\)
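
For example, k = 2 guarantees at least 75% of the values within 2 standard deviations of the mean, and k = 3 at least about 88.9%, whatever the shape of the distribution (the helper name chebyshev_ is just illustrative):

chebyshev_ <- function(k) {
  # minimum percentage of values within k standard deviations of the mean (k > 1)
  return((1 - 1/k^2) * 100)
}
chebyshev_(2)   # at least 75%
chebyshev_(3)   # at least ~88.9%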

\(\sf \LARGE \text{The Covariance and the Coefficient of Correlation}\)

\(\large \sf \text{The Covariance}\)

\(\sf \normalsize \text{It measures the strength of the linear relationship between two numerical variables, but its magnitude depends on the units of X and Y}\)

\(Sample,\large \sf \ cov(X,Y) = \frac{\sum_{i=1}^{n}(X_i-\bar X)(Y_i-\bar Y)}{n-1}\)

\(Population,\large \sf \ cov(X,Y) = \frac{\sum_{i=1}^{n}(X_i-\bar X)(Y_i-\bar Y)}{n}\)

vec1 <- c(23,45,34,23,34,56,78,65,45,34)
vec2 <- c(65,54,34,45,23,45,67,88,96,33)
cov_ <- function(x, y, sample){
  # covariance: average cross-product of the deviations of X and Y from their means
  mx <- mean(x)
  my <- mean(y)
  numerator <- 0
  for (i in 1:length(x)){
    numerator <- numerator + (x[i]-mx)*(y[i]-my)
  }
  if (sample==T){
    return (numerator/(length(x)-1))   # sample covariance: divide by n - 1
  }else if (sample==F){
    return (numerator/length(x))       # population covariance: divide by n
  }else{
    return ('Check your parameters !!!')
  }
}
cov_(vec1,vec2,T) # sample covariance
## [1] 196.7778
cov_(vec1,vec2,F) # population covariance
## [1] 177.1
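
Base R's cov() returns the sample covariance (the n - 1 version), so it can be used as a check on the first call above:

cov(vec1, vec2)   # sample covariance, same value as cov_(vec1, vec2, T)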


\(\sf \large \text{The Coefficient of Correlation}\)

\(\sf \text{It measures the relative strength of a linear relationship between two numerical variables}\)

\(\large sample\ correlation,\ \ \bf r = \frac {Cov(X,Y)}{S_XS_Y}\)

vec1 <- c(23,45,34,23,34,56,78,65,45,34)
vec2 <- c(65,54,34,45,23,45,67,88,96,33)
corr_ <- function(x, y){
  # Pearson correlation: covariance divided by the product of the standard deviations
  # (using the population covariance and population sd in both places gives the
  #  same value as the sample versions, since the n vs n - 1 factors cancel)
  return(cov_(x,y,F)/(stddev_(x)*stddev_(y)))
}
corr_(vec1,vec2)
## [1] 0.4557528
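
Base R's cor() computes the Pearson coefficient directly and can be used as a check on corr_():

cor(vec1, vec2)   # Pearson correlation, same value as corr_(vec1, vec2)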