# Clear the global environment: removes every object that ls() lists.
# NOTE(review): when this script is sourced it wipes the caller's workspace;
# consider dropping this line and running in a fresh R session instead.
rm(list = ls())
# Create a 2 x 2 numeric matrix, filling the values row by row.
# The data must be an atomic vector (c(...)), not a list: a list yields a
# list-matrix whose cells cannot be used in arithmetic such as matrix1 * 2.
matrix1 <- matrix(
  data = c(1, 2, 3, 4),
  nrow = 2,
  byrow = TRUE,
  dimnames = list(
    c("Row1", "Row2"),
    c("Col1", "Col2")
  )
)
matrix1
##      Col1 Col2
## Row1    1    2
## Row2    3    4
# Build a factor from a character vector of animal labels and display it
factor_x <- factor(c("cat", "dog", "dog", "cat", "dog", "cat"))
print(factor_x)
## [1] cat dog dog cat dog cat
## Levels: cat dog
# Tabulate the factor to get the frequency of each level
table(factor_x)
## factor_x
## cat dog 
##   3   3
# Drop the class to reveal the underlying integer codes and the levels
unclass(factor_x)
## [1] 1 2 2 1 2 1
## attr(,"levels")
## [1] "cat" "dog"
# Work with the AirPassengers data set that ships with base R
dataset <- AirPassengers
# Preview the first six observations
head(dataset, n = 6L)
## [1] 112 118 132 129 121 135

Types of classes:

Data types:

# Inspect the data set: its class, its underlying storage type,
# and a compact summary of its structure
print(class(dataset))
## [1] "ts"
print(typeof(dataset))
## [1] "double"
str(dataset)
##  Time-Series [1:144] from 1949 to 1961: 112 118 132 129 121 135 148 148 136 119 ...
# Standard deviation using the built-in function
vector1 <- 5:11
vector1
## [1]  5  6  7  8  9 10 11
R_StandardDeviation_InBuilt <- sd(vector1)
R_StandardDeviation_InBuilt
## [1] 2.160247
# Standard deviation computed by hand: square root of the sum of the squared
# deviations from the mean, each scaled by (n - 1)
R_StandardDeviation_Hand <- sqrt(
  sum((vector1 - mean(vector1))^2 / (length(vector1) - 1))
)
R_StandardDeviation_Hand
## [1] 2.160247
# Print the source of mad() to see how the statistic is computed
print(mad)
## function (x, center = median(x), constant = 1.4826, na.rm = FALSE, 
##     low = FALSE, high = FALSE) 
## {
##     if (na.rm) 
##         x <- x[!is.na(x)]
##     n <- length(x)
##     constant * if ((low || high) && n%%2 == 0) {
##         if (low && high) 
##             stop("'low' and 'high' cannot be both TRUE")
##         n2 <- n%/%2 + as.integer(high)
##         sort(abs(x - center), partial = n2)[n2]
##     }
##     else median(abs(x - center))
## }
## <bytecode: 0x134341d78>
## <environment: namespace:stats>
# Print the source of IQR() to see how the interquartile range is computed
print(IQR)
## function (x, na.rm = FALSE, type = 7) 
## diff(quantile(as.numeric(x), c(0.25, 0.75), na.rm = na.rm, names = FALSE, 
##     type = type))
## <bytecode: 0x1543392e0>
## <environment: namespace:stats>
# Interquartile range of the example vector
print(IQR(vector1))
## [1] 3
#' Convert a temperature from degrees Fahrenheit to degrees Celsius
#'
#' @param tempF Numeric vector of temperatures in degrees Fahrenheit.
#' @return Numeric vector of the same length, in degrees Celsius.
fahrenheit_to_celsius <- function(tempF) {
  # 32 degrees Fahrenheit is the freezing point, i.e. 0 degrees Celsius
  degrees_above_freezing <- tempF - 32
  degrees_above_freezing * 5 / 9
}

# Example: 12 degrees Fahrenheit expressed in Celsius
print(fahrenheit_to_celsius(tempF = 12))
## [1] -11.11111
library(psych)
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
library(moments)
# List the data sets available in the attached packages
data()
# Explore a base R data set: descriptive statistics for every mtcars column
ds <- datasets::mtcars
psych::describe(ds)
##      vars  n   mean     sd median trimmed    mad   min    max  range  skew
## mpg     1 32  20.09   6.03  19.20   19.70   5.41 10.40  33.90  23.50  0.61
## cyl     2 32   6.19   1.79   6.00    6.23   2.97  4.00   8.00   4.00 -0.17
## disp    3 32 230.72 123.94 196.30  222.52 140.48 71.10 472.00 400.90  0.38
## hp      4 32 146.69  68.56 123.00  141.19  77.10 52.00 335.00 283.00  0.73
## drat    5 32   3.60   0.53   3.70    3.58   0.70  2.76   4.93   2.17  0.27
## wt      6 32   3.22   0.98   3.33    3.15   0.77  1.51   5.42   3.91  0.42
## qsec    7 32  17.85   1.79  17.71   17.83   1.42 14.50  22.90   8.40  0.37
## vs      8 32   0.44   0.50   0.00    0.42   0.00  0.00   1.00   1.00  0.24
## am      9 32   0.41   0.50   0.00    0.38   0.00  0.00   1.00   1.00  0.36
## gear   10 32   3.69   0.74   4.00    3.62   1.48  3.00   5.00   2.00  0.53
## carb   11 32   2.81   1.62   2.00    2.65   1.48  1.00   8.00   7.00  1.05
##      kurtosis    se
## mpg     -0.37  1.07
## cyl     -1.76  0.32
## disp    -1.21 21.91
## hp      -0.14 12.12
## drat    -0.71  0.09
## wt      -0.02  0.17
## qsec     0.34  0.32
## vs      -2.00  0.09
## am      -1.92  0.09
## gear    -1.07  0.13
## carb     1.26  0.29
# Open the help page for the mtcars data set
help("mtcars")
# Basic density plot of car weight using ggplot2
p <- ggplot(data = ds, mapping = aes(x = wt)) +
  geom_density(color = "darkgreen", fill = "#e9ecef", alpha = 0.8)

p + labs(
  title = "Weight distribution of 32 Automobiles",
  x = "Weight (1000 lbs)",
  y = "Density"
)

# Skewness of every column in the data set
moments::skewness(ds)
##        mpg        cyl       disp         hp       drat         wt       qsec 
##  0.6404399 -0.1831287  0.4002724  0.7614356  0.2788734  0.4437855  0.3870456 
##         vs         am       gear       carb 
##  0.2519763  0.3817709  0.5546495  1.1021304

As the output above shows, the skewness of weight (`wt`) is about 0.44. Because this lies between -0.5 and 0.5, the distribution is approximately symmetric, so the slight positive skew is not a concern for this data.