# NOTE: this script used to begin with `rm(list = ls())`. The call was removed
# because wiping the caller's workspace is a destructive side effect and does
# not produce a clean session anyway (attached packages and options persist).
# Restart R when a fresh session is needed.

# Create a 2 x 2 matrix, filling the values row by row.
# The data is an atomic numeric vector rather than a list, so the result is a
# numeric matrix that supports arithmetic; a list here would produce a
# list-mode matrix on which is.numeric() is FALSE and sum()/mean() fail.
matrix1 <- matrix(
  data = c(1, 2, 3, 4),
  nrow = 2,
  byrow = TRUE,
  dimnames = list(
    c("Row1", "Row2"),
    c("Col1", "Col2")
  )
)
matrix1
## Col1 Col2
## Row1 1 2
## Row2 3 4
# Build a factor from a character vector of animal labels and display it
factor_x <- factor(c("cat", "dog", "dog", "cat", "dog", "cat"))
print(factor_x)
## [1] cat dog dog cat dog cat
## Levels: cat dog
# Tabulate the factor: the output is one frequency count per level
table(factor_x)
## factor_x
## cat dog
## 3 3
# Drop the class to show the integer codes and the "levels" attribute
unclass(factor_x)
## [1] 1 2 2 1 2 1
## attr(,"levels")
## [1] "cat" "dog"
# Take the AirPassengers dataset that ships with R and preview its first values
dataset <- AirPassengers
print(head(dataset))
## [1] 112 118 132 129 121 135
Types of classes:
Numeric (e.g., 1, 2.5)
Character (e.g., “hello world”)
Integer (e.g., 430L; a bare 430 is stored as numeric)
Logical (e.g., TRUE, FALSE)
Data structures:
Vector : a fundamental R data structure that supports the inclusion of objects with the same atomic data type. A numeric vector, for instance, can hold several numeric values.
List : A data structure whose components may be of different data types. Because of this flexibility, vectors, data frames, functions, and other objects can all be stored in a list.
Matrix : A two-dimensional data structure, arranged in rows and columns, whose elements all share a single atomic data type.
Data frame : A table-like, two-dimensional data structure. Tabular data is stored in data frames, where each column can hold a different data type.
Factors : A special kind of vector used for categorical data. The levels of a factor correspond to its distinct categories. Factors are helpful in statistical modeling.
Tables : Used to summarize categorical data. Functions such as table() create frequency tables, contingency tables, and cross-tabulations.
# Inspect the dataset: its class attribute, its internal storage type,
# and a compact summary of its structure
print(class(dataset))
## [1] "ts"
print(typeof(dataset))
## [1] "double"
str(object = dataset)
## Time-Series [1:144] from 1949 to 1961: 112 118 132 129 121 135 148 148 136 119 ...
# Use the built-in function sd() to find the standard deviation
vector1 <- 5:11
print(vector1)
## [1] 5 6 7 8 9 10 11
R_StandardDeviation_InBuilt <- sd(vector1)
print(R_StandardDeviation_InBuilt)
## [1] 2.160247
# Compute the standard deviation by hand: square each deviation from the
# mean, divide by (n - 1), sum the terms, and take the square root.
# local() keeps the intermediate value out of the global environment.
R_StandardDeviation_Hand <- local({
  squared_deviations <- (vector1 - mean(vector1))^2
  sqrt(sum(squared_deviations / (length(vector1) - 1)))
})
print(R_StandardDeviation_Hand)
## [1] 2.160247
# Print the definition of mad() to see how it is computed
print(mad)
## function (x, center = median(x), constant = 1.4826, na.rm = FALSE,
## low = FALSE, high = FALSE)
## {
## if (na.rm)
## x <- x[!is.na(x)]
## n <- length(x)
## constant * if ((low || high) && n%%2 == 0) {
## if (low && high)
## stop("'low' and 'high' cannot be both TRUE")
## n2 <- n%/%2 + as.integer(high)
## sort(abs(x - center), partial = n2)[n2]
## }
## else median(abs(x - center))
## }
## <bytecode: 0x134341d78>
## <environment: namespace:stats>
# Print the definition of IQR() to see how it is computed
print(IQR)
## function (x, na.rm = FALSE, type = 7)
## diff(quantile(as.numeric(x), c(0.25, 0.75), na.rm = na.rm, names = FALSE,
## type = type))
## <bytecode: 0x1543392e0>
## <environment: namespace:stats>
# Apply IQR() to the example vector
print(IQR(vector1))
## [1] 3
# User-defined function: convert a temperature from Fahrenheit to Celsius.
#   tempF: temperature value(s) in degrees Fahrenheit.
#   Returns (tempF - 32) * 5 / 9.
fahrenheit_to_celsius <- function(tempF) {
  offset_from_freezing <- tempF - 32
  offset_from_freezing * 5 / 9
}
# Example call: convert 12 degrees Fahrenheit
print(fahrenheit_to_celsius(12))
## [1] -11.11111
library(psych)
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(moments)
# List the available datasets
data()
# Explore a base R dataset: copy mtcars and show descriptive statistics
# for every column
ds <- mtcars
psych::describe(ds)
## vars n mean sd median trimmed mad min max range skew
## mpg 1 32 20.09 6.03 19.20 19.70 5.41 10.40 33.90 23.50 0.61
## cyl 2 32 6.19 1.79 6.00 6.23 2.97 4.00 8.00 4.00 -0.17
## disp 3 32 230.72 123.94 196.30 222.52 140.48 71.10 472.00 400.90 0.38
## hp 4 32 146.69 68.56 123.00 141.19 77.10 52.00 335.00 283.00 0.73
## drat 5 32 3.60 0.53 3.70 3.58 0.70 2.76 4.93 2.17 0.27
## wt 6 32 3.22 0.98 3.33 3.15 0.77 1.51 5.42 3.91 0.42
## qsec 7 32 17.85 1.79 17.71 17.83 1.42 14.50 22.90 8.40 0.37
## vs 8 32 0.44 0.50 0.00 0.42 0.00 0.00 1.00 1.00 0.24
## am 9 32 0.41 0.50 0.00 0.38 0.00 0.00 1.00 1.00 0.36
## gear 10 32 3.69 0.74 4.00 3.62 1.48 3.00 5.00 2.00 0.53
## carb 11 32 2.81 1.62 2.00 2.65 1.48 1.00 8.00 7.00 1.05
## kurtosis se
## mpg -0.37 1.07
## cyl -1.76 0.32
## disp -1.21 21.91
## hp -0.14 12.12
## drat -0.71 0.09
## wt -0.02 0.17
## qsec 0.34 0.32
## vs -2.00 0.09
## am -1.92 0.09
## gear -1.07 0.13
## carb 1.26 0.29
# Open the help page for the mtcars dataset
help("mtcars")
# Plot a basic density curve of the wt column using ggplot2
p <- ggplot(data = ds, mapping = aes(x = wt)) +
  geom_density(color = "darkgreen", fill = "#e9ecef", alpha = 0.8)
# Add the title and axis labels, then draw the plot
print(
  p +
    ggtitle("Weight distribution of 32 Automobiles") +
    xlab("Weight (1000 lbs)") +
    ylab("Density")
)
# Compute the skewness of every column in the dataset
print(skewness(x = ds))
## mpg cyl disp hp drat wt qsec
## 0.6404399 -0.1831287 0.4002724 0.7614356 0.2788734 0.4437855 0.3870456
## vs am gear carb
## 0.2519763 0.3817709 0.5546495 1.1021304
As seen here, the positive skewness of weight (wt ≈ 0.44) does not appear to be a problem for this data, as it lies between -0.5 and 0.5, the range usually regarded as approximately symmetric.