# IntroToR.R

# Introduction to R

# Clean up everything: remove every object from the current workspace

rm(list = ls())

# Get working directory

d <- getwd()

d

## [1] "F:/BSMMU/Spatial Data Analysis in R"

# Setting path of working directory (here it is set to the directory stored in d)

setwd(d)

getwd()

## [1] "F:/BSMMU/Spatial Data Analysis in R"

# Checking r-version

print(version$version.string)

## [1] "R version 4.1.1 (2021-08-10)"

#==================================
# Lesson-2: Basic data types in R
#==================================

# **The following are the basic (vector) data types. A vector is a one-dimensional array or structure**

# Numeric values

# Integer values

# Character values

# Logical values 

# Factors

# Missing values 

# Time

# Numeric values===============================================================
a <- 7 # a numeric vector with one element
show(a)

## [1] 7

print(a)

## [1] 7

a # typing only the name also displays the value in an interactive session

## [1] 7

class(a)

## [1] "numeric"

length(a) # number of elements (observations) in the vector

## [1] 1

rm(a) # remove the variable `a` from the workspace

# try this function show(a)

# Integer values================================================================

b <- 7L # the L suffix stores the value as an integer
b

## [1] 7

class(b)

## [1] "integer"

# Character values ============================================================
# Text is written between quotes
x <- "Proloy"
x

## [1] "Proloy"

class(x)

## [1] "character"

# Logical values ==============================================================
# Logical values can only be TRUE or FALSE
x <- FALSE
y <- TRUE
x

## [1] FALSE

y

## [1] TRUE

class(x)

## [1] "logical"

class(y)

## [1] "logical"

# Factors======================================================================

# A character vector in which some country names are repeated
countries <- c("Bangladesh", "Bangladesh", "India", "Afghanistan", "India")
countries

## [1] "Bangladesh"  "Bangladesh"  "India"       "Afghanistan" "India"

class(countries)

## [1] "character"

# Turn the character vector into a factor: the distinct values become levels

f1 <- factor(countries)
f1

## [1] Bangladesh  Bangladesh  India       Afghanistan India      
## Levels: Afghanistan Bangladesh India

class(f1)

## [1] "factor"

# Missing values=============================================================== 

# NA means "Not Available", i.e. a missing value (written as "." in some software)
m <- c(2, NA, 5, 2, NA, 2)
is.na(m) # TRUE wherever the value is missing

## [1] FALSE  TRUE FALSE FALSE  TRUE FALSE

class(m)

## [1] "numeric"

which(is.na(m)) # positions of the NA values

## [1] 2 5

# NaN means "Not a Number", e.g. the result of 0 / 0
n <- c(5, 9, NaN, 3, 8, NA, NaN)
is.nan(n) # TRUE only for NaN, not for NA

## [1] FALSE FALSE  TRUE FALSE FALSE FALSE  TRUE

class(n)

## [1] "numeric"

which(is.nan(n)) # positions of the NaN values

## [1] 3 7

# Time =========================================================================
# Sys.Date() gives the current date
d <- Sys.Date()
d

## [1] "2022-12-18"

class(d)

## [1] "Date"

#================================
#Lesson-3: Basic data structures 
#================================

# **In the previous lesson we learned one dimensional data structure (vector). In this lesson, we will learn multi-dimensional data structures that can store basic data or vector data**

# Matrix========================================================================

# **A two-dimensional rectangular layout is called a matrix. We can create a matrix with two rows and three columns using the following code**

# With no data supplied, every cell of the 2 x 3 matrix is NA
m <- matrix(nrow = 2, ncol = 3)
m

##      [,1] [,2] [,3]
## [1,]   NA   NA   NA
## [2,]   NA   NA   NA

# All values were missing (NA) in the matrix above. Now fill a matrix with the
# values 1 to 6. Arguments (data, nrow, ncol, byrow), like parameters, are
# information passed to functions; byrow = TRUE fills the matrix row by row.

m <- matrix(1:6, nrow = 2, ncol = 3, byrow = TRUE)

m

##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    4    5    6

# byrow = FALSE is the default: elements are arranged sequentially by column.
m <- matrix(1:6, nrow = 2, ncol = 3, byrow = FALSE)
m

##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6

# t() transposes the matrix, i.e. switches its rows and columns
t(m)

##      [,1] [,2]
## [1,]    1    2
## [2,]    3    4
## [3,]    5    6

# A matrix can only store a single data type. If you try to mix character and
# numeric values, all values will become character values (as the other way
# around may not be possible)

vchar <- c("a", "b")
class(vchar)

## [1] "character"

vnumb <- c(1, 2)
class(vnumb)

## [1] "numeric"

matrix(c(vchar, vnumb), nrow = 2, ncol = 2)

##      [,1] [,2]
## [1,] "a"  "1" 
## [2,] "b"  "2"

# Define the column and row names in matrix m
m <- matrix(data = 1:6, ncol = 3, nrow = 2, byrow = FALSE)
m

##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6

# Assign the names with `<-`, the standard assignment operator in R
rownames(m) <- c("row1", "row2") # Row names are less important.
colnames(m) <- c("ID", "X", "Y")
m

##      ID X Y
## row1  1 3 5
## row2  2 4 6

class(m)

## [1] "matrix" "array"

# List =========================================================================

# **A list in R is similar to your to-do list at work or school: its items can be of different kinds. A list is a kind of "super" data type that can hold any other objects**

# Three objects of different kinds: a vector, a matrix and a character string
v <- 1:10
m <- matrix(data = 1:6, ncol = 3, nrow = 2)
# Named `s` rather than `c` so the variable does not mask base R's c() function
s <- "abc"
l <- list(v, m, s)
# Naming of list elements
names(l) <- c("first", "second", "third")
print(l)

## $first
##  [1]  1  2  3  4  5  6  7  8  9 10
## 
## $second
##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6
## 
## $third
## [1] "abc"

class(l)

## [1] "list"

# Data frame ===================================================================

# It is rectangular like a matrix, but unlike matrices a data.frame can have columns (variables) of different data types such as numeric, character, factor

# Let's create a data frame with the following four variables or vectors
# Four vectors of equal length but different types
ID <- 1:4
name <- paste0("name", 1:4)
sex <- factor(c("Female", "Male", "Male", "Female"))
age <- c(36, 27, 37, 32)
# Each vector becomes one column; character columns are kept as characters
df <- data.frame(ID, name, sex, age, stringsAsFactors = FALSE)
print(df)

##   ID  name    sex age
## 1  1 name1 Female  36
## 2  2 name2   Male  27
## 3  3 name3   Male  37
## 4  4 name4 Female  32

class(df)

## [1] "data.frame"

# str() shows the structure: the type and first values of every column

str(df)

## 'data.frame':    4 obs. of  4 variables:
##  $ ID  : int  1 2 3 4
##  $ name: chr  "name1" "name2" "name3" "name4"
##  $ sex : Factor w/ 2 levels "Female","Male": 1 2 2 1
##  $ age : num  36 27 37 32

#================================
# Lesson-4: Indexing 
#================================

# Vector========================================================================

# Access element(s) of a vector
b <- 10:15
b

## [1] 10 11 12 13 14 15

# Square brackets select by position; position 1 is the first element
b[1]

## [1] 10

# A negative index leaves that position out: everything except the second
b[-2]

## [1] 10 12 13 14 15

# An index on the left-hand side of <- changes the selected values
b[1] <- 11
b[-(1:2)] <- -99
b

## [1]  11  11 -99 -99 -99 -99

# Matrix========================================================================

# values of matrices can be accessed through indexing

# A 3 x 3 matrix filled row by row, with named columns
m <- matrix(1:9, nrow = 3, byrow = TRUE)
colnames(m) <- letters[1:3]
m

##      a b c
## [1,] 1 2 3
## [2,] 4 5 6
## [3,] 7 8 9

# A double index [row, column]: the first number selects the row(s) and the
# second selects the column(s).

m[2, 2]

## b 
## 5

# leaving the row index empty selects the entire column
m[, 2]

## [1] 2 5 8

# several columns can be selected by name
m[, c("a", "c")]

##      a c
## [1,] 1 3
## [2,] 4 6
## [3,] 7 9

# an index on the left-hand side of <- sets the value
m[1, 1] <- 5


# List==========================================================================

# Build a list from three objects of different kinds
v <- 1:10
m <- matrix(data = 1:6, ncol = 3, nrow = 2)
# Named `s` rather than `c` so the variable does not mask base R's c() function
s <- "abc"
l <- list(v, m, s)
# Naming of list elements
names(l) <- c("first", "second", "third")
print(l)

## $first
##  [1]  1  2  3  4  5  6  7  8  9 10
## 
## $second
##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6
## 
## $third
## [1] "abc"

class(l)

## [1] "list"

# the elements can be extracted by using the $ (dollar) operator

l$first

##  [1]  1  2  3  4  5  6  7  8  9 10

l$second

##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6

l$third

## [1] "abc"

# double square brackets with the element name extract the same element
l[["first"]]

##  [1]  1  2  3  4  5  6  7  8  9 10

# Data frame ===================================================================

# create a data.frame from matrix m
m <- matrix(1:9, nrow = 3, byrow = TRUE)
colnames(m) <- letters[1:3]
# The matrix columns a, b and c become the columns of the data frame
d <- data.frame(m)
class(d)

## [1] "data.frame"

# a column selected by its position comes back as a plain vector
d[, 2]

## [1] 2 5 8

# the same column selected by its name
d[, "b"]

## [1] 2 5 8

# drop = FALSE keeps the result as a one-column data frame
d[, "b", drop = FALSE]

##   b
## 1 2
## 2 5
## 3 8

# Which ========================================================================

# Suppose we need to find the indices of the elements in a vector that have values above 15. The function which() gives us the positions of the entries of a logical vector that are TRUE.

x <- 10:20
# x > 15 is a logical vector; which() returns the positions that are TRUE
i <- which(x > 15)
print(i)

## [1]  7  8  9 10 11

# use those positions to pull out the values themselves
x[i]

## [1] 16 17 18 19 20

# %in% =========================================================================

# The %in% operator asks, for each value on the left, whether it is present in
# the vector on the right.
x <- 10:20
j <- c(7, 9, 11, 13)
j %in% x

## [1] FALSE FALSE  TRUE  TRUE

which(j %in% x)

## [1] 3 4

# Match=========================================================================

# The function match() looks up each entry of j in x and returns the position
# in x where it is found (NA when the entry is not present)

match(j, x)

## [1] NA NA  2  4

#================================
# Lesson-5: Algebra 
#================================

# Vector algebra ==============================================================

# Two vectors: 

a <- 1:5
b <- 6:10

# Arithmetic is done element by element: a[1] * b[1], a[2] * b[2], ...

d <- a * b

# Logical comparisons return one TRUE/FALSE per element

a == 2

## [1] FALSE  TRUE FALSE FALSE FALSE

(b > 6) & (b < 8) # & is "and": both conditions must hold

## [1] FALSE  TRUE FALSE FALSE FALSE

(b > 9) | (a < 2) # | is "or": at least one condition must hold

## [1]  TRUE FALSE FALSE FALSE  TRUE

b >= 9

## [1] FALSE FALSE FALSE  TRUE  TRUE

a <= 2

## [1]  TRUE  TRUE FALSE FALSE FALSE

(b >= 9) | (a <= 2)

## [1]  TRUE  TRUE FALSE  TRUE  TRUE

(b >= 9) & (a <= 2)

## [1] FALSE FALSE FALSE FALSE FALSE

# Functions applied to a whole vector

sqrt(a) # square root of every element

## [1] 1.000000 1.414214 1.732051 2.000000 2.236068

exp(a) # exponential of every element

## [1]   2.718282   7.389056  20.085537  54.598150 148.413159

min(a) # smallest value

## [1] 1

max(a) # largest value

## [1] 5

range(a) # smallest and largest value

## [1] 1 5

sum(a) # total of all elements

## [1] 15

mean(a) # arithmetic mean

## [1] 3

median(a) # middle value

## [1] 3

prod(a) # product of all elements

## [1] 120

sd(a) # standard deviation

## [1] 1.581139

# Random numbers

# To be able to exactly reproduce examples or data analysis we want exactly the
# same "random" sample each time we run our code. The seed therefore has to be
# fixed BEFORE any random numbers are drawn. set.seed() takes a single integer;
# 123 is an arbitrary choice.

set.seed(123)

r <- runif(10) # 10 uniformly distributed numbers

r <- rnorm(10, mean = 10, sd = 2) # 10 normally distributed numbers

# Matrices

# Create an example matrix

m <- matrix(1:6, nrow = 2, ncol = 3, byrow = TRUE)
print(m)

##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    4    5    6

# a single number is applied to every cell: all values of m times 5

m * 5

##      [,1] [,2] [,3]
## [1,]    5   10   15
## [2,]   20   25   30

# multiplying two matrices with * works cell by cell
m * m

##      [,1] [,2] [,3]
## [1,]    1    4    9
## [2,]   16   25   36

# a matrix and a vector: here row 1 is multiplied by 1 and row 2 by 2
m * 1:2

##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    8   10   12

#================================
# Lesson-6: Data Exploration
#================================

# Summary and Table=============================================================

# A small data set with inconsistent names and -999 standing in for missing
# score2 values
d <- data.frame(
  id = 1:10,
  name = c(
    "Bob", "Bobby", "???", "Bob", "Bab", "Jim", "Jim", "jim", "", "Jim"
  ),
  score1 = c(8, 10, 7, 9, 2, 5, 1, 6, 3, 4),
  score2 = c(3, 4, 5, -999, 5, 5, -999, 2, 3, 4),
  stringsAsFactors = FALSE
)

print(d)

##    id  name score1 score2
## 1   1   Bob      8      3
## 2   2 Bobby     10      4
## 3   3   ???      7      5
## 4   4   Bob      9   -999
## 5   5   Bab      2      5
## 6   6   Jim      5      5
## 7   7   Jim      1   -999
## 8   8   jim      6      2
## 9   9            3      3
## 10 10   Jim      4      4

str(d)

## 'data.frame':    10 obs. of  4 variables:
##  $ id    : int  1 2 3 4 5 6 7 8 9 10
##  $ name  : chr  "Bob" "Bobby" "???" "Bob" ...
##  $ score1: num  8 10 7 9 2 5 1 6 3 4
##  $ score2: num  3 4 5 -999 5 5 -999 2 3 4

summary(d)

##        id            name               score1          score2       
##  Min.   : 1.00   Length:10          Min.   : 1.00   Min.   :-999.00  
##  1st Qu.: 3.25   Class :character   1st Qu.: 3.25   1st Qu.:   2.25  
##  Median : 5.50   Mode  :character   Median : 5.50   Median :   3.50  
##  Mean   : 5.50                      Mean   : 5.50   Mean   :-196.70  
##  3rd Qu.: 7.75                      3rd Qu.: 7.75   3rd Qu.:   4.75  
##  Max.   :10.00                      Max.   :10.00   Max.   :   5.00

# The $ symbol extracts a variable from the data set. Here -999 is recoded to
# NA so that it no longer distorts the summary statistics of score2.
i <- d$score2 == -999
d$score2[i] <- NA
summary(d)

##        id            name               score1          score2     
##  Min.   : 1.00   Length:10          Min.   : 1.00   Min.   :2.000  
##  1st Qu.: 3.25   Class :character   1st Qu.: 3.25   1st Qu.:3.000  
##  Median : 5.50   Mode  :character   Median : 5.50   Median :4.000  
##  Mean   : 5.50                      Mean   : 5.50   Mean   :3.875  
##  3rd Qu.: 7.75                      3rd Qu.: 7.75   3rd Qu.:5.000  
##  Max.   :10.00                      Max.   :10.00   Max.   :5.000  
##                                                     NA's   :2

# For character (and integer) variables, list the distinct values and count
# how often each one occurs
unique(d$name)

## [1] "Bob"   "Bobby" "???"   "Bab"   "Jim"   "jim"   ""

table(d$name)

## 
##         ???   Bab   Bob Bobby   jim   Jim 
##     1     1     1     2     1     1     3

# to replace 'Bab' and 'Bobby' with 'Bob'
d$name[d$name == "Bab" | d$name == "Bobby"] <- "Bob"
table(d$name)

## 
##     ??? Bob jim Jim 
##   1   1   4   1   3

# to replace 'jim'  with 'Jim'
d$name[d$name == "jim"] <- "Jim"
table(d$name)

## 
##     ??? Bob Jim 
##   1   1   4   4

# '???' is not a real name, so it is set to NA
d$name[d$name %in% "???"] <- NA
table(d$name)

## 
##     Bob Jim 
##   1   4   4

# useNA = 'ifany' makes table() also count the NA values.
table(d$name, useNA = "ifany")

## 
##       Bob  Jim <NA> 
##    1    4    4    1

d$name[9]

## [1] ""

# Note that there is one 'empty' value in the dataset. to replace 'empty' value with NA (missing value)
d$name[d$name %in% ""] <- NA
# cross-tabulate name against score2
table(d[, c("name", "score2")])

##      score2
## name  2 3 4 5
##   Bob 0 1 1 1
##   Jim 1 0 1 1

# Quantile, range, and mean=====================================================

# score1 has no missing values, so the defaults work
quantile(d[["score1"]])

##    0%   25%   50%   75%  100% 
##  1.00  3.25  5.50  7.75 10.00

range(d[["score1"]])

## [1]  1 10

mean(d[["score1"]])

## [1] 5.5

# score2 contains NA values, so na.rm = TRUE is needed to leave them out
# quantile(d$score2)
quantile(d[["score2"]], na.rm = TRUE)

##   0%  25%  50%  75% 100% 
##    2    3    4    5    5

# without na.rm the NA values carry through to the result
range(d[["score2"]])

## [1] NA NA

range(d[["score2"]], na.rm = TRUE)

## [1] 2 5

# Plots=========================================================================

# sets up the canvas for two rows and columns
# par() returns the previous settings; they are kept so that the layout can be
# restored once the four plots have been drawn
old_par <- par(mfrow = c(2, 2))
# Scatter plot with two variables
plot(d$score1, d$score2)
# Boxplot of two variables
boxplot(d[, c("score1", "score2")])
# score1 values sorted in increasing order
plot(sort(d$score1))
# Histogram of score2
hist(d$score2)
# Restore the previous layout so later plots are not drawn into the 2 x 2 grid
par(old_par)

# ========================
# Read and write files
# ========================

# To read a file we first need to know its full path (directory) and file name. For path delimiters we need to use the forward slash "/". For example, "C:/projects/research/data/obs.csv".

# Read the participants CSV file into a data frame and display it
df <- read.csv(
  file = "F:/BSMMU/Spatial Data Analysis in R/participants.csv"
)
print(df)

##                RegTime                   Full.Name
## 1  26/10/2022 16:41:45           Taslima Chowdhury
## 2  26/10/2022 16:42:35                Anika Tasnim
## 3  26/10/2022 16:42:49             Shahana Sultana
## 4  26/10/2022 17:01:08             Tanjela Bushra 
## 5  26/10/2022 19:17:17         Md. Redwanul Islam 
## 6  30/10/2022 11:11:30                Kamrun Nahar
## 7      4/11/2022 10:05           Evana Binthe Alam
## 8       6/11/2022 7:54                Marium Salwa
## 9      7/12/2022 22:45  Dr Ashekur Rahman Mullick 
## 10     7/12/2022 22:45 Nabhira Aftabi Binte Islam 
## 11     7/12/2022 23:02             Sabrina Mousum 
## 12      8/12/2022 0:10    Ayesha Tabassum Swarna  
## 13     8/12/2022 10:03        Sumaiya Samad Deepa 
## 14    11/12/2022 15:53         Md Maruf Haque KHan
## 15    12/12/2022 13:11             Tanjela Bushra 
## 16 13/12/2022 14:24:11               Nilima Barman
##    Previous.Software.Experience.on...Excel.
## 1                                       Yes
## 2                                       Yes
## 3                                       Yes
## 4                                       Yes
## 5                                       Yes
## 6                                       Yes
## 7                                       Yes
## 8                                       Yes
## 9                                       Yes
## 10                                      Yes
## 11                                      Yes
## 12                                      Yes
## 13                                      Yes
## 14                                      Yes
## 15                                      Yes
## 16                                      Yes
##    Previous.Software.Experience.on...SPSS.
## 1                                      Yes
## 2                                      Yes
## 3                                      Yes
## 4                                      Yes
## 5                                      Yes
## 6                                      Yes
## 7                                      Yes
## 8                                      Yes
## 9                                      Yes
## 10                                     Yes
## 11                                     Yes
## 12                                     Yes
## 13                                     Yes
## 14                                     Yes
## 15                                     Yes
## 16                                     Yes
##    Previous.Software.Experience.on...Stata.
## 1                                        No
## 2                                        No
## 3                                        No
## 4                                        No
## 5                                        No
## 6                                        No
## 7                                        No
## 8                                        No
## 9                                        No
## 10                                       No
## 11                                       No
## 12                                       No
## 13                                       No
## 14                                       No
## 15                                       No
## 16                                       No
##    Previous.Software.Experience.on...R. Previous.Software.Experience.on...SAS.
## 1                                    No                                     No
## 2                                    No                                     No
## 3                                    No                                     No
## 4                                    No                                     No
## 5                                    No                                     No
## 6                                    No                                     No
## 7                                    No                                     No
## 8                                    No                                     No
## 9                                    No                                     No
## 10                                   No                                     No
## 11                                   No                                     No
## 12                                   No                                     No
## 13                                   No                                     No
## 14                                   No                                    Yes
## 15                                   No                                     No
## 16                                   No                                     No
##      Age Gender Major.at.Bachelor.or.master.s.level Do.you.have.personal.laptop
## 1  36-40 Female                              Others                         Yes
## 2  31-35 Female                              Others                         Yes
## 3  36-40 Female                              Others                         Yes
## 4  31-35 Female                              Others                         Yes
## 5  26-30   Male                        Anthropology                         Yes
## 6  36-40 Female                              Others                         Yes
## 7  31-35 Female                              Others                         Yes
## 8  31-35 Female                              Others                         Yes
## 9  31-35   Male                              Others                         Yes
## 10 36-40 Female                       Biostatistics                         Yes
## 11 31-35 Female                              Others                         Yes
## 12 31-35 Female                              Others                         Yes
## 13 26-30 Female                              Others                         Yes
## 14 36-40   Male                              Others                         Yes
## 15 31-35 Female                              Others                         Yes
## 16 36-40 Female                              Others                         Yes

# Check what kind of object the imported data is
class(df)

## [1] "data.frame"

# Overview of every column: its name, type and first few values
str(object = df)

## 'data.frame':    16 obs. of  11 variables:
##  $ RegTime                                 : chr  "26/10/2022 16:41:45" "26/10/2022 16:42:35" "26/10/2022 16:42:49" "26/10/2022 17:01:08" ...
##  $ Full.Name                               : chr  "Taslima Chowdhury" "Anika Tasnim" "Shahana Sultana" "Tanjela Bushra " ...
##  $ Previous.Software.Experience.on...Excel.: chr  "Yes" "Yes" "Yes" "Yes" ...
##  $ Previous.Software.Experience.on...SPSS. : chr  "Yes" "Yes" "Yes" "Yes" ...
##  $ Previous.Software.Experience.on...Stata.: chr  "No" "No" "No" "No" ...
##  $ Previous.Software.Experience.on...R.    : chr  "No" "No" "No" "No" ...
##  $ Previous.Software.Experience.on...SAS.  : chr  "No" "No" "No" "No" ...
##  $ Age                                     : chr  "36-40" "31-35" "36-40" "31-35" ...
##  $ Gender                                  : chr  "Female" "Female" "Female" "Female" ...
##  $ Major.at.Bachelor.or.master.s.level     : chr  "Others" "Others" "Others" "Others" ...
##  $ Do.you.have.personal.laptop             : chr  "Yes" "Yes" "Yes" "Yes" ...

# Save a copy of the data under a new file name
write.csv(
  x = df,
  file = "F:/BSMMU/Spatial Data Analysis in R/participants_data.csv"
)
# Show the current working directory
getwd()

## [1] "F:/BSMMU/Spatial Data Analysis in R"

# Cleaning stuff: replace the long survey-question headers with short names
names(df) <- c(
  "reg", "name", "excel", "spss", "stata", "r", "sas", "age", "sex", "major",
  "laptop"
)
str(df)

## 'data.frame':    16 obs. of  11 variables:
##  $ reg   : chr  "26/10/2022 16:41:45" "26/10/2022 16:42:35" "26/10/2022 16:42:49" "26/10/2022 17:01:08" ...
##  $ name  : chr  "Taslima Chowdhury" "Anika Tasnim" "Shahana Sultana" "Tanjela Bushra " ...
##  $ excel : chr  "Yes" "Yes" "Yes" "Yes" ...
##  $ spss  : chr  "Yes" "Yes" "Yes" "Yes" ...
##  $ stata : chr  "No" "No" "No" "No" ...
##  $ r     : chr  "No" "No" "No" "No" ...
##  $ sas   : chr  "No" "No" "No" "No" ...
##  $ age   : chr  "36-40" "31-35" "36-40" "31-35" ...
##  $ sex   : chr  "Female" "Female" "Female" "Female" ...
##  $ major : chr  "Others" "Others" "Others" "Others" ...
##  $ laptop: chr  "Yes" "Yes" "Yes" "Yes" ...

# Convert every categorical column to a factor in a single step
factor_cols <- c(
  "excel", "spss", "stata", "r", "sas", "age", "sex", "major", "laptop"
)
df[factor_cols] <- lapply(df[factor_cols], as.factor)

# Drop the first column
df <- df[, -1]
df

##                           name excel spss stata  r sas   age    sex
## 1            Taslima Chowdhury   Yes  Yes    No No  No 36-40 Female
## 2                 Anika Tasnim   Yes  Yes    No No  No 31-35 Female
## 3              Shahana Sultana   Yes  Yes    No No  No 36-40 Female
## 4              Tanjela Bushra    Yes  Yes    No No  No 31-35 Female
## 5          Md. Redwanul Islam    Yes  Yes    No No  No 26-30   Male
## 6                 Kamrun Nahar   Yes  Yes    No No  No 36-40 Female
## 7            Evana Binthe Alam   Yes  Yes    No No  No 31-35 Female
## 8                 Marium Salwa   Yes  Yes    No No  No 31-35 Female
## 9   Dr Ashekur Rahman Mullick    Yes  Yes    No No  No 31-35   Male
## 10 Nabhira Aftabi Binte Islam    Yes  Yes    No No  No 36-40 Female
## 11             Sabrina Mousum    Yes  Yes    No No  No 31-35 Female
## 12    Ayesha Tabassum Swarna     Yes  Yes    No No  No 31-35 Female
## 13        Sumaiya Samad Deepa    Yes  Yes    No No  No 26-30 Female
## 14         Md Maruf Haque KHan   Yes  Yes    No No Yes 36-40   Male
## 15             Tanjela Bushra    Yes  Yes    No No  No 31-35 Female
## 16               Nilima Barman   Yes  Yes    No No  No 36-40 Female
##            major laptop
## 1         Others    Yes
## 2         Others    Yes
## 3         Others    Yes
## 4         Others    Yes
## 5   Anthropology    Yes
## 6         Others    Yes
## 7         Others    Yes
## 8         Others    Yes
## 9         Others    Yes
## 10 Biostatistics    Yes
## 11        Others    Yes
## 12        Others    Yes
## 13        Others    Yes
## 14        Others    Yes
## 15        Others    Yes
## 16        Others    Yes

# Add a running ID, one per row. seq_len() is used instead of 1:nrow(df)
# because for a data frame with no rows 1:0 would give c(1, 0) rather than an
# empty sequence.
df$ID <- seq_len(nrow(df))
df

##                           name excel spss stata  r sas   age    sex
## 1            Taslima Chowdhury   Yes  Yes    No No  No 36-40 Female
## 2                 Anika Tasnim   Yes  Yes    No No  No 31-35 Female
## 3              Shahana Sultana   Yes  Yes    No No  No 36-40 Female
## 4              Tanjela Bushra    Yes  Yes    No No  No 31-35 Female
## 5          Md. Redwanul Islam    Yes  Yes    No No  No 26-30   Male
## 6                 Kamrun Nahar   Yes  Yes    No No  No 36-40 Female
## 7            Evana Binthe Alam   Yes  Yes    No No  No 31-35 Female
## 8                 Marium Salwa   Yes  Yes    No No  No 31-35 Female
## 9   Dr Ashekur Rahman Mullick    Yes  Yes    No No  No 31-35   Male
## 10 Nabhira Aftabi Binte Islam    Yes  Yes    No No  No 36-40 Female
## 11             Sabrina Mousum    Yes  Yes    No No  No 31-35 Female
## 12    Ayesha Tabassum Swarna     Yes  Yes    No No  No 31-35 Female
## 13        Sumaiya Samad Deepa    Yes  Yes    No No  No 26-30 Female
## 14         Md Maruf Haque KHan   Yes  Yes    No No Yes 36-40   Male
## 15             Tanjela Bushra    Yes  Yes    No No  No 31-35 Female
## 16               Nilima Barman   Yes  Yes    No No  No 36-40 Female
##            major laptop ID
## 1         Others    Yes  1
## 2         Others    Yes  2
## 3         Others    Yes  3
## 4         Others    Yes  4
## 5   Anthropology    Yes  5
## 6         Others    Yes  6
## 7         Others    Yes  7
## 8         Others    Yes  8
## 9         Others    Yes  9
## 10 Biostatistics    Yes 10
## 11        Others    Yes 11
## 12        Others    Yes 12
## 13        Others    Yes 13
## 14        Others    Yes 14
## 15        Others    Yes 15
## 16        Others    Yes 16

# Move ID to the first position and keep the other columns in their current
# order. Done with base R so the script does not need the data.table package.
df <- df[c("ID", setdiff(names(df), "ID"))]
df

##    ID                        name excel spss stata  r sas   age    sex
## 1   1           Taslima Chowdhury   Yes  Yes    No No  No 36-40 Female
## 2   2                Anika Tasnim   Yes  Yes    No No  No 31-35 Female
## 3   3             Shahana Sultana   Yes  Yes    No No  No 36-40 Female
## 4   4             Tanjela Bushra    Yes  Yes    No No  No 31-35 Female
## 5   5         Md. Redwanul Islam    Yes  Yes    No No  No 26-30   Male
## 6   6                Kamrun Nahar   Yes  Yes    No No  No 36-40 Female
## 7   7           Evana Binthe Alam   Yes  Yes    No No  No 31-35 Female
## 8   8                Marium Salwa   Yes  Yes    No No  No 31-35 Female
## 9   9  Dr Ashekur Rahman Mullick    Yes  Yes    No No  No 31-35   Male
## 10 10 Nabhira Aftabi Binte Islam    Yes  Yes    No No  No 36-40 Female
## 11 11             Sabrina Mousum    Yes  Yes    No No  No 31-35 Female
## 12 12    Ayesha Tabassum Swarna     Yes  Yes    No No  No 31-35 Female
## 13 13        Sumaiya Samad Deepa    Yes  Yes    No No  No 26-30 Female
## 14 14         Md Maruf Haque KHan   Yes  Yes    No No Yes 36-40   Male
## 15 15             Tanjela Bushra    Yes  Yes    No No  No 31-35 Female
## 16 16               Nilima Barman   Yes  Yes    No No  No 36-40 Female
##            major laptop
## 1         Others    Yes
## 2         Others    Yes
## 3         Others    Yes
## 4         Others    Yes
## 5   Anthropology    Yes
## 6         Others    Yes
## 7         Others    Yes
## 8         Others    Yes
## 9         Others    Yes
## 10 Biostatistics    Yes
## 11        Others    Yes
## 12        Others    Yes
## 13        Others    Yes
## 14        Others    Yes
## 15        Others    Yes
## 16        Others    Yes

# Final check of the structure of the cleaned data frame
str(object = df)

## 'data.frame':    16 obs. of  11 variables:
##  $ ID    : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ name  : chr  "Taslima Chowdhury" "Anika Tasnim" "Shahana Sultana" "Tanjela Bushra " ...
##  $ excel : Factor w/ 1 level "Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ spss  : Factor w/ 1 level "Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ stata : Factor w/ 1 level "No": 1 1 1 1 1 1 1 1 1 1 ...
##  $ r     : Factor w/ 1 level "No": 1 1 1 1 1 1 1 1 1 1 ...
##  $ sas   : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ age   : Factor w/ 3 levels "26-30","31-35",..: 3 2 3 2 1 3 2 2 2 3 ...
##  $ sex   : Factor w/ 2 levels "Female","Male": 1 1 1 1 2 1 1 1 2 1 ...
##  $ major : Factor w/ 3 levels "Anthropology",..: 3 3 3 3 1 3 3 3 3 2 ...
##  $ laptop: Factor w/ 1 level "Yes": 1 1 1 1 1 1 1 1 1 1 ...