1) Creating Vectors, Converting the Vectors into a List, and Converting the List into a Dataframe:

# Creating vectors:
x1 <- c("Apple", "Samsung", "vivo")
x2 <- c("Red", "Black", "Pink")
x3 <- c(1, 3, 2)

# Creating a named list of vectors.
# Every element gets its own unique name: with a repeated name, `x$col2`
# would only ever return the first match and as.data.frame() would have to
# invent a column name such as `col2.1`.
x <- list(col1 = x1, col2 = x2, col3 = x3)
x
## $col1
## [1] "Apple"   "Samsung" "vivo"   
## 
## $col2
## [1] "Red"   "Black" "Pink" 
## 
## $col3
## [1] 1 3 2
# Convert into Dataframe: each list element becomes one column.
as.data.frame(x)
##      col1  col2 col3
## 1   Apple   Red    1
## 2 Samsung Black    3
## 3    vivo  Pink    2

2) Converting predefined sets into Dataframe:

# BOD (Biochemical Oxygen Demand) is a data frame that is pre-defined in R,
# not a vector. as.list() turns it into a plain list with one element per column.
x <- as.list(BOD)
# Printing the list shows that the data frame's extra "reference" attribute
# is carried over to the list.
x 
## $Time
## [1] 1 2 3 4 5 7
## 
## $demand
## [1]  8.3 10.3 19.0 16.0 15.6 19.8
## 
## attr(,"reference")
## [1] "A1.4, p. 270"
# as.data.frame() rebuilds a data frame from the list: one column per element.
as.data.frame(x)
##   Time demand
## 1    1    8.3
## 2    2   10.3
## 3    3   19.0
## 4    4   16.0
## 5    5   15.6
## 6    7   19.8

3) Checking whether an object is a Dataframe:

is.data.frame() is used to identify whether the provided object is a Dataframe. It returns TRUE for a Dataframe and FALSE for anything else.

# BOD is a data frame, so the check returns TRUE.
is.data.frame(BOD)
## [1] TRUE
# A bare number is not a data frame.
is.data.frame(1)
## [1] FALSE
# x is the list produced by as.list(BOD) in section 2, so it is no longer a data frame.
is.data.frame(x)
## [1] FALSE

4) Converting all the values in Dataframe into Matrix

data.matrix() function in R Language is used to create a matrix by converting all the values of a Data Frame into numeric mode and then binding them as a matrix.

# Build a small data frame that mixes character and numeric columns.
df <- data.frame(
  Name = c("a", "b", "c", "d"),
  Age = c(12, 34, 23, 67),
  BMI = c(22, 12, 10, 11),
  Country = c("Malaysia", "India", "Spain", "England")
)
print(df)
##   Name Age BMI  Country
## 1    a  12  22 Malaysia
## 2    b  34  12    India
## 3    c  23  10    Spain
## 4    d  67  11  England
# data.matrix() returns an all-numeric matrix; the character columns show up
# as integer codes (see the Name and Country columns in the output below).
df1 <- data.matrix(df)
print(df1)
##      Name Age BMI Country
## [1,]    1  12  22       3
## [2,]    2  34  12       2
## [3,]    3  23  10       4
## [4,]    4  67  11       1

5) Converting matrix into DataFrame

as.data.frame() is used to convert matrix dataset as Dataframe.

# Turn the numeric matrix df1 back into a data frame; the values stay numeric,
# so the original character values of Name and Country are not restored.
df2 <- as.data.frame(df1)
print(df2)
##   Name Age BMI Country
## 1    1  12  22       3
## 2    2  34  12       2
## 3    3  23  10       4
## 4    4  67  11       1

6) Creating a Dataframe using list , renaming Column names and Getting dimension of the data

Create the dataframe using the data.frame() function. Rename the columns using names(); the same can also be achieved using rename(). dim() is used to get the dimensions of the existing dataframe.

# Build the data frame with placeholder column names A-D first ...
Friends <- data.frame(
  A = c(
    "Raju", "Kumar", "Chandru", "Deepak", "Raina",
    "Dhoni", "Hema", "Vandana", "Bela", "Anand"
  ),
  B = c(12, 34, 23, 67, 23, 43, 23, 38, 34, 12),
  C = c("M", "M", "M", "M", "M", "M", "F", "F", "F", "M"),
  D = c(
    "Malaysia", "India", "Spain", "England", "Malaysia",
    "Malaysia", "India", "Egypt", "Italy", "Peru"
  )
)
# ... then replace all four column names in one assignment.
names(Friends) <- c("Name", "Age", "Gender", "Country")
Friends
##       Name Age Gender  Country
## 1     Raju  12      M Malaysia
## 2    Kumar  34      M    India
## 3  Chandru  23      M    Spain
## 4   Deepak  67      M  England
## 5    Raina  23      M Malaysia
## 6    Dhoni  43      M Malaysia
## 7     Hema  23      F    India
## 8  Vandana  38      F    Egypt
## 9     Bela  34      F    Italy
## 10   Anand  12      M     Peru
# dim() reports the number of rows followed by the number of columns.
dim(Friends)
## [1] 10  4

7) Structure of data

# str() prints a compact overview: the number of rows and columns, then each
# column's type followed by its first few values.
str(Friends)
## 'data.frame':    10 obs. of  4 variables:
##  $ Name   : chr  "Raju" "Kumar" "Chandru" "Deepak" ...
##  $ Age    : num  12 34 23 67 23 43 23 38 34 12
##  $ Gender : chr  "M" "M" "M" "M" ...
##  $ Country: chr  "Malaysia" "India" "Spain" "England" ...

8) Getting the first 6 rows:

# Show the first six rows; n is spelled out to match the section title.
head(Friends, n = 6L)
##      Name Age Gender  Country
## 1    Raju  12      M Malaysia
## 2   Kumar  34      M    India
## 3 Chandru  23      M    Spain
## 4  Deepak  67      M  England
## 5   Raina  23      M Malaysia
## 6   Dhoni  43      M Malaysia

9) Getting the last 6 rows:

# Show the last six rows; the original row numbers (5-10) are kept as labels.
tail(Friends, n = 6L)
##       Name Age Gender  Country
## 5    Raina  23      M Malaysia
## 6    Dhoni  43      M Malaysia
## 7     Hema  23      F    India
## 8  Vandana  38      F    Egypt
## 9     Bela  34      F    Italy
## 10   Anand  12      M     Peru

10) Accessing 1st &2nd rows:

# Row index before the comma, empty column index after it: rows 1-2, all columns.
print(Friends[seq_len(2), ])
##    Name Age Gender  Country
## 1  Raju  12      M Malaysia
## 2 Kumar  34      M    India

11) Accessing 1st &2nd col:

# Empty row index before the comma, column index after it: all rows, columns 1-2.
print(Friends[, seq_len(2)])
##       Name Age
## 1     Raju  12
## 2    Kumar  34
## 3  Chandru  23
## 4   Deepak  67
## 5    Raina  23
## 6    Dhoni  43
## 7     Hema  23
## 8  Vandana  38
## 9     Bela  34
## 10   Anand  12

12) Selecting the subset of Dataframe

# Show the full data frame for reference before filtering it.
print(Friends)
##       Name Age Gender  Country
## 1     Raju  12      M Malaysia
## 2    Kumar  34      M    India
## 3  Chandru  23      M    Spain
## 4   Deepak  67      M  England
## 5    Raina  23      M Malaysia
## 6    Dhoni  43      M Malaysia
## 7     Hema  23      F    India
## 8  Vandana  38      F    Egypt
## 9     Bela  34      F    Italy
## 10   Anand  12      M     Peru
# Keep only the rows whose Country is "Malaysia".
FriendsinMalaysia <- subset(Friends, subset = Country == "Malaysia")
print(FriendsinMalaysia)
##    Name Age Gender  Country
## 1  Raju  12      M Malaysia
## 5 Raina  23      M Malaysia
## 6 Dhoni  43      M Malaysia
# Keep only the rows whose Age is greater than 30.
Friendsabove30 <- subset(Friends, subset = Age > 30)
print(Friendsabove30)
##      Name Age Gender  Country
## 2   Kumar  34      M    India
## 4  Deepak  67      M  England
## 6   Dhoni  43      M Malaysia
## 8 Vandana  38      F    Egypt
## 9    Bela  34      F    Italy

13) Appending a column to an existing Dataframe

# One employment status per row of Friends, in row order.
Employmentstatus <- c(
  "Unemployed", "Employed", "Employed", "Unemployed", "Employed",
  "Unemployed", "Employed", "Employed", "Unemployed", "Unemployed"
)
# Add `Employmentstatus` to the `Friends` data frame as a new last column
Friends[["Employmentstatus"]] <- Employmentstatus
Friends
##       Name Age Gender  Country Employmentstatus
## 1     Raju  12      M Malaysia       Unemployed
## 2    Kumar  34      M    India         Employed
## 3  Chandru  23      M    Spain         Employed
## 4   Deepak  67      M  England       Unemployed
## 5    Raina  23      M Malaysia         Employed
## 6    Dhoni  43      M Malaysia       Unemployed
## 7     Hema  23      F    India         Employed
## 8  Vandana  38      F    Egypt         Employed
## 9     Bela  34      F    Italy       Unemployed
## 10   Anand  12      M     Peru       Unemployed

14) Modifying a value in a Dataframe by direct assignment

# Overwrite a single cell in place: column 4 (Country), element 2 (Kumar's row),
# changing the value from India to Australia.
Friends[[4L]][2L] <- "Australia"
print(Friends)
##       Name Age Gender   Country Employmentstatus
## 1     Raju  12      M  Malaysia       Unemployed
## 2    Kumar  34      M Australia         Employed
## 3  Chandru  23      M     Spain         Employed
## 4   Deepak  67      M   England       Unemployed
## 5    Raina  23      M  Malaysia         Employed
## 6    Dhoni  43      M  Malaysia       Unemployed
## 7     Hema  23      F     India         Employed
## 8  Vandana  38      F     Egypt         Employed
## 9     Bela  34      F     Italy       Unemployed
## 10   Anand  12      M      Peru       Unemployed

15) Editing a DataFrame interactively in a table editor using edit()

# edit() opens R's data editor on a copy of Friends and returns the edited
# copy; Friends itself is not modified.
# The editor needs a user in front of it, so only launch it in an interactive
# session. When the script runs non-interactively (Rscript, R CMD BATCH,
# knitting) fall back to an unedited copy, which matches the output shown
# below, instead of blocking or failing on the editor call.
myTable <- if (interactive()) edit(Friends) else Friends
print(myTable)
##       Name Age Gender   Country Employmentstatus
## 1     Raju  12      M  Malaysia       Unemployed
## 2    Kumar  34      M Australia         Employed
## 3  Chandru  23      M     Spain         Employed
## 4   Deepak  67      M   England       Unemployed
## 5    Raina  23      M  Malaysia         Employed
## 6    Dhoni  43      M  Malaysia       Unemployed
## 7     Hema  23      F     India         Employed
## 8  Vandana  38      F     Egypt         Employed
## 9     Bela  34      F     Italy       Unemployed
## 10   Anand  12      M      Peru       Unemployed

16) Sorting the dataframe

# order() yields the row positions in ascending Age order; indexing the rows
# with it returns the sorted data frame. Rows with the same Age keep their
# original relative order (see rows 1/10 and 3/5/7 in the output).
sorteddata <- myTable[order(myTable[["Age"]]), ]
sorteddata
##       Name Age Gender   Country Employmentstatus
## 1     Raju  12      M  Malaysia       Unemployed
## 10   Anand  12      M      Peru       Unemployed
## 3  Chandru  23      M     Spain         Employed
## 5    Raina  23      M  Malaysia         Employed
## 7     Hema  23      F     India         Employed
## 2    Kumar  34      M Australia         Employed
## 9     Bela  34      F     Italy       Unemployed
## 8  Vandana  38      F     Egypt         Employed
## 6    Dhoni  43      M  Malaysia       Unemployed
## 4   Deepak  67      M   England       Unemployed

17) Adding a column using cbind() function

# cbind() returns a new data frame with the named vector appended as the last
# column; myTable itself is left unchanged.
newTable <- cbind(
  myTable,
  Foodpreference = c(
    "Veg", "Veg", "Veg", "Non-Veg", "Veg",
    "Non-Veg", "Veg", "Veg", "Non-Veg", "Non-Veg"
  )
)
print(newTable)
##       Name Age Gender   Country Employmentstatus Foodpreference
## 1     Raju  12      M  Malaysia       Unemployed            Veg
## 2    Kumar  34      M Australia         Employed            Veg
## 3  Chandru  23      M     Spain         Employed            Veg
## 4   Deepak  67      M   England       Unemployed        Non-Veg
## 5    Raina  23      M  Malaysia         Employed            Veg
## 6    Dhoni  43      M  Malaysia       Unemployed        Non-Veg
## 7     Hema  23      F     India         Employed            Veg
## 8  Vandana  38      F     Egypt         Employed            Veg
## 9     Bela  34      F     Italy       Unemployed        Non-Veg
## 10   Anand  12      M      Peru       Unemployed        Non-Veg

18) Adding rows using rbind() function:

# Append one more row: a one-row data frame built with the same column names
# as newTable is stacked underneath it.
# NOTE(review): "unemployed" is lower-case here while every other row uses
# "Unemployed" -- confirm whether that is intended.
newTable <- rbind(
  newTable,
  data.frame(
    Name = "Ely",
    Age = 10,
    Gender = "F",
    Country = "Germany",
    Employmentstatus = "unemployed",
    Foodpreference = "Veg"
  )
)
print(newTable)
##       Name Age Gender   Country Employmentstatus Foodpreference
## 1     Raju  12      M  Malaysia       Unemployed            Veg
## 2    Kumar  34      M Australia         Employed            Veg
## 3  Chandru  23      M     Spain         Employed            Veg
## 4   Deepak  67      M   England       Unemployed        Non-Veg
## 5    Raina  23      M  Malaysia         Employed            Veg
## 6    Dhoni  43      M  Malaysia       Unemployed        Non-Veg
## 7     Hema  23      F     India         Employed            Veg
## 8  Vandana  38      F     Egypt         Employed            Veg
## 9     Bela  34      F     Italy       Unemployed        Non-Veg
## 10   Anand  12      M      Peru       Unemployed        Non-Veg
## 11     Ely  10      F   Germany       unemployed            Veg

19) Adding a derived column using mutate():

Using mutate(), we will create a new column, Agein2030, that holds each person's age in the year 2030 (computed as Age + 10). All existing columns are kept.

#install.packages("dplyr")
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# mutate() keeps every existing column and appends Agein2030 = Age + 10.
newTable <- mutate(newTable, Agein2030 = Age + 10)
print(newTable)
##       Name Age Gender   Country Employmentstatus Foodpreference Agein2030
## 1     Raju  12      M  Malaysia       Unemployed            Veg        22
## 2    Kumar  34      M Australia         Employed            Veg        44
## 3  Chandru  23      M     Spain         Employed            Veg        33
## 4   Deepak  67      M   England       Unemployed        Non-Veg        77
## 5    Raina  23      M  Malaysia         Employed            Veg        33
## 6    Dhoni  43      M  Malaysia       Unemployed        Non-Veg        53
## 7     Hema  23      F     India         Employed            Veg        33
## 8  Vandana  38      F     Egypt         Employed            Veg        48
## 9     Bela  34      F     Italy       Unemployed        Non-Veg        44
## 10   Anand  12      M      Peru       Unemployed        Non-Veg        22
## 11     Ely  10      F   Germany       unemployed            Veg        20

20) transmute():

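Using transmute(), we will create a new column, Agein2050, showing the age in the year 2050 (Agein2030 + 20). Unlike mutate(), transmute() keeps only the columns named in the call, so Agein2030 is dropped from the result.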

#install.packages("dplyr")
library(dplyr)
# transmute() returns only the columns listed here, so the existing columns
# are named explicitly and Agein2030 (not listed) is dropped.
Table2 <- transmute(
  newTable,
  Name, Age, Gender, Country, Employmentstatus, Foodpreference,
  Agein2050 = Agein2030 + 20
)
print(Table2)
##       Name Age Gender   Country Employmentstatus Foodpreference Agein2050
## 1     Raju  12      M  Malaysia       Unemployed            Veg        42
## 2    Kumar  34      M Australia         Employed            Veg        64
## 3  Chandru  23      M     Spain         Employed            Veg        53
## 4   Deepak  67      M   England       Unemployed        Non-Veg        97
## 5    Raina  23      M  Malaysia         Employed            Veg        53
## 6    Dhoni  43      M  Malaysia       Unemployed        Non-Veg        73
## 7     Hema  23      F     India         Employed            Veg        53
## 8  Vandana  38      F     Egypt         Employed            Veg        68
## 9     Bela  34      F     Italy       Unemployed        Non-Veg        64
## 10   Anand  12      M      Peru       Unemployed        Non-Veg        42
## 11     Ely  10      F   Germany       unemployed            Veg        40

21) Adding column:

Adding numeric columns

# Create the NewAge column as an NA placeholder, then overwrite it with the
# row-wise sum of Age and Agein2050.
Table2$NewAge <- NA
Table2$NewAge <- Table2$Age + Table2$Agein2050
print(Table2)
##       Name Age Gender   Country Employmentstatus Foodpreference Agein2050
## 1     Raju  12      M  Malaysia       Unemployed            Veg        42
## 2    Kumar  34      M Australia         Employed            Veg        64
## 3  Chandru  23      M     Spain         Employed            Veg        53
## 4   Deepak  67      M   England       Unemployed        Non-Veg        97
## 5    Raina  23      M  Malaysia         Employed            Veg        53
## 6    Dhoni  43      M  Malaysia       Unemployed        Non-Veg        73
## 7     Hema  23      F     India         Employed            Veg        53
## 8  Vandana  38      F     Egypt         Employed            Veg        68
## 9     Bela  34      F     Italy       Unemployed        Non-Veg        64
## 10   Anand  12      M      Peru       Unemployed        Non-Veg        42
## 11     Ely  10      F   Germany       unemployed            Veg        40
##    NewAge
## 1      54
## 2      98
## 3      76
## 4     164
## 5      76
## 6     116
## 7      76
## 8     106
## 9      98
## 10     54
## 11     50

22) Deleting a column by assigning NULL to it.

Here we will delete column “NewAge” by assigning NULL to it.

# Assigning NULL to a column removes it from the data frame.
Table2[["NewAge"]] <- NULL
Table2
##       Name Age Gender   Country Employmentstatus Foodpreference Agein2050
## 1     Raju  12      M  Malaysia       Unemployed            Veg        42
## 2    Kumar  34      M Australia         Employed            Veg        64
## 3  Chandru  23      M     Spain         Employed            Veg        53
## 4   Deepak  67      M   England       Unemployed        Non-Veg        97
## 5    Raina  23      M  Malaysia         Employed            Veg        53
## 6    Dhoni  43      M  Malaysia       Unemployed        Non-Veg        73
## 7     Hema  23      F     India         Employed            Veg        53
## 8  Vandana  38      F     Egypt         Employed            Veg        68
## 9     Bela  34      F     Italy       Unemployed        Non-Veg        64
## 10   Anand  12      M      Peru       Unemployed        Non-Veg        42
## 11     Ely  10      F   Germany       unemployed            Veg        40

23) Deleting a row by using a negative row index.

Here we will delete a specific row by providing the corresponding row number with a minus sign. Here we will delete the 2nd row.

# A negative row index drops that row. The remaining rows keep their original
# row labels, so label 2 is simply missing from the output.
Table2 <- Table2[-2, ]
Table2
##       Name Age Gender  Country Employmentstatus Foodpreference Agein2050
## 1     Raju  12      M Malaysia       Unemployed            Veg        42
## 3  Chandru  23      M    Spain         Employed            Veg        53
## 4   Deepak  67      M  England       Unemployed        Non-Veg        97
## 5    Raina  23      M Malaysia         Employed            Veg        53
## 6    Dhoni  43      M Malaysia       Unemployed        Non-Veg        73
## 7     Hema  23      F    India         Employed            Veg        53
## 8  Vandana  38      F    Egypt         Employed            Veg        68
## 9     Bela  34      F    Italy       Unemployed        Non-Veg        64
## 10   Anand  12      M     Peru       Unemployed        Non-Veg        42
## 11     Ely  10      F  Germany       unemployed            Veg        40

24) Summarize Dataframe :

The summary() function is used to summarize each column in the Dataframe. For a character column it reports the length, class and mode. For a numeric column it reports the minimum, 1st quartile, median, mean, 3rd quartile and maximum.

# Per-column summary: Length/Class/Mode for the character columns and
# Min/1st Qu./Median/Mean/3rd Qu./Max for the numeric columns (Age, Agein2050).
summary(Table2)
##      Name                Age           Gender            Country         
##  Length:10          Min.   :10.00   Length:10          Length:10         
##  Class :character   1st Qu.:14.75   Class :character   Class :character  
##  Mode  :character   Median :23.00   Mode  :character   Mode  :character  
##                     Mean   :28.50                                        
##                     3rd Qu.:37.00                                        
##                     Max.   :67.00                                        
##  Employmentstatus   Foodpreference       Agein2050    
##  Length:10          Length:10          Min.   :40.00  
##  Class :character   Class :character   1st Qu.:44.75  
##  Mode  :character   Mode  :character   Median :53.00  
##                                        Mean   :58.50  
##                                        3rd Qu.:67.00  
##                                        Max.   :97.00

25) Visualize the Dataframe using plot() :

# Convert Gender to a factor so that plot() treats it as a grouping variable.
Table2[["Gender"]] <- factor(Table2[["Gender"]])
# With a factor as the first argument and a numeric vector as the second,
# plot() draws one box plot of Age per Gender level.
plot(Table2$Gender, Table2$Age)