Introduction

This document demonstrates basic data manipulation techniques in R, including creating and modifying data frames, assigning row and column names, and changing data types.


Code Execution and Explanation

Create two vectors

# Define the two numeric input vectors (12 values each)
var1 <- c(
  23, 54, 76, 34, 56, 32,
  45, 66, 43, 28, 64, 27
)
var2 <- c(
  43, 12, 56, 43, 23, 66,
  78, 43, 45, 32, 35, 64
)

# Confirm how many elements var1 holds
length(var1)
## [1] 12

Combine var1 and var2 into a data frame

# Bind the two vectors column-wise into a data frame; each vector becomes a
# column carrying the vector's own name
df <- data.frame(var1 = var1, var2 = var2)
df
##    var1 var2
## 1    23   43
## 2    54   12
## 3    76   56
## 4    34   43
## 5    56   23
## 6    32   66
## 7    45   78
## 8    66   43
## 9    43   45
## 10   28   32
## 11   64   35
## 12   27   64

Check the number of rows in the data frame

# Number of rows (observations) currently in df
nrow(df)
## [1] 12

Add var3 to the data frame

# var3 repeats the pattern 1, 2, 3, 4 down the 12 rows. data.frame() would
# silently recycle a length-4 vector to fill them, so the repetition is spelled
# out here to make the intent visible. With a full-length vector, data.frame()
# also raises an error if the row count ever stops matching.
var3 <- rep(c(1, 2, 3, 4), times = 3)

# Extend the existing data frame instead of rebuilding it from var1 and var2
df <- data.frame(df, var3)
df
##    var1 var2 var3
## 1    23   43    1
## 2    54   12    2
## 3    76   56    3
## 4    34   43    4
## 5    56   23    1
## 6    32   66    2
## 7    45   78    3
## 8    66   43    4
## 9    43   45    1
## 10   28   32    2
## 11   64   35    3
## 12   27   64    4

Add var4 to the data frame

# var4 alternates "part1" / "part2" down the 12 rows. Writing the repetition
# out avoids relying on data.frame() silently recycling a length-2 vector, and
# lets data.frame() raise an error if the lengths ever stop matching.
var4 <- rep(c("part1", "part2"), times = 6)

# Append var4 as a new last column of the existing data frame
df <- data.frame(df, var4)
df
##    var1 var2 var3  var4
## 1    23   43    1 part1
## 2    54   12    2 part2
## 3    76   56    3 part1
## 4    34   43    4 part2
## 5    56   23    1 part1
## 6    32   66    2 part2
## 7    45   78    3 part1
## 8    66   43    4 part2
## 9    43   45    1 part1
## 10   28   32    2 part2
## 11   64   35    3 part1
## 12   27   64    4 part2

Create a vector of state names (var5) to add as a new column

# Lower-case state names, one per row of the data frame
var5 <- c(
  "texas", "michigan", "utah", "colorado",
  "minnesota", "ohio", "new york", "montana",
  "idaho", "hawaii", "washington", "arkansas"
)

Check the length of var5

# Verify var5 has one entry per row of df before adding it as a column
length(var5)
## [1] 12

Add var5 to the data frame

# Put the state names in front of the existing columns; var5 is passed first,
# so it becomes column 1
df <- data.frame(var5 = var5, df)
df
##          var5 var1 var2 var3  var4
## 1       texas   23   43    1 part1
## 2    michigan   54   12    2 part2
## 3        utah   76   56    3 part1
## 4    colorado   34   43    4 part2
## 5   minnesota   56   23    1 part1
## 6        ohio   32   66    2 part2
## 7    new york   45   78    3 part1
## 8     montana   66   43    4 part2
## 9       idaho   43   45    1 part1
## 10     hawaii   28   32    2 part2
## 11 washington   64   35    3 part1
## 12   arkansas   27   64    4 part2

Set row names to var5

# Label each row with its state name, taken from the var5 column
row.names(df) <- df[["var5"]]
df
##                  var5 var1 var2 var3  var4
## texas           texas   23   43    1 part1
## michigan     michigan   54   12    2 part2
## utah             utah   76   56    3 part1
## colorado     colorado   34   43    4 part2
## minnesota   minnesota   56   23    1 part1
## ohio             ohio   32   66    2 part2
## new york     new york   45   78    3 part1
## montana       montana   66   43    4 part2
## idaho           idaho   43   45    1 part1
## hawaii         hawaii   28   32    2 part2
## washington washington   64   35    3 part1
## arkansas     arkansas   27   64    4 part2

Remove the var5 column

# The state names now live in the row names, so the var5 column is redundant.
# Select by name rather than by position so the right column is removed even if
# the column order changes; drop = FALSE keeps the result a data frame however
# many columns remain.
df <- df[, names(df) != "var5", drop = FALSE]
df
##            var1 var2 var3  var4
## texas        23   43    1 part1
## michigan     54   12    2 part2
## utah         76   56    3 part1
## colorado     34   43    4 part2
## minnesota    56   23    1 part1
## ohio         32   66    2 part2
## new york     45   78    3 part1
## montana      66   43    4 part2
## idaho        43   45    1 part1
## hawaii       28   32    2 part2
## washington   64   35    3 part1
## arkansas     27   64    4 part2

Rename columns

# Give the four columns descriptive names. "part#" is not a syntactic R name
# (# starts a comment), so that column has to be referenced later as
# df[["part#"]] or df$`part#`.
names(df) <- c("obs1", "obs2", "fact", "part#")
df
##            obs1 obs2 fact part#
## texas        23   43    1 part1
## michigan     54   12    2 part2
## utah         76   56    3 part1
## colorado     34   43    4 part2
## minnesota    56   23    1 part1
## ohio         32   66    2 part2
## new york     45   78    3 part1
## montana      66   43    4 part2
## idaho        43   45    1 part1
## hawaii       28   32    2 part2
## washington   64   35    3 part1
## arkansas     27   64    4 part2

Check the structure of the data frame

# Compact summary of df: dimensions plus the type and first values of each column
str(df)
## 'data.frame':    12 obs. of  4 variables:
##  $ obs1 : num  23 54 76 34 56 32 45 66 43 28 ...
##  $ obs2 : num  43 12 56 43 23 66 78 43 45 32 ...
##  $ fact : num  1 2 3 4 1 2 3 4 1 2 ...
##  $ part#: chr  "part1" "part2" "part1" "part2" ...

Change the ‘fact’ column to a factor

# Treat fact as categorical: convert the numeric codes into a factor whose
# levels are the distinct values
df[["fact"]] <- factor(df[["fact"]])

Check the structure again

# Re-inspect the structure to confirm the type of the fact column after conversion
str(df)
## 'data.frame':    12 obs. of  4 variables:
##  $ obs1 : num  23 54 76 34 56 32 45 66 43 28 ...
##  $ obs2 : num  43 12 56 43 23 66 78 43 45 32 ...
##  $ fact : Factor w/ 4 levels "1","2","3","4": 1 2 3 4 1 2 3 4 1 2 ...
##  $ part#: chr  "part1" "part2" "part1" "part2" ...