This document demonstrates basic data manipulation techniques in R, including creating and modifying data frames, assigning row and column names, and changing data types.
# Two numeric vectors of equal length; they become the first two
# columns of the data frame built below.
var1 <- c(
  23, 54, 76, 34, 56, 32,
  45, 66, 43, 28, 64, 27
)
var2 <- c(
  43, 12, 56, 43, 23, 66,
  78, 43, 45, 32, 35, 64
)
# Number of elements in var1
length(var1)
## [1] 12
# Bind the two vectors column-wise into a data frame. The explicit
# name = value form yields the same column names that data.frame()
# would otherwise derive from the variable names.
df <- data.frame(var1 = var1, var2 = var2)
df
## var1 var2
## 1 23 43
## 2 54 12
## 3 76 56
## 4 34 43
## 5 56 23
## 6 32 66
## 7 45 78
## 8 66 43
## 9 43 45
## 10 28 32
## 11 64 35
## 12 27 64
# Row count: one row per element of the input vectors
nrow(df)
## [1] 12
# var3 has only 4 values while var1 and var2 have 12. data.frame()
# would recycle the shorter vector on its own because 12 is a whole
# multiple of 4; the repetition is spelled out here so it is visible.
var3 <- c(1, 2, 3, 4)
df <- data.frame(
  var1 = var1,
  var2 = var2,
  var3 = rep(var3, length.out = length(var1))
)
df
## var1 var2 var3
## 1 23 43 1
## 2 54 12 2
## 3 76 56 3
## 4 34 43 4
## 5 56 23 1
## 6 32 66 2
## 7 45 78 3
## 8 66 43 4
## 9 43 45 1
## 10 28 32 2
## 11 64 35 3
## 12 27 64 4
# A 2-element character vector, repeated to fill every row of df and
# appended as a new last column named var4.
var4 <- c("part1", "part2")
df <- data.frame(df, var4 = rep(var4, length.out = nrow(df)))
df
## var1 var2 var3 var4
## 1 23 43 1 part1
## 2 54 12 2 part2
## 3 76 56 3 part1
## 4 34 43 4 part2
## 5 56 23 1 part1
## 6 32 66 2 part2
## 7 45 78 3 part1
## 8 66 43 4 part2
## 9 43 45 1 part1
## 10 28 32 2 part2
## 11 64 35 3 part1
## 12 27 64 4 part2
# One label per row of the data frame (12 in total)
var5 <- c(
  "texas", "michigan", "utah", "colorado",
  "minnesota", "ohio", "new york", "montana",
  "idaho", "hawaii", "washington", "arkansas"
)
# Confirm the label count before combining it with df
length(var5)
## [1] 12
# Put the labels in front: var5 becomes the first column, followed by
# the existing columns of df in their current order.
df <- data.frame(var5 = var5, df)
df
## var5 var1 var2 var3 var4
## 1 texas 23 43 1 part1
## 2 michigan 54 12 2 part2
## 3 utah 76 56 3 part1
## 4 colorado 34 43 4 part2
## 5 minnesota 56 23 1 part1
## 6 ohio 32 66 2 part2
## 7 new york 45 78 3 part1
## 8 montana 66 43 4 part2
## 9 idaho 43 45 1 part1
## 10 hawaii 28 32 2 part2
## 11 washington 64 35 3 part1
## 12 arkansas 27 64 4 part2
# Use the labels stored in the var5 column as the row names of df.
# The column itself is left in place by this step.
row.names(df) <- df[["var5"]]
df
## var5 var1 var2 var3 var4
## texas texas 23 43 1 part1
## michigan michigan 54 12 2 part2
## utah utah 76 56 3 part1
## colorado colorado 34 43 4 part2
## minnesota minnesota 56 23 1 part1
## ohio ohio 32 66 2 part2
## new york new york 45 78 3 part1
## montana montana 66 43 4 part2
## idaho idaho 43 45 1 part1
## hawaii hawaii 28 32 2 part2
## washington washington 64 35 3 part1
## arkansas arkansas 27 64 4 part2
# Remove the var5 column; its values are already kept as the row names.
# The column is excluded by name rather than by position (-1), so running
# this step again does not silently delete a different column once var5
# is gone. drop = FALSE guarantees the result stays a data frame.
df <- df[, names(df) != "var5", drop = FALSE]
df
## var1 var2 var3 var4
## texas 23 43 1 part1
## michigan 54 12 2 part2
## utah 76 56 3 part1
## colorado 34 43 4 part2
## minnesota 56 23 1 part1
## ohio 32 66 2 part2
## new york 45 78 3 part1
## montana 66 43 4 part2
## idaho 43 45 1 part1
## hawaii 28 32 2 part2
## washington 64 35 3 part1
## arkansas 27 64 4 part2
# Rename all four columns, in order. Note that "part#" is not a
# syntactic R name, so it must be written with backticks (df$`part#`)
# or as a string (df[["part#"]]) when accessed later.
names(df) <- c(
  "obs1",
  "obs2",
  "fact",
  "part#"
)
df
## obs1 obs2 fact part#
## texas 23 43 1 part1
## michigan 54 12 2 part2
## utah 76 56 3 part1
## colorado 34 43 4 part2
## minnesota 56 23 1 part1
## ohio 32 66 2 part2
## new york 45 78 3 part1
## montana 66 43 4 part2
## idaho 43 45 1 part1
## hawaii 28 32 2 part2
## washington 64 35 3 part1
## arkansas 27 64 4 part2
# Inspect the column types; at this point `fact` is still numeric
str(df)
## 'data.frame': 12 obs. of 4 variables:
## $ obs1 : num 23 54 76 34 56 32 45 66 43 28 ...
## $ obs2 : num 43 12 56 43 23 66 78 43 45 32 ...
## $ fact : num 1 2 3 4 1 2 3 4 1 2 ...
## $ part#: chr "part1" "part2" "part1" "part2" ...
# Convert `fact` from numeric to a factor so its values are treated as
# categories, then check the structure again to see the new type.
df[["fact"]] <- as.factor(df[["fact"]])
str(df)
## 'data.frame': 12 obs. of 4 variables:
## $ obs1 : num 23 54 76 34 56 32 45 66 43 28 ...
## $ obs2 : num 43 12 56 43 23 66 78 43 45 32 ...
## $ fact : Factor w/ 4 levels "1","2","3","4": 1 2 3 4 1 2 3 4 1 2 ...
## $ part#: chr "part1" "part2" "part1" "part2" ...