# Vector and Dataframe

# VECTOR ----

# Two example numeric vectors used throughout this section.
nums1 <- c(1, 4, 2, 8, 11, 100, 8)
nums2 <- c(3.3, 8.1, 2.5, 9.8, 21.2, 13.8, 0.9)

# Get last element:
nelements <- length(nums1)
nums1[nelements]

# Select the first 3:
nums1[1:3]

# Select a few elements of a vector (returned in the order of the index vector):
selectthese <- c(1, 5, 2)
nums1[selectthese]

# Select every other element.
# The upper bound comes from the vector itself rather than a hard-coded 7,
# so the example keeps working if nums1 changes length.
everyother <- seq(1, length(nums1), by = 2)
nums1[everyother]

# Select five random elements.
# seq_along() is used instead of 1:length() because 1:0 would yield c(1, 0)
# for an empty vector.
ranels <- sample(seq_along(nums2), 5)
nums2[ranels]

# Remove the first element (negative indices drop positions):
nums1[-1]

# Remove the first and last element:
nums1[-c(1, length(nums1))]

# Subset of nums2, where value is greater than 10:
nums2[nums2 > 10]

# Subset of nums2, where value is strictly between 5 and 10:
nums2[nums2 > 5 & nums2 < 10]

# Subset of nums2, where value is smaller than 1, or larger than 20:
nums2[nums2 < 1 | nums2 > 20]

# Subset of nums1, where value is exactly 8:
nums1[nums1 == 8]

# Subset nums1 where number is NOT equal to 100
nums1[nums1 != 100]

# Subset of nums1, where value is one of 1, 4 or 11:
nums1[nums1 %in% c(1, 4, 11)]

# Subset of nums1, where value is NOT 1, 4 or 11:
nums1[!(nums1 %in% c(1, 4, 11))]

# Where nums1 was 100, make it -100 (this modifies nums1 in place)
nums1[nums1 == 100] <- -100

# Where nums2 was less than 5, make it zero (this modifies nums2 in place)
nums2[nums2 < 5] <- 0

# SUBSETTING DATAFRAME ----

# General form: mydataframe[row, column]

# Read data (the file 'att.csv' is looked up relative to the working directory)
dataset <- read.csv("att.csv")
# Recall the names of the variables, the number of rows, and number of columns:
names(dataset)
nrow(dataset)
ncol(dataset)

# Take the 4th observation of the 2nd variable:
dataset[4, 2]

# We can also index the dataframe by its variable name:
dataset[4, "moves"]

# Extract the first 3 rows of 'pick':
dataset[1:3, "pick"]

# Extract the first 5 rows, of ALL variables
# Note the use of the comma followed by nothing
# This means 'every column' and is very useful!
dataset[1:5, ]

# Extract the fourth column
# Here we use nothing, followed by a comma,
# to indicate 'every row'
dataset[, 4]

# Select only 'pick', 'reachout' and 'card', store in new dataframe:
subdataset <- dataset[, c("pick", "reachout", "card")]

# Extract the values of 'pick' that are equal to "OCC" (the result is a vector)
dataset$pick[dataset$pick == "OCC"]

# Extract all rows of dataset where pick is "OCC" (the result keeps every column).
# Make sure you understand the difference with the above example!
dataset[dataset$pick == "OCC", ]

# We can use one vector to index another. For example, to find the observation
# (the whole row) that has the max moves, we can do:
dataset[which.max(dataset$moves), ]

# Adding a column name returns only the value of 'pick' for that same observation:
dataset[which.max(dataset$moves), "pick"]

# Get 10 random observations of 'age'. Here, we make a new vector
# on the fly with sample(), which we use to index the dataframe.
# seq_len() is used instead of 1:nrow() because 1:0 would yield c(1, 0)
# for a dataframe without rows.
dataset[sample(seq_len(nrow(dataset)), 10), "age"]

# As we did with vectors, we can also use %in% to select a subset.
# This example keeps only the rows whose education is "HS" or "BA".
dataset[dataset$education %in% c("HS", "BA"), ]

# Extract 'pick' for the BA level, as long as employment == "F"
dataset$pick[dataset$education == "BA" & dataset$employment == "F"]
# Same condition, but return the complete rows instead of only 'pick'
dataset[dataset$education == "BA" & dataset$employment == "F", ]

# Using subset() ----

# Keep the rows of dataset where pick is "ATT" and usage exceeds 10,
# and return only the columns pick and usage.
subset(
  dataset,
  subset = pick == "ATT" & usage > 10,
  select = c(pick, usage)
)

# Deleting columns from a dataframe
# A simple example dataframe
dfr <- data.frame(a = -5:0, b = 10:15)
# Delete the second column (make a new dataframe 'dfr2' that does not include
# that column). drop = FALSE is needed here: only one column remains, and by
# default `[` simplifies a single-column result to a plain vector.
dfr2 <- dfr[, -2, drop = FALSE]
# Use subset to remove a column by name
# Note: negating a bare column name (-b) does not work using square-bracket notation!
dfr2 <- subset(dfr, select = -b)
# Finally, assigning NULL to a column deletes it as well.
# In this case, we really delete the column from the existing dataframe,
# whereas the two examples above create a new subset *without* that column.
dfr$b <- NULL

# Write to disk (row names are generally not wanted in the CSV file).
write.csv(dfr, "somedata.csv", row.names = FALSE)

# Vector and Dataframe

# Thuy Tran

# 8 January 2018

# VECTOR

# SUBSETTING DATAFRAME

# Using subset()