# Two example vectors (both of length 7) used in the indexing examples below.
nums1 <- c(1, 4, 2, 8, 11, 100, 8)
nums2 <- c(3.3, 8.1, 2.5, 9.8, 21.2, 13.8, 0.9)
# Get last element:
nelements <- length(nums1)
nums1[nelements]
# Select the first 3:
nums1[1:3]
# Select a few elements of a vector (returned in the order requested):
selectthese <- c(1, 5, 2)
nums1[selectthese]
# Select every other element. The upper bound is taken from the vector itself
# instead of being hard-coded, so this keeps working if nums1 changes length:
everyother <- seq(1, length(nums1), by = 2)
nums1[everyother]
# Select five random elements (sample() draws without replacement by default).
# seq_along(x) is the safe replacement for 1:length(x).
ranels <- sample(seq_along(nums2), 5)
nums2[ranels]
# Remove the first element (a negative index drops that position):
nums1[-1]
# Remove the first and last element:
nums1[-c(1, length(nums1))]
# Subset of nums2, where value is larger than 10:
nums2[nums2 > 10]
# Subset of nums2, where value is strictly between 5 and 10:
nums2[nums2 > 5 & nums2 < 10]
# Subset of nums2, where value is smaller than 1, or larger than 20:
nums2[nums2 < 1 | nums2 > 20]
# Subset of nums1, where value is exactly 8:
nums1[nums1 == 8]
# Subset nums1 where number is NOT equal to 100
nums1[nums1 != 100]
# Subset of nums1, where value is one of 1, 4 or 11:
nums1[nums1 %in% c(1, 4, 11)]
# Subset of nums1, where value is NOT 1, 4 or 11:
nums1[!(nums1 %in% c(1, 4, 11))]
# Where nums1 was 100, make it -100 (this modifies nums1 in place)
nums1[nums1 == 100] <- -100
# Where nums2 was less than 5, make it zero (this modifies nums2 in place)
nums2[nums2 < 5] <- 0
# General syntax for indexing a dataframe (a template, not runnable code):
#   mydataframe[row, column]
# Read data
dataset <- read.csv("att.csv")
# Recall the names of the variables, the number of columns, and number of rows:
names(dataset)
nrow(dataset)
ncol(dataset)
# Take the 4th observation of the 2nd variable:
dataset[4, 2]
# We can also index the dataframe by its variable name:
dataset[4, "moves"]
# Extract the first 3 rows of 'pick':
dataset[1:3, "pick"]
# Extract the first 5 rows, of ALL variables
# Note the use of the comma followed by nothing
# This means 'every column' and is very useful!
dataset[1:5, ]
# Extract the fourth column
# Here we use nothing, followed by a comma,
# to indicate 'every row'
dataset[, 4]
# Select only 'pick', 'reachout' and 'card', store in new dataframe:
subdataset <- dataset[, c("pick", "reachout", "card")]
# Extract the values of 'pick' that are equal to "OCC" (result is a vector):
dataset$pick[dataset$pick == "OCC"]
# Extract all rows of dataset where pick is "OCC" (result is a dataframe).
# Make sure you understand the difference with the above example!
dataset[dataset$pick == "OCC", ]
# We can use one vector to index another. For example, to find the observation
# (the whole row) that has the max moves, we can do:
dataset[which.max(dataset$moves), ]
# To get only 'pick' for that same observation, also give the column:
dataset[which.max(dataset$moves), "pick"]
# Get 10 random observations of 'age'. Here, we make a new vector
# on the fly with sample(), which we use to index the dataframe.
# seq_len(nrow(x)) is the safe replacement for 1:nrow(x).
dataset[sample(seq_len(nrow(dataset)), 10), "age"]
# As we did with vectors, we can also use %in% to select a subset.
# This example keeps only the rows where education is "HS" or "BA".
dataset[dataset$education %in% c("HS", "BA"), ]
# Extract 'pick' for the BA level, as long as employment == "F"
dataset$pick[dataset$education == "BA" & dataset$employment == "F"]
# Same condition, but keep every column of the matching rows:
dataset[dataset$education == "BA" & dataset$employment == "F", ]
# Take subset of dataset, pick == ATT and usage > 10, show variables: pick and usage.
subset(dataset, pick == "ATT" & usage > 10, select = c(pick, usage))
# Deleting columns from a dataframe
# A simple example dataframe
dfr <- data.frame(a = -5:0, b = 10:15)
# Delete the second column (make a new dataframe 'dfr2' that does not include
# that column). drop = FALSE is required here: only one column remains, and
# without it R simplifies the result to a plain vector instead of a dataframe.
dfr2 <- dfr[, -2, drop = FALSE]
# Drop a column by name with subset(): a minus sign in front of the column
# name in 'select' excludes that column.
# Note: negating a bare column name like this only works inside subset(),
# not with square-bracket indexing.
dfr2 <- subset(dfr, select = -b)
# Assigning NULL to a column removes it from the existing dataframe itself,
# whereas the approaches above build a new dataframe *without* that column.
dfr[["b"]] <- NULL
# Save to disk as CSV; row.names = FALSE keeps the row names out of the file.
write.csv(dfr, file = "somedata.csv", row.names = FALSE)