All created variables are labeled here as “object”, but you can also refer to a single column by typing datasetName$variableName. Thank you to Coursera's Data Analysis class for this info.
dim(object) #shows the dimensions: number of rows, then number of columns (plus any further dimensions if the object is an array)
## Error: object 'object' not found
# also helps to show if the full file loaded in correctly
names(object)
## Error: object 'object' not found
head(object) #unlike names(), which returns only the column names, head() also shows the first few rows
## Error: object 'object' not found
# head gives a few rows of data in addition to column names
nrow(object)
## Error: object 'object' not found
quantile(object)
## Error: object 'object' not found
# shows the range of the data (0% = minimum, 100% = maximum) along with the 25%, 50%, and 75% quantiles
summary(object)
## Error: object 'object' not found
# summarizes both qualitative (categorical) and quantitative variables
class(object) #tells data.frame, but each column can be detected from first row
## Error: object 'object' not found
sapply(object[1, ], class) #this tells the class of each column: numeric, integer, character, factor, etc.
## Error: object 'object' not found
# are variables loaded the way I expected?
unique(dataset$variable)
## Error: object 'dataset' not found
length(unique(dataset$variable)) #tells how many unique items are in that column
## Error: object 'dataset' not found
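table(unique(dataset$variable)) #lists the distinct values, but because of unique() every count is 1; use table(dataset$variable) to count how often each qualitative value occurs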
## Error: object 'dataset' not found
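table(unique(dataset$variable), dataset$var2) #intended as a count of each cell, but unique() usually shortens the first vector so the lengths no longer match; use table(dataset$variable, dataset$var2) for a two-way count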
## Error: object 'dataset' not found
dataset$variable[1:10] #shows me the first 10 values in that column
## Error: object 'dataset' not found
# great for missing data or searching for characteristics
any(dataset$variable[1:10] > 40) #returns a single TRUE if at least one of the values is greater than 40, otherwise FALSE
## Error: object 'dataset' not found
all(dataset$variable[1:10] < 40) #returns a single TRUE only if every one of the values is less than 40
## Error: object 'dataset' not found
# & is 'and' | is 'or'
is.na(dataset$variable)
## Error: object 'dataset' not found
sum(is.na(dataset$variable)) #counts the number of NA values (each TRUE from is.na() counts as 1)
## Error: object 'dataset' not found
table(is.na(dataset$variable)) #shows how many values are missing and not missing
## Error: object 'dataset' not found
colSums(object) #if any NA values, the output will be NA
## Error: object 'object' not found
rowSums(object)
## Error: object 'object' not found
rowMeans(variable)
## Error: object 'variable' not found
colMeans(variable, na.rm = TRUE) #takes out NA values and finishes computing
## Error: object 'variable' not found
# DATA MUNGING: keeping the munging code in its own R script is a good idea
# fixing vector characters
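tolower(names(datasetName)) #returns the column names of the dataset in all lower case letters; to actually change them, assign the result back with names(datasetName) <- tolower(names(datasetName))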
## Error: object 'datasetName' not found
strsplit(names(dataset), "\\.") #example case. \\ escapes the special character . so that it is matched as a literal period instead of as the regular-expression wildcard for any character
## Error: object 'dataset' not found
toupper(variable)
## Error: object 'variable' not found
sub("_", "", names(dataset), ) #this example replaces the underscore character in the header names with an empty string, effectively merging the two sides. BUT it only replaces the first instance of the underscore in each name!
## Error: object 'dataset' not found
gsub #takes the same arguments as sub() but replaces every match, not just the first one
## function (pattern, replacement, x, ignore.case = FALSE, perl = FALSE,
## fixed = FALSE, useBytes = FALSE)
## {
## if (!is.character(x))
## x <- as.character(x)
## .Internal(gsub(as.character(pattern), as.character(replacement),
## x, ignore.case, perl, fixed, useBytes))
## }
## <bytecode: 0x1009c2878>
## <environment: namespace:base>
You can also embed plots, for example:
plot(object)
## Error: object 'object' not found