Commands to Start with

All created variables are labeled here as “object”, but you can also just type in datasetName$variableName to refer to a single column. Thank you to Coursera's Data Analysis class for this info.

dim(object)  #shows the number of rows and columns (arrays with more dimensions list each dimension's size)
## Error: object 'object' not found
# also helps to show if the full file loaded in correctly

names(object)
## Error: object 'object' not found
head(object)  #names() returns only the column names; head() also shows the first 6 rows of data
## Error: object 'object' not found
# head gives a few rows of data in addition to column names

nrow(object)
## Error: object 'object' not found

quantile(object)
## Error: object 'object' not found
# shows the range of the data (minimum and maximum) along with the 25%, 50%, and 75% quantiles

summary(object)
## Error: object 'object' not found
# summarizes every column, both qualitative and quantitative variables

class(object)  #tells data.frame, but each column can be detected from first row
## Error: object 'object' not found

sapply(object[1, ], class)  #this tells the class of each column: numeric, integer, character, factor, etc.
## Error: object 'object' not found
# are variables loaded the way I expected?

unique(dataset$variable)
## Error: object 'dataset' not found
length(unique(dataset$variable))  #tells how many unique items are in that column
## Error: object 'dataset' not found
table(unique(dataset$variable))  #every count is 1 here because unique() drops duplicates; use table(dataset$variable) to count qualitative instances
## Error: object 'dataset' not found

table(unique(dataset$variable), dataset$var2)  #fails unless both arguments have the same length; use table(dataset$variable, dataset$var2) to count each cell
## Error: object 'dataset' not found

dataset$variable[1:10]  #shows me the first 10 values in that column
## Error: object 'dataset' not found

# great for missing data or searching for characteristics
any(dataset$variable[1:10] > 40)  #a single TRUE or FALSE: TRUE if at least one value is greater than 40
## Error: object 'dataset' not found
all(dataset$variable[1:10] < 40)  #a single TRUE or FALSE: TRUE only if every value is less than 40
## Error: object 'dataset' not found

# & is 'and'; | is 'or'

is.na(dataset$variable)
## Error: object 'dataset' not found
sum(is.na(dataset$variable))  #counts how many values are NA (each TRUE counts as 1)
## Error: object 'dataset' not found
table(is.na(dataset$variable))  #shows how many values are missing and not missing
## Error: object 'dataset' not found


colSums(object)  #if a column contains any NA values, the sum for that column will be NA
## Error: object 'object' not found
rowSums(object)
## Error: object 'object' not found
rowMeans(variable)
## Error: object 'variable' not found
colMeans(variable, na.rm = TRUE)  #na.rm = TRUE removes the NA values before computing the means
## Error: object 'variable' not found

# DATA MUNGING: keeping the munging steps in their own R script is a good idea

# fixing vector characters
tolower(names(datasetName))  #returns the column names of the dataset in all lower case letters; assign the result back to names(datasetName) to actually change them
## Error: object 'datasetName' not found
strsplit(names(dataset), "\\.")  #example case. \\ escapes the special character . so that it matches a literal period instead of acting as the regex wildcard
## Error: object 'dataset' not found
toupper(variable)
## Error: object 'variable' not found

sub("_", "", names(dataset), )  #this example replaces the underscore character in the header names with an empty string, basically merging the two sides. BUT sub() only replaces the first instance of the underscore in each name; gsub() replaces every instance
## Error: object 'dataset' not found
gsub
## function (pattern, replacement, x, ignore.case = FALSE, perl = FALSE, 
##     fixed = FALSE, useBytes = FALSE) 
## {
##     if (!is.character(x)) 
##         x <- as.character(x)
##     .Internal(gsub(as.character(pattern), as.character(replacement), 
##         x, ignore.case, perl, fixed, useBytes))
## }
## <bytecode: 0x1009c2878>
## <environment: namespace:base>

You can also embed plots, for example:

plot(object)
## Error: object 'object' not found