SEC R Workshop Basics

Here is the code we will be working with. You’ll also need the TWEETS.csv data file.
########## AEJMC 2023 Southeast Colloquium ##################
########## Middle Tennessee State University ################
######### College of Media and Entertainment ################
######## School of Journalism & Strategic Media #############
########### Dive headfirst into R ###########################
######## Prepared by Ken Blake and Jun Zhang ################

#use "#" to enter a comment

########### Simple Math ##########
# R works as a calculator: run a line and the result appears in the console
3 + 3
50 * 125
1:10 # the colon ":" returns every integer from the first number to the second

##### Objects & Vectors ##########
# "<-" stores a value under a name; the environment pane shows all the created variables
a <- 20
A <- 20 # R is case sensitive: "A" and "a" are two different objects
a <- 6 / (4 - 1) # assigning again replaces the previous value (a is now 2)

a # type the name of a variable to check its value
a + 5

b <- "R workshop" # text values go inside quotes
b

# Vectors hold several values at once
c <- 1:6 # naming an object "c" does not break the c() function used below
d <- c(7, 1.5, 1.5, 6, 0.5, 3) # c() combines values into one vector
?c # use "?" to get help on a particular function

e <- c("Tennessee", "Alabama", "Kentucky", "Georgia", "Tennessee", "North Carolina")

# Arithmetic on vectors works element by element
d + 1 # adds 1 to every element of d
d * c # multiplies matching elements: d[1] * c[1], d[2] * c[2], ...
d %o% c # outer product: a 6 x 6 table holding every d[i] * c[j]
d %*% c # matrix multiplication: for two vectors this is the inner product, sum(d * c)

########### Comparison ##########
# Every comparison returns TRUE or FALSE, one result per element
a > 10 # greater than
d == 7 # equal to
c != 10 # NOT equal to
c <= d # less than or equal to, compared element by element

# Logical operators combine comparisons
a == 3 | d == 7 # OR: TRUE where at least one side is TRUE
a == 2 & e == "Tennessee" # AND: TRUE only where both sides are TRUE

######### Conditionals ##########
# if/else runs the first block when the condition is TRUE, otherwise the second
if (a > 5) {
  print("a is greater than 5")
} else {
  # this branch also runs when a is exactly 5, so the message has to cover that case
  print("a is not greater than 5")
}

########### Packages ############
# Install a package only when it is missing, so that re-running the script
# does not download and reinstall it every time
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
if (!requireNamespace("lubridate", quietly = TRUE)) {
  install.packages("lubridate")
}
library(tidyverse) # load the package so its functions can be used
library(datasets) # example datasets that come with R

help(package = datasets) # overview of everything in a package
?datasets # help page of the package

########### Functions ###########
# "trees" is one of the example data frames in the datasets package
head(trees) # show the first six rows of the dataset
summary(trees) # summary statistics for every column
trees # print everything the data frame contains

class(trees) # check what type of object this is
class(trees$Girth) # "$" picks a single column out of the data frame
sort(trees$Girth) # sort from smallest to largest
unique(trees$Girth) # list each distinct value once
ls(trees) # get a list of the variables in the dataset
ls() # with no argument: list the objects you have created

######## Basic Statistics ########
#Numeric data
mean(trees$Girth) # average
sd(trees$Girth) # standard deviation
sum(trees$Girth) # total; write the column name in full instead of relying on "$" partial matching
min(trees$Girth) # smallest value
max(trees$Girth) # largest value
length(trees$Girth) #check the length of a vector

#Categorical data
summary(e) # for a text vector this only reports length, class and mode
table(e) # count how many times each category appears
length(e) # number of elements
unique(e) # the distinct categories

#Clear the workspace
rm(A) # remove one object by name
rm(list = ls()) # remove every object listed by ls(), i.e. everything created so far

########### Exercise ##############
# 1. Create an R object called ID and assign it the values 1 to 7
# 2. Create an R object called location and assign it the values Tennessee,
#    Alabama, Kentucky, Georgia, Tennessee, North Carolina, Tennessee
# 3. Create an R object called followers and assign it the values 2525, 908,
#    1560, 2687, 525, 1237, 667

# Solution
ID <- 1:7
location <- c(
  "Tennessee", "Alabama", "Kentucky", "Georgia",
  "Tennessee", "North Carolina", "Tennessee"
)
followers <- c(2525, 908, 1560, 2687, 525, 1237, 667)

########### Data frame ############
# Combine the three vectors into one table;
# "id", "location" and "followers" become the headers of the data frame
account <- data.frame(
  id = ID,
  location = location,
  followers = followers
)

account # print the data frame
summary(account) # summarise every column

#Specify Elements Within a Data Object using [row, column]
account[1, ] # the 1st row, all columns
account[, 2] # the 2nd column, all rows

# Take a subset of data
# create a data frame that contains only the accounts located in Tennessee
account_TN <- account[account$location == "Tennessee", ]

# Aggregate data: mean number of followers for each location
aggregate(x = account$followers, by = list(account$location), FUN = mean)

#Replace values: write "TN" instead of "Tennessee"
account$location <- sub(pattern = "Tennessee", replacement = "TN", x = account$location)

##### Load & Export Data #######
#work directory
getwd() # show the folder R currently reads files from and writes files to
# setwd() needs the folder path as its argument; calling it with nothing inside
# the parentheses stops the script with an error.
# Remove the "#" on the next line and type in your own folder path to use it.
# setwd("path/to/your/folder")

#Load data
TWEETS <- read.csv("TWEETS.csv", header = TRUE, sep = ",")
#If your file is not in the working directory, you need to specify the file path.

#Take a random sample of up to 1,000 tweets from the original dataset
#seq_len(nrow(TWEETS)) gives the row numbers and stays empty if the dataset has no rows
#min() keeps the sample size from exceeding the number of rows, which would
#be an error when sampling without replacement
#"replace = FALSE": each row can only be selected once
TWEETS_sample <- TWEETS[sample(seq_len(nrow(TWEETS)), min(1000, nrow(TWEETS)), replace = FALSE), ]

#Export data
write.csv(TWEETS_sample, "TWEETS_sample.csv") # by default the row names are written as the first column

#Inspecting data & Basic statistics
names(TWEETS_sample) # return the header (column) names
# head() returns the first part of an object: six rows unless you ask for
# another number, e.g., head(TWEETS_sample, 10)
head(TWEETS_sample)
tail(TWEETS_sample) # return the last part of an object
summary(TWEETS_sample) # summarise every column
nrow(TWEETS_sample) # total number of rows
ncol(TWEETS_sample) # total number of columns
unique(TWEETS_sample$Author) # display the unique values of a column
length(unique(TWEETS_sample$Author)) # count the number of unique values
SEC R Workshop Basics

Dr. Jun Zhang

2023-02-28