# Step 0: Save this file to your workspace. When you do, it should prompt
# you to install any packages that you need. If it doesn't, make a small change
# to the file (e.g., add a comment) and save again. If that still doesn't work,
# please email me right away!
library(psych)
library(DT)
# Step 1: Load the .csv data file(s)
# In this example, the data has already been cleaned and organized. You
# can use R to clean and organize data, but there's a pretty steep learning
# curve, so we'll do all data cleaning in Excel/Google Sheets (if we need
# to do any at all).
# Read gss1.csv from the working directory into a dataframe. The first row
# of the file supplies the column names (header = TRUE), and any cell
# containing "99" is read in as missing (NA) via na.strings.
# TRUE is spelled out because T is an ordinary variable that can be
# reassigned, whereas TRUE is a reserved word.
# what students will see
name_of_your_new_dataframe <- read.csv(
  file = "gss1.csv",
  header = TRUE,
  na.strings = "99"
)
# what they'll need to create
data <- read.csv(
  file = "gss1.csv",
  header = TRUE,
  na.strings = "99"
)
# Once you've loaded the data, it should show up to the right in the
# environment window. If you click on it, it will open it in a
# viewer-friendly spreadsheet format. Note that you can't edit the data
# in this window the way you can in a spreadsheet.
# Below are the names and labels of the variables in the dataset you just
# imported. To learn more about a variable and how it's coded, copy/paste
# the link provided beneath it into your browser.
# CATEGORICAL VARIABLES
# "BALLOT - Ballot used for interview"
# "YEAR - Gss year for this respondent"
# "SEX - Respondents sex"
# https://gssdataexplorer.norc.org/projects/58679/variables/81/vshow
# "SEXORNT - Sexual orientation"
# https://gssdataexplorer.norc.org/projects/58679/variables/5081/vshow
# "RACE - Race of respondent"
# https://gssdataexplorer.norc.org/projects/58679/variables/82/vshow
# "RATETONE - R's facial coloring by interviewer"
# https://gssdataexplorer.norc.org/projects/58679/variables/3978/vshow
#
# "WKRACISM - R feels discriminated because of race"
# https://gssdataexplorer.norc.org/projects/58679/variables/2822/vshow
# "WKSEXISM - R feels discriminated because of gender"
# https://gssdataexplorer.norc.org/projects/58679/variables/2823/vshow
#
# "INCOME - Total family income"
# https://gssdataexplorer.norc.org/projects/58679/variables/104/vshow
# "RINCOME - Respondents income"
# https://gssdataexplorer.norc.org/projects/58679/variables/105/vshow
#
# "DEPRESS - Told have depression"
# https://gssdataexplorer.norc.org/projects/58679/variables/2845/vshow
# CONTINUOUS VARIABLES
# "AGE - Age of respondent"
#
# "SATFIN - Satisfaction with financial situation"
# https://gssdataexplorer.norc.org/projects/58679/variables/572/vshow
# "FAIREARN - How fair is what r earn on the job"
# https://gssdataexplorer.norc.org/projects/58679/variables/2816/vshow
#
# "HEALTH - Condition of health"
# https://gssdataexplorer.norc.org/projects/58679/variables/437/vshow
# "HEALTH1 - Rs health in general"
# https://gssdataexplorer.norc.org/projects/58679/variables/2826/vshow
# "MNTLHLTH - Days of poor mental health past 30 days"
# https://gssdataexplorer.norc.org/projects/58679/variables/2828/vshow
#############
# Step 2: Calculate descriptives
# Use the 'describe' function in the psych package to calculate descriptive
# statistics for the dataframe, and then use the 'datatable' function in the
# DT package to display the statistics in a friendly way. You can use the
# popout button to display the table in a new tab/window. Make note of the
# means and standard deviations for all of the continuous variables.
# CONTINUOUS VARIABLES
# "AGE - Age of respondent"
# "SATFIN - Satisfaction with financial situation"
# "FAIREARN - How fair is what r earn on the job"
# "HEALTH - Condition of health"
# "HEALTH1 - Rs health in general"
# "MNTLHLTH - Days of poor mental health past 30 days"
# Template version (what students will see): compute descriptive statistics
# for the placeholder dataframe, then display them as a table with columns
# 1 through 13 rounded to 2 decimal places.
data_descriptives <- describe(name_of_your_new_dataframe)
datatable(data_descriptives) %>%
  formatRound(columns = 1:13, digits = 2)
# Worked version (what they'll need to create): the same two steps applied
# to 'data'. The result is stored under the same name, so it replaces the
# template result above.
data_descriptives <- describe(data)
datatable(data_descriptives) %>%
  formatRound(columns = 1:13, digits = 2)
# Some of the variables are categorical, in which case the means and standard deviations
# don't mean anything -- they aren't really numbers, they're just coded that way.
# Instead, you'll need to create a frequency table for each of these
# variables. The code to generate a table for
# the 'SEX' variable is available below: modify the code as needed to create
# tables for the other categorical variables, and describe the pattern of the data
# (e.g., 50% of the respondents were women, or over half of the participants made
# more than $20K).
# CATEGORICAL VARIABLES
# "SEX - Respondents sex"
# "SEXORNT - Sexual orientation"
# "RACE - Race of respondent"
# "RATETONE - R's facial coloring by interviewer"
#
# "WKRACISM - R feels discriminated because of race"
# "WKSEXISM - R feels discriminated because of gender"
#
# "INCOME - Total family income"
# "RINCOME - Respondents income"
#
# "DEPRESS - Told have depression"
# table() is the command; inside the parentheses, you enter what you want
# to be displayed. In this case, this is the 'SEX' variable from the
# dataframe 'data', which is written as data$SEX
table(data$SEX)
# Output recorded from a previous run of the line above (top row: the codes
# found in SEX; bottom row: how many respondents have each code):
##
## 1 2
## 1141 1397
#############
# Step 3: Formulate a research question
# Pick a few variables from the list and get familiar with them -- how each is
# coded and how participants are responding. Drawing from your reading in the
# spotlight and the dataset, come up with a research question. For instance,
# my research question might be: Do reports of mental health from the past
# 30 days differ between White women and Women of Color? The variables I
# would focus on are SEX, RACE, and MNTLHLTH. When posting to the forum,
# I would include my research question, the target variables, and some
# descriptive information for each one.