Data Preparation in R (R Book)

Link: https://bookdown.org/aschmi11/RESMHandbook/data-preparation-and-cleaning-in-r.html#renaming-variables

View data

library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --

## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.8
## v tidyr   1.2.0     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

# Load the 2019 English R Community Survey responses directly from GitHub.
raw_data <- read.csv(
  file = "https://github.com/rstudio/learning-r-survey/raw/master/2019/data/2019%20English%20R%20Community%20Survey%20Responses.csv",
  fileEncoding = "UTF-8"
)

# Number of rows and columns in the raw data
dim(raw_data)

## [1] 1838   52

# Compact overview of every column: name, type and first few values
str(raw_data)

## 'data.frame':    1838 obs. of  52 variables:
##  $ Timestamp                                                                                                                                                                                                                    : chr  "12/13/2019 9:50:30" "12/13/2019 9:50:38" "12/13/2019 9:51:19" "12/13/2019 9:53:51" ...
##  $ How.would.you.rate.your.level.of.experience.using.R.                                                                                                                                                                         : chr  "Expert" "Beginner" "Intermediate" "Intermediate" ...
##  $ Compared.with.other.technical.topics.you.ve.learned.in.school.and.on.the.job..on.a.scale.of.1.to.5..how.difficult.do.you.expect.learning.R.to.be.                                                                            : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ From.what.you.know.about.R..how.long.do.you.expect.that.it.will.take.for.you.to.learn.enough.to.use.R.productively.                                                                                                          : chr  "" "" "" "" ...
##  $ How.do.you.think.you.would.go.about.the.process.of.learning.R.                                                                                                                                                               : chr  "" "" "" "" ...
##  $ Which.statement.most.closely.reflects.the.primary.reason.why.you.are.interested.in.learning.R.                                                                                                                               : chr  "" "" "" "" ...
##  $ If.you.were.to.learn.R..what.would.do.you.think.you.would.use.it.for...check.all.that.apply.                                                                                                                                 : chr  "" "" "" "" ...
##  $ Which.analytical.tools.do.you.use.today.for.the.functions.that.you.might.learn.R.for...please.check.all.that.apply.                                                                                                          : chr  "" "" "" "" ...
##  $ What.do.you.think.is.the.biggest.obstacle.you.must.overcome.in.trying.to.learn.R..The.choices.below.are.only.suggestions..if.we.haven.t.listed.your.obstacle..please.choose..Other..and.add.your.obstacle.in.the.text..      : chr  "" "" "" "" ...
##  $ What.year.did.you.first.start.learning.R.                                                                                                                                                                                    : int  2017 2018 2018 2007 2018 2014 2017 2012 2003 2016 ...
##  $ How.did.you.learn.R..If.you.used.multiple.methods..please.select.the.one.you.used.the.most.                                                                                                                                  : chr  "On the job" "By myself using a book or online documentation that was not part of a formal course" "Online course (e.g., Coursera, edX, Datacamp)" "On the job" ...
##  $ Compared.with.other.technical.topics.you.ve.learned.in.school.and.on.the.job..on.a.scale.of.1.to.5..how.difficult.has.it.been.for.you.to.learn.R.                                                                            : int  5 3 3 4 3 3 3 2 2 2 ...
##  $ Roughly.how.long.did.it.take.you.to.achieve.proficiency.in.R.                                                                                                                                                                : chr  "Years" "I don't feel proficient in R yet." "Months" "Years" ...
##  $ Which.statement.most.closely.reflects.the.primary.reason.why.you.learned.R.                                                                                                                                                  : chr  "I was personally interested" "I thought it would open new career opportunities" "One or more members of the R community encouraged me to learn it" "It was required for my work" ...
##  $ What.do.you.think.was.the.biggest.obstacle.you.had.to.overcome.in.learning.R..The.choices.below.are.only.suggestions..if.we.haven.t.listed.your.obstacle..please.choose..Other..and.add.your.obstacle.in.the.text..          : chr  "Language irregularities" "Language irregularities" "Error messages" "Data types" ...
##  $ How.often.do.you.use.R.today..either.for.professional.or.personal.projects.                                                                                                                                                  : chr  "More than once a day" "Less than once a month" "More than once a day" "Between once a day and once a week" ...
##  $ What.applications.do.you.use.R.for.most...check.all.that.apply.                                                                                                                                                              : chr  "Statistical analysis, Data transformation, Modeling, Visualization, Machine learning, Text processing" "Statistical analysis, Data transformation, Visualization" "Statistical analysis, Data transformation, Visualization" "Data transformation" ...
##  $ Please.rate.how.much.you.enjoy.using.R.on.a.scale.of.1.to.5..where.1.is.you.don.t.enjoy.it.at.all..and.5.is.that.you.enjoy.it.a.great.deal.                                                                                  : int  5 5 5 5 5 5 5 5 5 5 ...
##  $ How.likely.are.you.to.recommend.R.to.a.colleague..friend..or.family.member.                                                                                                                                                  : int  10 10 10 9 10 10 9 10 10 10 ...
##  $ Which.tools.do.you.use.with.your.R.applications...please.check.all.that.apply.                                                                                                                                               : chr  "Python, R Command Line (i.e., the native R terminal interface), RStudio Desktop (IDE), RStudio Server, Shinyapp"| __truncated__ "Microsoft Excel" "" "Microsoft Excel, Python, R Command Line (i.e., the native R terminal interface), RStudio.cloud, RStudio Connect"| __truncated__ ...
##  $ Did.you.use.tidyverse.packages.such.as.ggplot2.or.dplyr.to.learn.R.                                                                                                                                                          : chr  "Yes" "Yes" "Yes" "No" ...
##  $ Do.you.use.tidyverse.packages.when.you.use.R.now.                                                                                                                                                                            : chr  "Occasionally" "Usually" "Usually" "Usually" ...
##  $ What.do.you.like.best.about.using.R.                                                                                                                                                                                         : chr  "The speed of experimentation and relative lack of dependency hell. CRAN! Rcpp! The community." "Great community, easy to get to results" "It is readable code that I can find mistakes in easily" "Data munging" ...
##  $ What.do.you.like.least.about.using.R.                                                                                                                                                                                        : chr  "Multiple inconsistent styles across all the packages. Lack of type hinting. The community." "Trying to access data in matrices" "" "Inconsistencies in base (naming, parameter order, etc.)" ...
##  $ When.you.have.problems.in.R..where.do.you.go.for.help.                                                                                                                                                                       : chr  "Question and answer sites such as Stack Overflow and Quora, The repo, if available. " "General search websites such as Google and Yahoo, Social media such as blogs, R-bloggers, Twitter, Slack, or GitHub contacts" "General search websites such as Google and Yahoo, Your personal network, such as colleagues and professors, Que"| __truncated__ "General search websites such as Google and Yahoo, Your personal network, such as colleagues and professors, Que"| __truncated__ ...
##  $ How.do.you.discover.new.packages.or.packages.that.are.unfamiliar.to.you.                                                                                                                                                     : chr  "Email lists such as r-help, r-packages, or r-pkg-devel, Your personal network, such as colleagues and professor"| __truncated__ "General search websites such as Google and Yahoo, Social media such as blogs, R-bloggers, Twitter, Slack, or GitHub contacts" "General search websites such as Google and Yahoo, community.rstudio.com, Your personal network, such as colleag"| __truncated__ "General search websites such as Google and Yahoo, CRAN Task Views, Your personal network, such as colleagues and professors" ...
##  $ How.do.you.share.the.results.that.you.create.in.R..Check.all.that.apply.                                                                                                                                                     : chr  "Other internal web site or portal run by my organization (for example,  Sharepoint), Public web sites such as G"| __truncated__ "Personal blog or web site owned by you" "Email, Other internal web site or portal run by my organization (for example,  Sharepoint), Public web sites su"| __truncated__ "RStudio Connect site run by my organization" ...
##  $ Looking.ahead..how.do.you.expect.your.use.of.R.to.change.in.2020.                                                                                                                                                            : chr  "I expect it to stay the same" "I expect it to increase" "I expect it to increase" "I expect it to stay the same" ...
##  $ To.help.us.ensure.that.you.are.not.a.robot..please.enter.the.number.of.characters.in.the.word..analysis..in.the.text.box.below..Please.type.your.answer.as.a.word..for.example.if.you.want.3.to.be.your.answer..type..three..: chr  "Eight" "eight" "eight" "eight" ...
##  $ Do.you.currently.use.R.Markdown..Choose.the.statement.that.most.closely.matches.your.use.                                                                                                                                    : chr  "Weekly -- Between once a day and once a week" "Occasionally -- less than once a month" "Occasionally -- less than once a month" "Weekly -- Between once a day and once a week" ...
##  $ What.applications.do.you.use.R.Markdown.for..Check.all.that.apply.                                                                                                                                                           : chr  "Creating publication-ready articles for submission, Building web sites via blogdown, Publishing online and offl"| __truncated__ "Publishing online and offline books via bookdown" "Creating R Notebooks" "Creating R Notebooks" ...
##  $ Looking.forward..how.do.you.expect.your.use.of.R.Markdown.to.change.in.2020.                                                                                                                                                 : chr  "I expect it to stay the same" "I expect it to increase" "I expect it to increase" "I expect it to stay the same" ...
##  $ How.often.do.you.currently.use.Shiny..Choose.the.statement.that.most.closely.matches.your.use.                                                                                                                               : chr  "Frequently -- I frequently use or build Shiny applications" "Never -- I don't use Shiny." "Never -- I don't use Shiny." "Sometimes -- I use or build Shiny applications occasionally" ...
##  $ Looking.forward..how.do.you.expect.your.use.of.Shiny.to.change.in.2020.                                                                                                                                                      : chr  "I expect it to stay the same" "I expect it to stay the same" "I expect it to increase" "I expect it to stay the same" ...
##  $ Do.you.currently.use.Python..Choose.the.statement.that.most.closely.matches.your.use.                                                                                                                                        : chr  "Weekly -- Between once a day and once a week" "I don't use Python" "I don't use Python" "Monthly -- between once a week and once a month" ...
##  $ What.applications.do.you.use.Python.for.most...check.all.that.apply.                                                                                                                                                         : chr  "Machine learning, Text processing, General programming " "" "" "" ...
##  $ Please.rate.how.much.you.enjoy.using.Python.on.a.scale.of.1.to.5..where.1.is.you.don.t.enjoy.it.at.all..and.5.is.that.you.enjoy.it.a.great.deal.                                                                             : int  5 NA NA 3 NA 2 NA 2 5 3 ...
##  $ How.likely.are.you.to.recommend.Python.to.a.colleague..friend..or.family.member.                                                                                                                                             : int  10 NA NA 8 NA 8 NA 8 10 7 ...
##  $ Looking.forward..how.do.you.expect.your.use.of.Python.to.change.in.2020.                                                                                                                                                     : chr  "I expect it to increase" "I expect it to stay the same" "I expect it to stay the same" "I expect it to increase" ...
##  $ What.computer.tools.and.or.languages.have.you.used.besides.R.                                                                                                                                                                : chr  "C/C++, Python" "Excel, Tableau" "Excel, Tableau" "C/C++, Excel, Go, Java, Javascript, Matlab, PHP, Python, Ruby, Visual Basic" ...
##  $ What.was.the.FIRST.computer.language.or.tool.that.you.learned.                                                                                                                                                               : chr  "R" "TurboPascal" "HTML" "Java" ...
##  $ What.year.were.you.born.                                                                                                                                                                                                     : int  1987 1983 1992 NA 1990 1991 1994 1986 1964 1987 ...
##  $ What.gender.do.you.identify.with.                                                                                                                                                                                            : chr  "male" "Male" "female" "" ...
##  $ I.identify.my.ethnicity.as..select.all.that.apply..                                                                                                                                                                          : chr  "White" "White" "White" "" ...
##  $ What.is.the.highest.degree.or.level.of.school.you.have.completed..If.currently.enrolled..please.use.the.highest.degree.received.                                                                                             : chr  "Master’s degree (e.g. MA, MS, MEd)" "Master’s degree (e.g. MA, MS, MEd)" "Master’s degree (e.g. MA, MS, MEd)" "Master’s degree (e.g. MA, MS, MEd)" ...
##  $ In.what.country.do.you.currently.reside.                                                                                                                                                                                     : chr  "United States of America" "Netherlands" "United States of America" "United States of America" ...
##  $ What.industry.do.you.work.or.participate.in.                                                                                                                                                                                 : chr  "Education" "Sustainability" "Transportation" "Information Technologies" ...
##  $ What.is.your.job.title..if.any.                                                                                                                                                                                              : chr  "" "Data Specialist" "Operations Research Analyst" "Software Engineer" ...
##  $ Which.category.best.describes.the.work.you.do.                                                                                                                                                                               : chr  "Data scientist or analyst" "Data scientist or analyst" "Data scientist or analyst" "Software developer" ...
##  $ How.many.people.in.your.organization.or.work.group.do.you.feel.that.you.can.ask.for.help.or.support.when.working.with.R.                                                                                                     : num  1 0 0 20 5 5 2 0 0 2 ...
##  $ Which.of.the.following.events.have.you.attended..if.any..Check.all.that.apply.                                                                                                                                               : chr  "" "Local or regional R meetup, R Ladies meetups (local, regional, national, or international)" "Local or regional R meetup" "RStudio::conf, UseR conference, Local or regional R meetup, R Ladies meetups (local, regional, national, or int"| __truncated__ ...
##  $ How.did.you.hear.about.this.survey.                                                                                                                                                                                          : chr  "Twitter" "Twitter" "Twitter" "Email" ...

# Structure of the first five columns only
str(raw_data[, 1:5])

## 'data.frame':    1838 obs. of  5 variables:
##  $ Timestamp                                                                                                                                        : chr  "12/13/2019 9:50:30" "12/13/2019 9:50:38" "12/13/2019 9:51:19" "12/13/2019 9:53:51" ...
##  $ How.would.you.rate.your.level.of.experience.using.R.                                                                                             : chr  "Expert" "Beginner" "Intermediate" "Intermediate" ...
##  $ Compared.with.other.technical.topics.you.ve.learned.in.school.and.on.the.job..on.a.scale.of.1.to.5..how.difficult.do.you.expect.learning.R.to.be.: int  NA NA NA NA NA NA NA NA NA NA ...
##  $ From.what.you.know.about.R..how.long.do.you.expect.that.it.will.take.for.you.to.learn.enough.to.use.R.productively.                              : chr  "" "" "" "" ...
##  $ How.do.you.think.you.would.go.about.the.process.of.learning.R.                                                                                   : chr  "" "" "" "" ...

# Names of the first five columns (index the name vector, not the data frame)
names(raw_data)[1:5]

## [1] "Timestamp"                                                                                                                                        
## [2] "How.would.you.rate.your.level.of.experience.using.R."                                                                                             
## [3] "Compared.with.other.technical.topics.you.ve.learned.in.school.and.on.the.job..on.a.scale.of.1.to.5..how.difficult.do.you.expect.learning.R.to.be."
## [4] "From.what.you.know.about.R..how.long.do.you.expect.that.it.will.take.for.you.to.learn.enough.to.use.R.productively."                              
## [5] "How.do.you.think.you.would.go.about.the.process.of.learning.R."

# Structure of a single column, extracted by its full name
str(raw_data[["In.what.country.do.you.currently.reside."]])

##  chr [1:1838] "United States of America" "Netherlands" ...

# Storage class of a single column
class(raw_data[["What.year.did.you.first.start.learning.R."]])

## [1] "integer"

Variable/Column Names - names()

# names() returns the column names as a character vector
colnames(raw_data)[1:5]

## [1] "Timestamp"                                                                                                                                        
## [2] "How.would.you.rate.your.level.of.experience.using.R."                                                                                             
## [3] "Compared.with.other.technical.topics.you.ve.learned.in.school.and.on.the.job..on.a.scale.of.1.to.5..how.difficult.do.you.expect.learning.R.to.be."
## [4] "From.what.you.know.about.R..how.long.do.you.expect.that.it.will.take.for.you.to.learn.enough.to.use.R.productively."                              
## [5] "How.do.you.think.you.would.go.about.the.process.of.learning.R."

# First six rows (the head() default) of the first two columns
head(raw_data[, 1:2])

ABCDEFGHIJ0123456789

	Timestamp <chr>	How.would.you.rate.your.level.of.experience.using.R. <chr>
1	12/13/2019 9:50:30	Expert
2	12/13/2019 9:50:38	Beginner
3	12/13/2019 9:51:19	Intermediate
4	12/13/2019 9:53:51	Intermediate
5	12/13/2019 10:01:03	Intermediate
6	12/13/2019 10:04:42	Expert

# n controls how many rows head() returns
head(raw_data[, 1:2], n = 3)

ABCDEFGHIJ0123456789

	Timestamp <chr>	How.would.you.rate.your.level.of.experience.using.R. <chr>
1	12/13/2019 9:50:30	Expert
2	12/13/2019 9:50:38	Beginner
3	12/13/2019 9:51:19	Intermediate

# First 20 answers to the "how likely are you to recommend R" question
head(raw_data[["How.likely.are.you.to.recommend.R.to.a.colleague..friend..or.family.member."]], n = 20)

##  [1] 10 10 10  9 10 10  9 10 10 10 10 10 10 10 10 10  9  9  9  9

# Last 20 answers to the same question
tail(raw_data[["How.likely.are.you.to.recommend.R.to.a.colleague..friend..or.family.member."]], n = 20)

##  [1]  9 10 10  6 10  6  9 10 10 10  8 10 10 10 10 10  8 NA 10  9

Renaming Variables

# Current (auto-generated) name of the second column
names(raw_data)[2]

## [1] "How.would.you.rate.your.level.of.experience.using.R."

# rename() takes new_name = old_name pairs; all other columns are left as-is.
renamed <- rename(
  raw_data,
  Qr_experience = How.would.you.rate.your.level.of.experience.using.R.
)
# Confirm the new name
names(renamed)[1:2]

## [1] "Timestamp"     "Qr_experience"

Renaming Multiple Variables

# Rename several columns in a single rename() call (new_name = old_name).
# The third column asks how difficult the respondent *expects* learning R to
# be, so it is named Qr_difficulty (the name the survey's own name file uses
# for this column) rather than "..._experienced", which would describe the
# separate "how difficult has it been" question.
renamed <- raw_data %>%
  rename(
    "Qr_experience" = "How.would.you.rate.your.level.of.experience.using.R.",
    "Qr_difficulty" = "Compared.with.other.technical.topics.you.ve.learned.in.school.and.on.the.job..on.a.scale.of.1.to.5..how.difficult.do.you.expect.learning.R.to.be."
  )

names(renamed[2:3])

## [1] "Qr_experience" "Qr_difficulty"

names(renamed[1:3])

## [1] "Timestamp"     "Qr_experience" "Qr_difficulty"

However, raw_data has 52 variables! This would take a long time to do. Instead of writing 52+ lines of code, we can use the names() function to set the names of our data frame (raw_data) equal to the names of another data frame.

The RStudio survey repository provides a .tsv file containing proper variable names for the columns. Let’s load that first:

# Read the question-name file; only its header row (the column names) is used.
qnames <- read_tsv(
  file = "https://raw.githubusercontent.com/rstudio/learning-r-survey/master/2019/data/2019-english-question-names-only.tsv"
)

## Rows: 0 Columns: 52
## -- Column specification --------------------------------------------------------
## Delimiter: "\t"
## chr (52): Qtime, Qr_experience, Qr_difficulty, Qr_length_to_success, Qhow_to...
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Work on a copy so raw_data keeps the original survey headers.
rsurvey <- raw_data

# Names are assigned by position, so the name file must supply exactly one
# name per column; otherwise columns would be mislabelled or left unnamed.
stopifnot(length(names(qnames)) == ncol(rsurvey))

# Replace the column names with the short names from qnames.
names(rsurvey) <- names(qnames)

names(rsurvey[1:4])

## [1] "Qtime"                "Qr_experience"        "Qr_difficulty"       
## [4] "Qr_length_to_success"

Cleaning Names with janitor

Let’s make sure all the names are lowercase. This will make typing them in later analyses easier, as you won’t need to remember what is capitalized and what is not.

library("janitor")

## 
## Attaching package: 'janitor'

## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test

# Standardise the column names (lower-case) with janitor
rsurvey <- janitor::clean_names(rsurvey)
names(rsurvey)[1:10]

##  [1] "qtime"                  "qr_experience"          "qr_difficulty"         
##  [4] "qr_length_to_success"   "qhow_to_learn_r"        "qreason_to_learn"      
##  [7] "qr_use"                 "qtools"                 "qobstacles_to_starting"
## [10] "qr_year"

Summary Stats - describe()

library(psych)

## 
## Attaching package: 'psych'

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

# psych::describe() summary statistics for the first five columns
describe(rsurvey[, 1:5])

ABCDEFGHIJ0123456789

	vars <int>	n <dbl>	mean <dbl>	sd <dbl>	median <dbl>	trimmed <dbl>	mad <dbl>	min <dbl>
qtime*	1	1838	913.236126	526.8858952	912.5	913.046196	676.0656	1
qr_experience*	2	1838	3.412405	0.7785213	4.0	3.530571	0.0000	1
qr_difficulty	3	8	3.500000	0.5345225	3.5	3.500000	0.7413	3
qr_length_to_success*	4	1838	1.007073	0.1210282	1.0	1.000000	0.0000	1
qhow_to_learn_r*	5	1838	1.007617	0.1275654	1.0	1.000000	0.0000	1

Summary Stats - describeBy()

# Derive an approximate age from the birth year, using 2020 as the reference
rsurvey[["age"]] <- 2020 - rsurvey[["qyear_born"]]

# Describe age separately for each experience level
describeBy(rsurvey$age, group = rsurvey$qr_experience, mat = TRUE)

## Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning Inf

## Warning in max(x, na.rm = na.rm): no non-missing arguments to max; returning
## -Inf

ABCDEFGHIJ0123456789

	item <chr>	group1 <chr>	vars <dbl>	n <dbl>	mean <dbl>	sd <dbl>	median <dbl>	trimmed <dbl>	mad <dbl>
X11	1		1	0	NaN	NA	NA	NaN	NA
X12	2	Beginner	1	224	36.26786	12.968732	34	34.76111	11.8608
X13	3	Expert	1	506	36.62648	9.625914	35	35.35468	7.4130
X14	4	Intermediate	1	993	36.62034	10.948554	35	35.29560	8.8956
X15	5	None	1	8	44.12500	13.798939	42	44.12500	12.6021

Summary Stats - summary()

# Base R summary of the experience and difficulty columns
summary(rsurvey[, 2:3])

##  qr_experience      qr_difficulty 
##  Length:1838        Min.   :3.0   
##  Class :character   1st Qu.:3.0   
##  Mode  :character   Median :3.5   
##                     Mean   :3.5   
##                     3rd Qu.:4.0   
##                     Max.   :4.0   
##                     NA's   :1830

# Converting to a factor makes summary() report a count per category
summary(factor(rsurvey$qr_experience))

##                  Beginner       Expert Intermediate         None 
##           31          233          529         1037            8

# Same idea for the numeric difficulty rating; missing values are counted too
summary(factor(rsurvey$qr_difficulty))

##    3    4 NA's 
##    4    4 1830

Summary Stats - skim()

library(skimr)

# Summarise columns 2-3 with skimr. The subsetting expression is echoed as the
# "Name" field of the printed summary, so it is kept exactly as written.
skim(rsurvey[2:3])

Data summary
Name	rsurvey[2:3]
Number of rows	1838
Number of columns	2
_______________________
Column type frequency:
character	1
numeric	1
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	empty	n_unique	whitespace
qr_experience	0	1	0	12	31	5	0

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
qr_difficulty	1830	0	3.5	0.53	3	3	3.5	4	4	▇▁▁▁▇

Crosstabs - table()

# Cross-tabulate country by experience level and show the first five rows
table(rsurvey[["qcountry"]], rsurvey[["qr_experience"]])[1:5, ]

##              
##                  Beginner Expert Intermediate None
##               31       18     33           66    0
##   Afghanistan  0        0      0            1    0
##   Albania      0        0      0            1    0
##   Algeria      0        7      5           13    1
##   Andorra      0        0      1            1    0

More Frequencies and Descriptives

library("jmv")

## 
## Attaching package: 'jmv'

## The following objects are masked from 'package:psych':
## 
##     pca, reliability

# Descriptives plus frequency tables (freq = TRUE) for two selected columns
descriptives(
  select(rsurvey, qr_experience, qr_year),
  freq = TRUE
)

## 
##  DESCRIPTIVES
## 
##  Descriptives                                        
##  --------------------------------------------------- 
##                          qr_experience    qr_year    
##  --------------------------------------------------- 
##    N                              1838        1676   
##    Missing                           0         162   
##    Mean                                   2007.749   
##    Median                                 2015.000   
##    Standard deviation                     107.3527   
##    Minimum                                       2   
##    Maximum                                    2019   
##  --------------------------------------------------- 
## 
## 
##  FREQUENCIES
## 
##  Frequencies of qr_experience                             
##  -------------------------------------------------------- 
##    Levels          Counts    % of Total    Cumulative %   
##  -------------------------------------------------------- 
##                                 0.00000         0.00000   
##    Beginner           233      12.67682         0.00000   
##    Expert             529      28.78128         0.00000   
##    Intermediate      1037      56.42002         0.00000   
##    None                 8       0.43526         0.00000   
##  --------------------------------------------------------

Spotting Coding Mistakes

# Sort the start years ascending and look at the ten smallest values
head(
  arrange(select(rsurvey, qr_year), qr_year),
  n = 10
)

ABCDEFGHIJ0123456789

	qr_year <int>
1	2
2	6
3	13
4	18
5	207
6	1977
7	1985
8	1989
9	1989
10	1990

Modifying Data - mutate()

# Treat start years before 1977 as missing; the cleaned values go into a new
# column (qr_year2) so the original qr_year stays available for comparison
rsurvey <- mutate(
  rsurvey,
  qr_year2 = replace(qr_year, which(qr_year < 1977), NA)
)

# Check: compare the old and new columns for the ten smallest years
head(
  arrange(select(rsurvey, qr_year, qr_year2), qr_year),
  n = 10
)

ABCDEFGHIJ0123456789

	qr_year <int>	qr_year2 <int>
1	2	NA
2	6	NA
3	13	NA
4	18	NA
5	207	NA
6	1977	1977
7	1985	1985
8	1989	1989
9	1989	1989
10	1990	1990

Reordering Categories - factor()

# Tally respondents per experience level (categories appear alphabetically)
count(rsurvey, qr_experience)

ABCDEFGHIJ0123456789

qr_experience <chr>	n <int>
	31
Beginner	233
Expert	529
Intermediate	1037
None	8

# Try the recoding on a scratch copy first. The levels are listed in their
# natural order; values not listed (the blank responses) become NA, and
# factor() never keeps NA as a level by default, so it is not listed.
recoded <- rsurvey %>%
  transmute(
    qr_experience,
    qr_experience2 = factor(
      qr_experience,
      levels = c("None", "Beginner", "Intermediate", "Expert")
    )
  )
count(recoded, qr_experience2)

ABCDEFGHIJ0123456789

qr_experience2 <fct>	n <int>
None	8
Beginner	233
Intermediate	1037
Expert	529
NA	31

This works, so let’s apply it to the real data set:

# Apply the ordered recoding to the real data set. Blank responses are not in
# the level list and therefore become NA.
rsurvey <- rsurvey %>% 
  mutate(qr_experience = factor(qr_experience,
        levels=c("None","Beginner", "Intermediate", "Expert"))
  )
# Verify on rsurvey itself (not the scratch copy `recoded`)
rsurvey %>% count(qr_experience)

ABCDEFGHIJ0123456789

qr_experience <fct>	n <int>
None	8
Beginner	233
Intermediate	1037
Expert	529
NA	31