# List the data sets available in the currently attached packages.
data()

# List the data sets of every installed package (this can take a while).
data(package = .packages(all.available = TRUE))
library(psych)

# Copy the sat.act data set into a working object and preview the first rows.
mydata <- sat.act
head(mydata, n = 6L)
##       gender education age ACT SATV SATQ
## 29442      2         3  19  24  500  500
## 29457      2         3  23  35  600  500
## 29498      2         3  20  21  480  470
## 29503      1         4  27  26  550  520
## 29504      1         2  33  31  600  550
## 29518      1         5  26  28  640  640

Explanation:

Create the factor for Gender

# Recode the numeric gender codes (1, 2) as a labelled factor column.
mydata[["GenderFactor"]] <- factor(
  mydata[["gender"]],
  levels = c(1, 2),
  labels = c("Male", "Female")
)

# Preview the data again; GenderFactor is appended as the last column.
head(mydata, n = 6L)
##       gender education age ACT SATV SATQ GenderFactor
## 29442      2         3  19  24  500  500       Female
## 29457      2         3  23  35  600  500       Female
## 29498      2         3  20  21  480  470       Female
## 29503      1         4  27  26  550  520         Male
## 29504      1         2  33  31  600  550         Male
## 29518      1         5  26  28  640  640         Male

Create a new object, called mydataF, which includes only females.

# Keep only the rows whose GenderFactor is "Female". %in% never yields NA, so
# a row with a missing GenderFactor is left out instead of coming back as an
# all-NA row, which is what a logical index built with == would produce.
mydataF <- mydata[mydata$GenderFactor %in% "Female", ]

Writing the same with pipes (read the pipe operator %>% as “then”)

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Take mydata, then keep the rows where GenderFactor is "Female".
mydataF_new <-
  mydata %>%
  dplyr::filter(GenderFactor == "Female")

Filter females and keep only those aged 25 or older

library(dplyr)

# Keep females aged 25 or more. Conditions separated by a comma are combined
# with a logical AND.
mydataF_new <- mydata %>%
  filter(GenderFactor == "Female", age >= 25)

Rename the variable age to Age

# Option 1: rename by name, looking up the position of the "age" column.
names(mydata)[which(names(mydata) == "age")] <- "Age"


# Option 2: rename by position. Column 3 is the age column, so this replaces
# the name assigned just above with "AGE".
colnames(mydata)[3L] <- "AGE"


library(dplyr)

# Option 3: dplyr's rename(new = old). mydataF_new was built before the
# renames applied to mydata, so its column is still called "age".
mydataF_new <-
  mydataF_new %>%
  dplyr::rename(Age = age)

Compute descriptive statistics for the variables measuring abilities (ACT, SATV, SATQ)

# Summarise the three ability scores. The columns are selected by name so the
# call keeps working if the column order of mydata ever changes.
summary(mydata[c("ACT", "SATV", "SATQ")])
##       ACT             SATV            SATQ      
##  Min.   : 3.00   Min.   :200.0   Min.   :200.0  
##  1st Qu.:25.00   1st Qu.:550.0   1st Qu.:530.0  
##  Median :29.00   Median :620.0   Median :620.0  
##  Mean   :28.55   Mean   :612.2   Mean   :610.2  
##  3rd Qu.:32.00   3rd Qu.:700.0   3rd Qu.:700.0  
##  Max.   :36.00   Max.   :800.0   Max.   :800.0  
##                                  NA's   :13
# SATQ contains missing values, so the plain mean comes out as NA.
mean(mydata[["SATQ"]])
## [1] NA
library(tidyr)

# Drop every row that has a missing value in any column.
mydataClean <- tidyr::drop_na(mydata)

Compute descriptive statistics of SATV, separated by gender.

library(psych)
# Descriptive statistics of SATV, reported separately for each gender level.
describeBy(mydata[["SATV"]], group = mydata[["GenderFactor"]])
## 
##  Descriptive statistics by group 
## group: Male
##    vars   n   mean     sd median trimmed    mad min max range  skew kurtosis
## X1    1 247 615.11 114.16    630  622.07 118.61 200 800   600 -0.63     0.13
##      se
## X1 7.26
## ------------------------------------------------------------ 
## group: Female
##    vars   n   mean     sd median trimmed    mad min max range  skew kurtosis
## X1    1 453 610.66 112.31    620  617.91 103.78 200 800   600 -0.65     0.42
##      se
## X1 5.28