Basic Statistics Lab

Load Libraries

# remember, you might need to install packages

library(psych) # for the describe() command
library(expss) # for the cross_cases() command

Load Data

# Read the survey data; header = TRUE means the first row of the CSV supplies
# the column names. (TRUE is spelled out because T can be reassigned.)
d <- read.csv(file = "Data/mydata.csv", header = TRUE)
# Print the column names to confirm which variables were loaded.
names(d)
[1] "gender"    "ethnicity" "age"       "big5_open" "pswq"      "big5_ext" 

Univariate Plots: Histograms & Tables

# Frequency count for each gender category (update for hw!!)
table(d[["gender"]])

            female I use another term               male  Prefer not to say 
              1020                 28                195                 16 
# Frequency count for each ethnicity category
table(d[["ethnicity"]])

 Asian/Asian British - Indian, Pakistani, Bangladeshi, other 
                                                         147 
             Black/Black British - Caribbean, African, other 
                                                          31 
                                     Chinese/Chinese British 
                                                          12 
Middle Eastern/Middle Eastern British - Arab, Turkish, other 
                                                          13 
                                          Mixed race - other 
                                                          34 
                  Mixed race - White and Black/Black British 
                                                          21 
                                          Other ethnic group 
                                                          12 
                                           Prefer not to say 
                                                          27 
                               White - British, Irish, other 
                                                         962 
# Frequency count for each age bracket
table(d[["age"]])

         1 under 18 2 between 18 and 25 3 between 26 and 35 4 between 36 and 45 
                829                  75                  12                 118 
          5 over 45 
                225 
# Histograms of the three continuous variables. Explicit titles and axis
# labels replace the default "Histogram of d$..." text so the figures are
# readable on their own.
hist(d$big5_open,
     main = "Histogram of Openness",
     xlab = "Openness")

hist(d$pswq,
     main = "Histogram of Worry",
     xlab = "Worry")

hist(d$big5_ext,
     main = "Histogram of Extraversion",
     xlab = "Extraversion")

Univariate Normality

Check skew and kurtosis. The cutoffs are -2 and +2: if the skew or kurtosis of any variable is below -2 or above +2, I need to mention it in my write-up.

# Descriptive statistics (including skew and kurtosis) for every column of d.
# Rows flagged with * in the output (gender, ethnicity, age) are categorical
# variables that psych converted to numeric codes, so only interpret skew and
# kurtosis for the continuous variables (big5_open, pswq, big5_ext).
describe(d)
           vars    n  mean   sd median trimmed  mad   min  max range  skew
gender*       1 1259  1.37 0.79   1.00    1.20 0.00  1.00 4.00  3.00  1.78
ethnicity*    2 1259  7.59 2.82   9.00    8.23 0.00  1.00 9.00  8.00 -1.68
age*          3 1259  2.07 1.63   1.00    1.85 0.00  1.00 5.00  4.00  1.01
big5_open     4 1259  5.21 1.13   5.33    5.29 0.99  1.00 7.00  6.00 -0.73
pswq          5 1259 -0.02 1.00   0.02   -0.01 1.17 -2.25 2.38  4.63 -0.08
big5_ext      6 1259  4.37 1.45   4.33    4.41 1.48  1.00 7.00  6.00 -0.24
           kurtosis   se
gender*        1.48 0.02
ethnicity*     1.04 0.08
age*          -0.82 0.05
big5_open      0.47 0.03
pswq          -0.92 0.03
big5_ext      -0.79 0.04

Bivariate Plots

Crosstabs

# Three-way crosstab of case counts: gender (rows) by ethnicity (columns),
# shown separately within each age group.
cross_cases(d, gender, ethnicity, age)
   ethnicity 
   Asian/Asian British - Indian, Pakistani, Bangladeshi, other   Black/Black British - Caribbean, African, other   Chinese/Chinese British   Middle Eastern/Middle Eastern British - Arab, Turkish, other   Mixed race - other   Mixed race - White and Black/Black British   Other ethnic group   Prefer not to say   White - British, Irish, other 
 age 
   1 under 18   gender   I use another term    1 1 2 1 19
    Prefer not to say    8 6
    female    96 25 5 10 21 16 6 11 451
    male    20 3 3 1 7 3 2 111
    #Total cases    117 28 8 12 30 19 8 20 587
   2 between 18 and 25   gender   I use another term    1 2
    Prefer not to say   
    female    7 1 1 1 56
    male    1 5
    #Total cases    8 1 1 1 1 63
   3 between 26 and 35   gender   I use another term   
    Prefer not to say   
    female    1 1 8
    male    1 1
    #Total cases    1 1 1 9
   4 between 36 and 45   gender   I use another term    1
    Prefer not to say    1
    female    9 1 1 1 2 1 3 89
    male    1 1 7
    #Total cases    10 2 1 1 2 1 4 97
   5 over 45   gender   I use another term   
    Prefer not to say    1
    female    7 2 1 1 2 1 182
    male    4 1 23
    #Total cases    11 2 1 1 2 2 206

Scatterplots

# Scatterplots for each pair of continuous variables.
# The first argument is plotted on the x-axis, the second on the y-axis.
plot(d$big5_open, d$pswq,
     main = "Scatterplot of Openness and Worry",
     xlab = "Openness",
     ylab = "Worry")

plot(d$big5_open, d$big5_ext,
     main = "Scatterplot of Openness and Extraversion",
     xlab = "Openness",
     ylab = "Extraversion")

plot(d$pswq, d$big5_ext,
     main = "Scatterplot of Worry and Extraversion",
     xlab = "Worry",
     ylab = "Extraversion")

Boxplots

# Distribution of openness within each gender identification category.
boxplot(big5_open ~ gender, data = d,
        xlab = "Gender Identification",
        ylab = "Openness",
        main = "Boxplot of Openness and Gender Identification")

# Distribution of openness within each age bracket.
boxplot(big5_open ~ age, data = d,
        xlab = "Age",
        ylab = "Openness",
        main = "Boxplot of Openness and Age")

# Distribution of openness within each ethnicity category.
boxplot(big5_open ~ ethnicity, data = d,
        xlab = "Ethnicity",
        ylab = "Openness",
        main = "Boxplot of Openness and Ethnicity")

Write-Up

Once again, you need to create a write-up reviewing the most important things you did here. Again, it should be suitable for inclusion in a manuscript. Make sure you include your review of skewness and kurtosis. I have given you two potential templates you can follow below, depending upon your needs – you should delete the other text in this section and only include your write-up.

If skew and kurtosis are good: We reviewed plots and descriptive statistics for our six chosen variables. All three of our continuous variables had skew and kurtosis within the accepted range (-2/+2).