Group5

options(scipen = 999)
library(Statamarkdown)
Warning: package 'Statamarkdown' was built under R version 4.4.3
Stata found at C:/Program Files/Stata18/StataSE-64.exe
The 'stata' engine is ready to use.
# Absolute path to the Stata executable that the knitr 'stata' engine launches.
# NOTE(review): this path is machine-specific; it must be edited before the
# document can be knitted on another computer.
stataexe <- "C:/Program Files/Stata18/StataSE-64.exe"

# Register that executable as the engine path used by every stata chunk.
knitr::opts_chunk$set(engine.path = list(stata=stataexe))
cd "C:\Users\NOMTHA\Documents\WITWATERSRAND\MSc_Epidemiology\Research_Protocol_Development_1\Group5Assignment\DataGroup5"
> l_Development_1\Group5Assignment\DataGroup5"
C:\Users\NOMTHA\Documents\WITWATERSRAND\MSc_Epidemiology\Research_Protocol_Deve
> lopment_1\Group5Assignment\DataGroup5
pwd
C:\Users\NOMTHA\Documents\WITWATERSRAND\MSc_Epidemiology\Research_Protocol_Deve
> lopment_1\Group5Assignment\DataGroup5
* Load the demographics dataset (discarding any data in memory) and list its
* variables with their storage types and display formats.
use Demo, clear

describe
Contains data from Demo.dta
 Observations:            43                  
    Variables:            22                  11 Mar 2025 14:34
-------------------------------------------------------------------------------
Variable      Storage   Display    Value
    name         type    format    label      Variable label
-------------------------------------------------------------------------------
record_id       byte    %8.0g                 
redcap_survey~r byte    %8.0g                 
demographic_d~p str19   %19s                  
name            str12   %12s                  
surname         str10   %10s                  
student_id      long    %12.0g                
age             byte    %8.0g                 
gender          byte    %8.0g                 
other_gender    byte    %8.0g                 
race            byte    %8.0g                 
other_race      byte    %8.0g                 
home_language   byte    %8.0g                 
other_hl        str5    %9s                   
english         byte    %8.0g                 
study_level     byte    %8.0g                 
study_field     byte    %8.0g                 
education_level byte    %8.0g                 
employment_st~s byte    %8.0g                 
res             byte    %8.0g                 
other_res       str12   %12s                  
demographic_d~e byte    %8.0g                 
dob             float   %dM_d,_CY             
-------------------------------------------------------------------------------
Sorted by: 
* Load the level-of-confidence dataset (discarding any data in memory) and
* list its variables with their storage types and display formats.
use Conf, clear

describe
Contains data from Conf.dta
 Observations:            43                  
    Variables:            25                  24 Feb 2025 17:34
-------------------------------------------------------------------------------
Variable      Storage   Display    Value
    name         type    format    label      Variable label
-------------------------------------------------------------------------------
record_id       byte    %8.0g                 
redcap_survey~r byte    %8.0g                 
level_of_conf~p str19   %19s                  
prior_stata     byte    %8.0g                 
stata_familia~y byte    %8.0g                 
data_management byte    %8.0g                 
descriptive_s~s byte    %8.0g                 
data_visualiz~s byte    %8.0g                 
dofiles         byte    %8.0g                 
effectiveness   byte    %8.0g                 
need_support    byte    %8.0g                 
further_train~1 byte    %8.0g                 
further_train~2 byte    %8.0g                 
further_train~3 byte    %8.0g                 
further_train~4 byte    %8.0g                 
further_train~5 byte    %8.0g                 
further_train~6 byte    %8.0g                 
stata_ready     byte    %8.0g                 
additional_su~1 byte    %8.0g                 
additional_su~2 byte    %8.0g                 
additional_su~3 byte    %8.0g                 
additional_su~4 byte    %8.0g                 
additional_su~5 byte    %8.0g                 
other_support   str45   %45s                  
level_of_conf~e byte    %8.0g                 
-------------------------------------------------------------------------------
Sorted by: 
* Load the Hons dataset (discarding any data in memory) and list its variables.
* NOTE(review): -describe- reports generic names v1-v25, with the same variable
* count and storage types as Conf. Presumably the header row was not read as
* variable names on import; confirm, and rename before using record_id as a key.
use Hons, clear

describe
Contains data from Hons.dta
 Observations:            43                  
    Variables:            25                  24 Feb 2025 19:19
-------------------------------------------------------------------------------
Variable      Storage   Display    Value
    name         type    format    label      Variable label
-------------------------------------------------------------------------------
v1              byte    %8.0g                 
v2              byte    %8.0g                 
v3              str19   %19s                  
v4              byte    %8.0g                 
v5              byte    %8.0g                 
v6              byte    %8.0g                 
v7              byte    %8.0g                 
v8              byte    %8.0g                 
v9              byte    %8.0g                 
v10             byte    %8.0g                 
v11             byte    %8.0g                 
v12             byte    %8.0g                 
v13             byte    %8.0g                 
v14             byte    %8.0g                 
v15             byte    %8.0g                 
v16             byte    %8.0g                 
v17             byte    %8.0g                 
v18             byte    %8.0g                 
v19             byte    %8.0g                 
v20             byte    %8.0g                 
v21             byte    %8.0g                 
v22             byte    %8.0g                 
v23             byte    %8.0g                 
v24             str45   %45s                  
v25             byte    %8.0g                 
-------------------------------------------------------------------------------
Sorted by: 
use Demo, clear

* dob is already a numeric daily date (float with a %d display format), so
* string(dob, "%td") produces text such as "15mar1989": day, month, year.
gen dob_str = string(dob, "%td")

* The mask passed to date() must describe that text, i.e. "DMY". The earlier
* mask "YMD" could not parse any value, which is why the original run printed
* "(43 missing values generated)".
gen dob_date = date(dob_str, "DMY")

* Apply the display format to the newly created date variable; dob itself
* already carries this format.
format dob_date %dM_d,_CY
use Demo, clear

* date() accepts only a string argument. -describe- shows dob stored as a
* float daily date, so the unconditional call date(dob, "YMD") stopped with
* "type mismatch" r(109) and -save- was never reached. Convert only when dob
* is still a string; otherwise keep the existing numeric date.
capture confirm string variable dob
if _rc == 0 {
    gen _date_ = date(dob, "YMD")
    drop dob
    rename _date_ dob
}

* In both cases make sure dob displays as e.g. "Mar 15, 1989".
format dob %dM_d,_CY

save Demo, replace
* Reload Demo from disk and list its variables again to check the storage type
* and display format of dob.
use Demo 

describe
Contains data from Demo.dta
 Observations:            43                  
    Variables:            22                  11 Mar 2025 14:34
-------------------------------------------------------------------------------
Variable      Storage   Display    Value
    name         type    format    label      Variable label
-------------------------------------------------------------------------------
record_id       byte    %8.0g                 
redcap_survey~r byte    %8.0g                 
demographic_d~p str19   %19s                  
name            str12   %12s                  
surname         str10   %10s                  
student_id      long    %12.0g                
age             byte    %8.0g                 
gender          byte    %8.0g                 
other_gender    byte    %8.0g                 
race            byte    %8.0g                 
other_race      byte    %8.0g                 
home_language   byte    %8.0g                 
other_hl        str5    %9s                   
english         byte    %8.0g                 
study_level     byte    %8.0g                 
study_field     byte    %8.0g                 
education_level byte    %8.0g                 
employment_st~s byte    %8.0g                 
res             byte    %8.0g                 
other_res       str12   %12s                  
demographic_d~e byte    %8.0g                 
dob             float   %dM_d,_CY             
-------------------------------------------------------------------------------
Sorted by: 

Merging the Demographic Dataset with the Level of Confidence Dataset

* Start from the demographics data and attach the confidence responses.
use Demo
* One-to-one match on record_id: each record is expected to appear exactly once
* in both Demo and Conf.
merge 1:1 record_id using Conf

* Save the combined data, overwriting any earlier merged5.dta.
* NOTE(review): the _merge indicator created above is saved with the data; it
* must be dropped or renamed before merged5 is used in another -merge-.
save merged5.dta, replace
    Result                      Number of obs
    -----------------------------------------
    Not matched                             0
    Matched                                43  (_merge==3)
    -----------------------------------------

file merged5.dta saved
* Load the merged dataset and list its variables to confirm that the columns
* from both source files are present.
use merged5 

describe
Contains data from merged5.dta
 Observations:            43                  
    Variables:            46                  11 Mar 2025 17:20
-------------------------------------------------------------------------------
Variable      Storage   Display    Value
    name         type    format    label      Variable label
-------------------------------------------------------------------------------
record_id       byte    %8.0g                 
redcap_survey~r byte    %8.0g                 
demographic_d~p str19   %19s                  
name            str12   %12s                  
surname         str10   %10s                  
student_id      long    %12.0g                
age             byte    %8.0g                 
gender          byte    %8.0g                 
other_gender    byte    %8.0g                 
race            byte    %8.0g                 
other_race      byte    %8.0g                 
home_language   byte    %8.0g                 
other_hl        str5    %9s                   
english         byte    %8.0g                 
study_level     byte    %8.0g                 
study_field     byte    %8.0g                 
education_level byte    %8.0g                 
employment_st~s byte    %8.0g                 
res             byte    %8.0g                 
other_res       str12   %12s                  
demographic_d~e byte    %8.0g                 
dob             float   %dM_d,_CY             
level_of_conf~p str19   %19s                  
prior_stata     byte    %8.0g                 
stata_familia~y byte    %8.0g                 
data_management byte    %8.0g                 
descriptive_s~s byte    %8.0g                 
data_visualiz~s byte    %8.0g                 
dofiles         byte    %8.0g                 
effectiveness   byte    %8.0g                 
need_support    byte    %8.0g                 
further_train~1 byte    %8.0g                 
further_train~2 byte    %8.0g                 
further_train~3 byte    %8.0g                 
further_train~4 byte    %8.0g                 
further_train~5 byte    %8.0g                 
further_train~6 byte    %8.0g                 
stata_ready     byte    %8.0g                 
additional_su~1 byte    %8.0g                 
additional_su~2 byte    %8.0g                 
additional_su~3 byte    %8.0g                 
additional_su~4 byte    %8.0g                 
additional_su~5 byte    %8.0g                 
other_support   str45   %45s                  
level_of_conf~e byte    %8.0g                 
_merge          byte    %23.0g     _merge     Matching result from merge
-------------------------------------------------------------------------------
Sorted by: record_id

Labelling variables, creating value labels, and adding notes to the dataset

* Load the merged dataset and print a codebook entry for every variable (type,
* range, missing counts and a tabulation or summary) as the basis for labelling.
use merged5

codebook
record_id                                                           (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [1,43]                        Units: 1
         Unique values: 43                        Missing .: 0/43

                  Mean:      22
             Std. dev.: 12.5565

           Percentiles:     10%       25%       50%       75%       90%
                              5        11        22        33        39

-------------------------------------------------------------------------------
redcap_survey_identifier                                            (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [.,.]                         Units: .
         Unique values: 0                         Missing .: 43/43

            Tabulation: Freq.  Value
                           43  .

-------------------------------------------------------------------------------
demographic_details_timestamp                                       (unlabeled)
-------------------------------------------------------------------------------

                  Type: String (str19)

         Unique values: 43                        Missing "": 0/43

              Examples: "2025-02-11 11:05:45"
                        "2025-02-11 21:20:27"
                        "2025-02-13 16:10:32"
                        "2025-02-15 17:37:24"

               Warning: Variable has embedded blanks.

-------------------------------------------------------------------------------
name                                                                (unlabeled)
-------------------------------------------------------------------------------

                  Type: String (str12)

         Unique values: 39                        Missing "": 0/43

              Examples: "Boitumelo "
                        "Katlie"
                        "Musawenkosi "
                        "Sylvia "

               Warning: Variable has trailing blanks.

-------------------------------------------------------------------------------
surname                                                             (unlabeled)
-------------------------------------------------------------------------------

                  Type: String (str10)

         Unique values: 34                        Missing "": 0/43

              Examples: "Kgobane "
                        "Mbele "
                        "Mpoto"
                        "Qetuka"

               Warning: Variable has trailing blanks.

-------------------------------------------------------------------------------
student_id                                                          (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (long)

                 Range: [243577,2.152e+08]            Units: 1
         Unique values: 34                        Missing .: 0/43

                  Mean: 1.7e+07
             Std. dev.: 5.5e+07

           Percentiles:     10%       25%       50%       75%       90%
                        1.1e+06   2.4e+06   2.6e+06   3.0e+06   3.0e+06

-------------------------------------------------------------------------------
age                                                                 (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [20,36]                       Units: 1
         Unique values: 14                        Missing .: 0/43

                  Mean: 26.5814
             Std. dev.: 4.34923

           Percentiles:     10%       25%       50%       75%       90%
                             22        23        26        30        31

-------------------------------------------------------------------------------
gender                                                              (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [1,5]                         Units: 1
         Unique values: 4                         Missing .: 0/43

            Tabulation: Freq.  Value
                            8  1
                           33  2
                            1  3
                            1  5

-------------------------------------------------------------------------------
other_gender                                                        (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [.,.]                         Units: .
         Unique values: 0                         Missing .: 43/43

            Tabulation: Freq.  Value
                           43  .

-------------------------------------------------------------------------------
race                                                                (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [1,4]                         Units: 1
         Unique values: 3                         Missing .: 0/43

            Tabulation: Freq.  Value
                            1  1
                           41  2
                            1  4

-------------------------------------------------------------------------------
other_race                                                          (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [.,.]                         Units: .
         Unique values: 0                         Missing .: 43/43

            Tabulation: Freq.  Value
                           43  .

-------------------------------------------------------------------------------
home_language                                                       (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [2,13]                        Units: 1
         Unique values: 11                        Missing .: 0/43

                  Mean: 7.88372
             Std. dev.: 3.33958

           Percentiles:     10%       25%       50%       75%       90%
                              4         5         8        11        12

-------------------------------------------------------------------------------
other_hl                                                            (unlabeled)
-------------------------------------------------------------------------------

                  Type: String (str5)

         Unique values: 1                         Missing "": 42/43

            Tabulation: Freq.  Value
                           42  ""
                            1  "Shona"

-------------------------------------------------------------------------------
english                                                             (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [1,5]                         Units: 1
         Unique values: 5                         Missing .: 4/43

            Tabulation: Freq.  Value
                            1  1
                            6  2
                           13  3
                           13  4
                            6  5
                            4  .

-------------------------------------------------------------------------------
study_level                                                         (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [1,2]                         Units: 1
         Unique values: 2                         Missing .: 1/43

            Tabulation: Freq.  Value
                           16  1
                           26  2
                            1  .

-------------------------------------------------------------------------------
study_field                                                         (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [1,6]                         Units: 1
         Unique values: 6                         Missing .: 18/43

            Tabulation: Freq.  Value
                            8  1
                            3  2
                            1  3
                            8  4
                            2  5
                            3  6
                           18  .

-------------------------------------------------------------------------------
education_level                                                     (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [1,4]                         Units: 1
         Unique values: 4                         Missing .: 0/43

            Tabulation: Freq.  Value
                           22  1
                            3  2
                           12  3
                            6  4

-------------------------------------------------------------------------------
employment_status                                                   (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [0,1]                         Units: 1
         Unique values: 2                         Missing .: 0/43

            Tabulation: Freq.  Value
                           30  0
                           13  1

-------------------------------------------------------------------------------
res                                                                 (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [1,4]                         Units: 1
         Unique values: 4                         Missing .: 0/43

            Tabulation: Freq.  Value
                            5  1
                           29  2
                            8  3
                            1  4

-------------------------------------------------------------------------------
other_res                                                           (unlabeled)
-------------------------------------------------------------------------------

                  Type: String (str12)

         Unique values: 1                         Missing "": 42/43

            Tabulation: Freq.  Value
                           42  ""
                            1  "Private Flat"

               Warning: Variable has embedded blanks.

-------------------------------------------------------------------------------
demographic_details_complete                                        (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [2,2]                         Units: 1
         Unique values: 1                         Missing .: 0/43

            Tabulation: Freq.  Value
                           43  2

-------------------------------------------------------------------------------
dob                                                                 (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric daily date (float)

                 Range: [10666,16299]                 Units: 1
       Or equivalently: [15mar1989,16aug2004]         Units: days
         Unique values: 35                        Missing .: 0/43

                  Mean: 14058.2 = 28jun1998(+ 6 hours)
             Std. dev.: 1564.19
           Percentiles:       10%        25%        50%        75%        90%
                            12293      12900      14257      15541      15639
                        28aug1993  27apr1995  13jan1999  20jul2002  26oct2002

-------------------------------------------------------------------------------
level_of_confidence_timestamp                                       (unlabeled)
-------------------------------------------------------------------------------

                  Type: String (str19)

         Unique values: 41                        Missing "": 2/43

              Examples: "2025-02-11 11:00:41"
                        "2025-02-11 14:25:44"
                        "2025-02-13 10:05:01"
                        "2025-02-15 17:39:19"

               Warning: Variable has embedded blanks.

-------------------------------------------------------------------------------
prior_stata                                                         (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [0,1]                         Units: 1
         Unique values: 2                         Missing .: 2/43

            Tabulation: Freq.  Value
                           20  0
                           21  1
                            2  .

-------------------------------------------------------------------------------
stata_familiarity                                                   (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [1,5]                         Units: 1
         Unique values: 5                         Missing .: 3/43

            Tabulation: Freq.  Value
                           15  1
                            6  2
                            7  3
                           11  4
                            1  5
                            3  .

-------------------------------------------------------------------------------
data_management                                                     (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [1,4]                         Units: 1
         Unique values: 4                         Missing .: 6/43

            Tabulation: Freq.  Value
                           11  1
                            9  2
                            8  3
                            9  4
                            6  .

-------------------------------------------------------------------------------
descriptive_stats                                                   (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [1,5]                         Units: 1
         Unique values: 5                         Missing .: 3/43

            Tabulation: Freq.  Value
                           10  1
                            7  2
                           10  3
                           11  4
                            2  5
                            3  .

-------------------------------------------------------------------------------
data_visualizations                                                 (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [1,5]                         Units: 1
         Unique values: 5                         Missing .: 2/43

            Tabulation: Freq.  Value
                            9  1
                            9  2
                           14  3
                            6  4
                            3  5
                            2  .

-------------------------------------------------------------------------------
dofiles                                                             (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [1,5]                         Units: 1
         Unique values: 5                         Missing .: 3/43

            Tabulation: Freq.  Value
                            5  1
                           10  2
                            8  3
                           10  4
                            7  5
                            3  .

-------------------------------------------------------------------------------
effectiveness                                                       (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [1,5]                         Units: 1
         Unique values: 5                         Missing .: 3/43

            Tabulation: Freq.  Value
                            1  1
                           16  2
                           16  3
                            5  4
                            2  5
                            3  .

-------------------------------------------------------------------------------
need_support                                                        (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [0,1]                         Units: 1
         Unique values: 2                         Missing .: 4/43

            Tabulation: Freq.  Value
                            5  0
                           34  1
                            4  .

-------------------------------------------------------------------------------
further_training___1                                                (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [0,1]                         Units: 1
         Unique values: 2                         Missing .: 0/43

            Tabulation: Freq.  Value
                           17  0
                           26  1

-------------------------------------------------------------------------------
further_training___2                                                (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [0,1]                         Units: 1
         Unique values: 2                         Missing .: 0/43

            Tabulation: Freq.  Value
                           21  0
                           22  1

-------------------------------------------------------------------------------
further_training___3                                                (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [0,1]                         Units: 1
         Unique values: 2                         Missing .: 0/43

            Tabulation: Freq.  Value
                           21  0
                           22  1

-------------------------------------------------------------------------------
further_training___4                                                (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [0,1]                         Units: 1
         Unique values: 2                         Missing .: 0/43

            Tabulation: Freq.  Value
                           20  0
                           23  1

-------------------------------------------------------------------------------
further_training___5                                                (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [0,1]                         Units: 1
         Unique values: 2                         Missing .: 0/43

            Tabulation: Freq.  Value
                           32  0
                           11  1

-------------------------------------------------------------------------------
further_training___6                                                (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [0,0]                         Units: 1
         Unique values: 1                         Missing .: 0/43

            Tabulation: Freq.  Value
                           43  0

-------------------------------------------------------------------------------
stata_ready                                                         (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [0,1]                         Units: 1
         Unique values: 2                         Missing .: 3/43

            Tabulation: Freq.  Value
                           34  0
                            6  1
                            3  .

-------------------------------------------------------------------------------
additional_support___1                                              (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [0,1]                         Units: 1
         Unique values: 2                         Missing .: 0/43

            Tabulation: Freq.  Value
                           17  0
                           26  1

-------------------------------------------------------------------------------
additional_support___2                                              (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [0,1]                         Units: 1
         Unique values: 2                         Missing .: 0/43

            Tabulation: Freq.  Value
                           25  0
                           18  1

-------------------------------------------------------------------------------
additional_support___3                                              (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [0,1]                         Units: 1
         Unique values: 2                         Missing .: 0/43

            Tabulation: Freq.  Value
                           21  0
                           22  1

-------------------------------------------------------------------------------
additional_support___4                                              (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [0,1]                         Units: 1
         Unique values: 2                         Missing .: 0/43

            Tabulation: Freq.  Value
                           18  0
                           25  1

-------------------------------------------------------------------------------
additional_support___5                                              (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [0,1]                         Units: 1
         Unique values: 2                         Missing .: 0/43

            Tabulation: Freq.  Value
                           42  0
                            1  1

-------------------------------------------------------------------------------
other_support                                                       (unlabeled)
-------------------------------------------------------------------------------

                  Type: String (str45)

         Unique values: 1                         Missing "": 42/43

            Tabulation: Freq.  Value
                           42  ""
                            1  "I need to work on real projects using
                               STATA. "

               Warning: Variable has embedded and trailing blanks.

-------------------------------------------------------------------------------
level_of_confidence_complete                                        (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [0,2]                         Units: 1
         Unique values: 2                         Missing .: 0/43

            Tabulation: Freq.  Value
                            2  0
                           41  2

-------------------------------------------------------------------------------
_merge                                               Matching result from merge
-------------------------------------------------------------------------------

                  Type: Numeric (byte)
                 Label: _merge

                 Range: [3,3]                         Units: 1
         Unique values: 1                         Missing .: 0/43

            Tabulation: Freq.   Numeric  Label
                           43         3  Matched (3)
* Attach value labels to gender and persist them in merged5.dta.
* The dataset has no variable named Sex; the column is `gender` (Stata names
* are case-sensitive), so the former `rename Sex Gender` stopped with r(111)
* and nothing after it ran.
use merged5, clear

* Codes observed in the data are 1, 2, 3 and 5 (no code 4).
label define Gender 1 "Male" 2 "Female" 3 "Non-binary" 5 "Prefer not to say", replace

* Attach the label to gender; race is labelled in its own chunk.
label values gender Gender

save merged5.dta, replace
variable Sex not found
r(111);

r(111);
* Reload the merged dataset and list its variables, storage types and labels.
* `clear` lets the load succeed even if a dataset is already in memory
* (e.g. when the chunk is run interactively), matching the other chunks.
use merged5.dta, clear
describe
Contains data from merged5.dta
 Observations:            43                  
    Variables:            46                  11 Mar 2025 17:20
-------------------------------------------------------------------------------
Variable      Storage   Display    Value
    name         type    format    label      Variable label
-------------------------------------------------------------------------------
record_id       byte    %8.0g                 
redcap_survey~r byte    %8.0g                 
demographic_d~p str19   %19s                  
name            str12   %12s                  
surname         str10   %10s                  
student_id      long    %12.0g                
age             byte    %8.0g                 
gender          byte    %8.0g                 
other_gender    byte    %8.0g                 
race            byte    %8.0g                 
other_race      byte    %8.0g                 
home_language   byte    %8.0g                 
other_hl        str5    %9s                   
english         byte    %8.0g                 
study_level     byte    %8.0g                 
study_field     byte    %8.0g                 
education_level byte    %8.0g                 
employment_st~s byte    %8.0g                 
res             byte    %8.0g                 
other_res       str12   %12s                  
demographic_d~e byte    %8.0g                 
dob             float   %dM_d,_CY             
level_of_conf~p str19   %19s                  
prior_stata     byte    %8.0g                 
stata_familia~y byte    %8.0g                 
data_management byte    %8.0g                 
descriptive_s~s byte    %8.0g                 
data_visualiz~s byte    %8.0g                 
dofiles         byte    %8.0g                 
effectiveness   byte    %8.0g                 
need_support    byte    %8.0g                 
further_train~1 byte    %8.0g                 
further_train~2 byte    %8.0g                 
further_train~3 byte    %8.0g                 
further_train~4 byte    %8.0g                 
further_train~5 byte    %8.0g                 
further_train~6 byte    %8.0g                 
stata_ready     byte    %8.0g                 
additional_su~1 byte    %8.0g                 
additional_su~2 byte    %8.0g                 
additional_su~3 byte    %8.0g                 
additional_su~4 byte    %8.0g                 
additional_su~5 byte    %8.0g                 
other_support   str45   %45s                  
level_of_conf~e byte    %8.0g                 
_merge          byte    %23.0g     _merge     Matching result from merge
-------------------------------------------------------------------------------
Sorted by: record_id
* Define the value label for race, attach it, and persist it in merged5.dta.
use merged5.dta, clear
* `replace` keeps this chunk re-runnable once Race has been saved into the file;
* without it a second run stops with "label Race already defined".
* NOTE(review): code 3 is unlabelled and does not occur in the data - confirm
* the full coding against the survey's data dictionary.
label define Race 1 "White" 2 "Black" 4 "Indian", replace
label values race Race

save merged5.dta, replace
file merged5.dta saved

* Open the saved merged dataset, discarding anything currently in memory.
use "merged5.dta", clear

* Print the codebook entry (type, range, missing count, tabulation) for every variable.
codebook _all
record_id                                                           (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [1,43]                        Units: 1
         Unique values: 43                        Missing .: 0/43

                  Mean:      22
             Std. dev.: 12.5565

           Percentiles:     10%       25%       50%       75%       90%
                              5        11        22        33        39

-------------------------------------------------------------------------------
redcap_survey_identifier                                            (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [.,.]                         Units: .
         Unique values: 0                         Missing .: 43/43

            Tabulation: Freq.  Value
                           43  .

-------------------------------------------------------------------------------
demographic_details_timestamp                                       (unlabeled)
-------------------------------------------------------------------------------

                  Type: String (str19)

         Unique values: 43                        Missing "": 0/43

              Examples: "2025-02-11 11:05:45"
                        "2025-02-11 21:20:27"
                        "2025-02-13 16:10:32"
                        "2025-02-15 17:37:24"

               Warning: Variable has embedded blanks.

-------------------------------------------------------------------------------
name                                                                (unlabeled)
-------------------------------------------------------------------------------

                  Type: String (str12)

         Unique values: 39                        Missing "": 0/43

              Examples: "Boitumelo "
                        "Katlie"
                        "Musawenkosi "
                        "Sylvia "

               Warning: Variable has trailing blanks.

-------------------------------------------------------------------------------
surname                                                             (unlabeled)
-------------------------------------------------------------------------------

                  Type: String (str10)

         Unique values: 34                        Missing "": 0/43

              Examples: "Kgobane "
                        "Mbele "
                        "Mpoto"
                        "Qetuka"

               Warning: Variable has trailing blanks.

-------------------------------------------------------------------------------
student_id                                                          (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (long)

                 Range: [243577,2.152e+08]            Units: 1
         Unique values: 34                        Missing .: 0/43

                  Mean: 1.7e+07
             Std. dev.: 5.5e+07

           Percentiles:     10%       25%       50%       75%       90%
                        1.1e+06   2.4e+06   2.6e+06   3.0e+06   3.0e+06

-------------------------------------------------------------------------------
age                                                                 (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [20,36]                       Units: 1
         Unique values: 14                        Missing .: 0/43

                  Mean: 26.5814
             Std. dev.: 4.34923

           Percentiles:     10%       25%       50%       75%       90%
                             22        23        26        30        31

-------------------------------------------------------------------------------
gender                                                              (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [1,5]                         Units: 1
         Unique values: 4                         Missing .: 0/43

            Tabulation: Freq.  Value
                            8  1
                           33  2
                            1  3
                            1  5

-------------------------------------------------------------------------------
other_gender                                                        (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [.,.]                         Units: .
         Unique values: 0                         Missing .: 43/43

            Tabulation: Freq.  Value
                           43  .

-------------------------------------------------------------------------------
race                                                                (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)
                 Label: Race

                 Range: [1,4]                         Units: 1
         Unique values: 3                         Missing .: 0/43

            Tabulation: Freq.   Numeric  Label
                            1         1  White
                           41         2  Black
                            1         4  Indian

-------------------------------------------------------------------------------
other_race                                                          (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [.,.]                         Units: .
         Unique values: 0                         Missing .: 43/43

            Tabulation: Freq.  Value
                           43  .

-------------------------------------------------------------------------------
home_language                                                       (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [2,13]                        Units: 1
         Unique values: 11                        Missing .: 0/43

                  Mean: 7.88372
             Std. dev.: 3.33958

           Percentiles:     10%       25%       50%       75%       90%
                              4         5         8        11        12

-------------------------------------------------------------------------------
other_hl                                                            (unlabeled)
-------------------------------------------------------------------------------

                  Type: String (str5)

         Unique values: 1                         Missing "": 42/43

            Tabulation: Freq.  Value
                           42  ""
                            1  "Shona"

-------------------------------------------------------------------------------
english                                                             (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [1,5]                         Units: 1
         Unique values: 5                         Missing .: 4/43

            Tabulation: Freq.  Value
                            1  1
                            6  2
                           13  3
                           13  4
                            6  5
                            4  .

-------------------------------------------------------------------------------
study_level                                                         (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [1,2]                         Units: 1
         Unique values: 2                         Missing .: 1/43

            Tabulation: Freq.  Value
                           16  1
                           26  2
                            1  .

-------------------------------------------------------------------------------
study_field                                                         (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [1,6]                         Units: 1
         Unique values: 6                         Missing .: 18/43

            Tabulation: Freq.  Value
                            8  1
                            3  2
                            1  3
                            8  4
                            2  5
                            3  6
                           18  .

-------------------------------------------------------------------------------
education_level                                                     (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [1,4]                         Units: 1
         Unique values: 4                         Missing .: 0/43

            Tabulation: Freq.  Value
                           22  1
                            3  2
                           12  3
                            6  4

-------------------------------------------------------------------------------
employment_status                                                   (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [0,1]                         Units: 1
         Unique values: 2                         Missing .: 0/43

            Tabulation: Freq.  Value
                           30  0
                           13  1

-------------------------------------------------------------------------------
res                                                                 (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [1,4]                         Units: 1
         Unique values: 4                         Missing .: 0/43

            Tabulation: Freq.  Value
                            5  1
                           29  2
                            8  3
                            1  4

-------------------------------------------------------------------------------
other_res                                                           (unlabeled)
-------------------------------------------------------------------------------

                  Type: String (str12)

         Unique values: 1                         Missing "": 42/43

            Tabulation: Freq.  Value
                           42  ""
                            1  "Private Flat"

               Warning: Variable has embedded blanks.

-------------------------------------------------------------------------------
demographic_details_complete                                        (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [2,2]                         Units: 1
         Unique values: 1                         Missing .: 0/43

            Tabulation: Freq.  Value
                           43  2

-------------------------------------------------------------------------------
dob                                                                 (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric daily date (float)

                 Range: [10666,16299]                 Units: 1
       Or equivalently: [15mar1989,16aug2004]         Units: days
         Unique values: 35                        Missing .: 0/43

                  Mean: 14058.2 = 28jun1998(+ 6 hours)
             Std. dev.: 1564.19
           Percentiles:       10%        25%        50%        75%        90%
                            12293      12900      14257      15541      15639
                        28aug1993  27apr1995  13jan1999  20jul2002  26oct2002

-------------------------------------------------------------------------------
level_of_confidence_timestamp                                       (unlabeled)
-------------------------------------------------------------------------------

                  Type: String (str19)

         Unique values: 41                        Missing "": 2/43

              Examples: "2025-02-11 11:00:41"
                        "2025-02-11 14:25:44"
                        "2025-02-13 10:05:01"
                        "2025-02-15 17:39:19"

               Warning: Variable has embedded blanks.

-------------------------------------------------------------------------------
prior_stata                                                         (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [0,1]                         Units: 1
         Unique values: 2                         Missing .: 2/43

            Tabulation: Freq.  Value
                           20  0
                           21  1
                            2  .

-------------------------------------------------------------------------------
stata_familiarity                                                   (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [1,5]                         Units: 1
         Unique values: 5                         Missing .: 3/43

            Tabulation: Freq.  Value
                           15  1
                            6  2
                            7  3
                           11  4
                            1  5
                            3  .

-------------------------------------------------------------------------------
data_management                                                     (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [1,4]                         Units: 1
         Unique values: 4                         Missing .: 6/43

            Tabulation: Freq.  Value
                           11  1
                            9  2
                            8  3
                            9  4
                            6  .

-------------------------------------------------------------------------------
descriptive_stats                                                   (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [1,5]                         Units: 1
         Unique values: 5                         Missing .: 3/43

            Tabulation: Freq.  Value
                           10  1
                            7  2
                           10  3
                           11  4
                            2  5
                            3  .

-------------------------------------------------------------------------------
data_visualizations                                                 (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [1,5]                         Units: 1
         Unique values: 5                         Missing .: 2/43

            Tabulation: Freq.  Value
                            9  1
                            9  2
                           14  3
                            6  4
                            3  5
                            2  .

-------------------------------------------------------------------------------
dofiles                                                             (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [1,5]                         Units: 1
         Unique values: 5                         Missing .: 3/43

            Tabulation: Freq.  Value
                            5  1
                           10  2
                            8  3
                           10  4
                            7  5
                            3  .

-------------------------------------------------------------------------------
effectiveness                                                       (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [1,5]                         Units: 1
         Unique values: 5                         Missing .: 3/43

            Tabulation: Freq.  Value
                            1  1
                           16  2
                           16  3
                            5  4
                            2  5
                            3  .

-------------------------------------------------------------------------------
need_support                                                        (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [0,1]                         Units: 1
         Unique values: 2                         Missing .: 4/43

            Tabulation: Freq.  Value
                            5  0
                           34  1
                            4  .

-------------------------------------------------------------------------------
further_training___1                                                (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [0,1]                         Units: 1
         Unique values: 2                         Missing .: 0/43

            Tabulation: Freq.  Value
                           17  0
                           26  1

-------------------------------------------------------------------------------
further_training___2                                                (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [0,1]                         Units: 1
         Unique values: 2                         Missing .: 0/43

            Tabulation: Freq.  Value
                           21  0
                           22  1

-------------------------------------------------------------------------------
further_training___3                                                (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [0,1]                         Units: 1
         Unique values: 2                         Missing .: 0/43

            Tabulation: Freq.  Value
                           21  0
                           22  1

-------------------------------------------------------------------------------
further_training___4                                                (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [0,1]                         Units: 1
         Unique values: 2                         Missing .: 0/43

            Tabulation: Freq.  Value
                           20  0
                           23  1

-------------------------------------------------------------------------------
further_training___5                                                (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [0,1]                         Units: 1
         Unique values: 2                         Missing .: 0/43

            Tabulation: Freq.  Value
                           32  0
                           11  1

-------------------------------------------------------------------------------
further_training___6                                                (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [0,0]                         Units: 1
         Unique values: 1                         Missing .: 0/43

            Tabulation: Freq.  Value
                           43  0

-------------------------------------------------------------------------------
stata_ready                                                         (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [0,1]                         Units: 1
         Unique values: 2                         Missing .: 3/43

            Tabulation: Freq.  Value
                           34  0
                            6  1
                            3  .

-------------------------------------------------------------------------------
additional_support___1                                              (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [0,1]                         Units: 1
         Unique values: 2                         Missing .: 0/43

            Tabulation: Freq.  Value
                           17  0
                           26  1

-------------------------------------------------------------------------------
additional_support___2                                              (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [0,1]                         Units: 1
         Unique values: 2                         Missing .: 0/43

            Tabulation: Freq.  Value
                           25  0
                           18  1

-------------------------------------------------------------------------------
additional_support___3                                              (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [0,1]                         Units: 1
         Unique values: 2                         Missing .: 0/43

            Tabulation: Freq.  Value
                           21  0
                           22  1

-------------------------------------------------------------------------------
additional_support___4                                              (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [0,1]                         Units: 1
         Unique values: 2                         Missing .: 0/43

            Tabulation: Freq.  Value
                           18  0
                           25  1

-------------------------------------------------------------------------------
additional_support___5                                              (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [0,1]                         Units: 1
         Unique values: 2                         Missing .: 0/43

            Tabulation: Freq.  Value
                           42  0
                            1  1

-------------------------------------------------------------------------------
other_support                                                       (unlabeled)
-------------------------------------------------------------------------------

                  Type: String (str45)

         Unique values: 1                         Missing "": 42/43

            Tabulation: Freq.  Value
                           42  ""
                            1  "I need to work on real projects using
                               STATA. "

               Warning: Variable has embedded and trailing blanks.

-------------------------------------------------------------------------------
level_of_confidence_complete                                        (unlabeled)
-------------------------------------------------------------------------------

                  Type: Numeric (byte)

                 Range: [0,2]                         Units: 1
         Unique values: 2                         Missing .: 0/43

            Tabulation: Freq.  Value
                            2  0
                           41  2

-------------------------------------------------------------------------------
_merge                                               Matching result from merge
-------------------------------------------------------------------------------

                  Type: Numeric (byte)
                 Label: _merge

                 Range: [3,3]                         Units: 1
         Unique values: 1                         Missing .: 0/43

            Tabulation: Freq.   Numeric  Label
                           43         3  Matched (3)
* Give home_language a descriptive variable label and label its codes,
* then persist the result in merged5.dta.
use merged5.dta, clear

* The former `rename home_language home_language` was a no-op and is dropped.
* The variable label is the descriptive text shown in output; the value label
* (Hlanguage) is what maps the numeric codes to language names.
label variable home_language "Home language"
label define Hlanguage 1 "Afrikaans" 2 "English" 3 "Ndebele" 4 "Pedi" 5 "Sotho" 6 "South African Sign language" 7 "Swati" 8 "Tsonga" 9 "Tswana" 10 "Venda" 11 "Xhosa" 12 "Zulu" 13 "Other", replace
label values home_language Hlanguage

save merged5.dta, replace
  (all newnames==oldnames)




file merged5.dta saved
* Label the english variable and its 1-5 proficiency codes, then persist.
use merged5.dta, clear
* The variable label is descriptive text; the value label name stays English_Proficiency.
label variable english "English proficiency"
* `replace` keeps the chunk re-runnable after the label has been saved into merged5.dta.
label define English_Proficiency 1 "Novice" 2 "Intermediate" 3 "Advanced" 4 "Superior" 5 "Distinguished", replace
label values english English_Proficiency

save merged5.dta, replace
file merged5.dta saved
* Label study_level and its codes, then persist.
use merged5.dta, clear
* Variable names are case-sensitive: the column is study_level, not study_Level
* (the latter stopped the chunk with r(111)).
label variable study_level "Study level"
* `replace` keeps the chunk re-runnable after the label has been saved into merged5.dta.
label define Study_Level 1 "Honours" 2 "Masters", replace
* Defining a value label does not apply it; attach it to the variable explicitly.
label values study_level Study_Level

save merged5.dta, replace
variable study_Level not found
r(111);

r(111);
use merged5.dta, clear