# Print numbers in fixed notation rather than scientific notation.
options(scipen = 999)
# Load the package that provides the 'stata' knitr engine.
library(Statamarkdown)
Warning: package 'Statamarkdown' was built under R version 4.4.3
Stata found at C:/Program Files/Stata18/StataSE-64.exe
The 'stata' engine is ready to use.
# Location of the Stata executable used to run the 'stata' chunks.
stataexe <- "C:/Program Files/Stata18/StataSE-64.exe"
# Tell knitr which executable backs the 'stata' engine.
knitr::opts_chunk$set(engine.path = list(stata = stataexe))
* Make the assignment's data folder the working directory; the later
* use/merge/save commands refer to the .dta files by bare name.
* NOTE(review): absolute, machine-specific path - the chunks will only run
* where this folder exists.
cd "C:\Users\NOMTHA\Documents\WITWATERSRAND\MSc_Epidemiology\Research_Protocol_Development_1\Group5Assignment\DataGroup5"
> l_Development_1\Group5Assignment\DataGroup5"
C:\Users\NOMTHA\Documents\WITWATERSRAND\MSc_Epidemiology\Research_Protocol_Deve
> lopment_1\Group5Assignment\DataGroup5
C:\Users\NOMTHA\Documents\WITWATERSRAND\MSc_Epidemiology\Research_Protocol_Deve
> lopment_1\Group5Assignment\DataGroup5
Contains data from Demo.dta
Observations: 43
Variables: 22 11 Mar 2025 14:34
-------------------------------------------------------------------------------
Variable Storage Display Value
name type format label Variable label
-------------------------------------------------------------------------------
record_id byte %8.0g
redcap_survey~r byte %8.0g
demographic_d~p str19 %19s
name str12 %12s
surname str10 %10s
student_id long %12.0g
age byte %8.0g
gender byte %8.0g
other_gender byte %8.0g
race byte %8.0g
other_race byte %8.0g
home_language byte %8.0g
other_hl str5 %9s
english byte %8.0g
study_level byte %8.0g
study_field byte %8.0g
education_level byte %8.0g
employment_st~s byte %8.0g
res byte %8.0g
other_res str12 %12s
demographic_d~e byte %8.0g
dob float %dM_d,_CY
-------------------------------------------------------------------------------
Sorted by:
Contains data from Conf.dta
Observations: 43
Variables: 25 24 Feb 2025 17:34
-------------------------------------------------------------------------------
Variable Storage Display Value
name type format label Variable label
-------------------------------------------------------------------------------
record_id byte %8.0g
redcap_survey~r byte %8.0g
level_of_conf~p str19 %19s
prior_stata byte %8.0g
stata_familia~y byte %8.0g
data_management byte %8.0g
descriptive_s~s byte %8.0g
data_visualiz~s byte %8.0g
dofiles byte %8.0g
effectiveness byte %8.0g
need_support byte %8.0g
further_train~1 byte %8.0g
further_train~2 byte %8.0g
further_train~3 byte %8.0g
further_train~4 byte %8.0g
further_train~5 byte %8.0g
further_train~6 byte %8.0g
stata_ready byte %8.0g
additional_su~1 byte %8.0g
additional_su~2 byte %8.0g
additional_su~3 byte %8.0g
additional_su~4 byte %8.0g
additional_su~5 byte %8.0g
other_support str45 %45s
level_of_conf~e byte %8.0g
-------------------------------------------------------------------------------
Sorted by:
Contains data from Hons.dta
Observations: 43
Variables: 25 24 Feb 2025 19:19
-------------------------------------------------------------------------------
Variable Storage Display Value
name type format label Variable label
-------------------------------------------------------------------------------
v1 byte %8.0g
v2 byte %8.0g
v3 str19 %19s
v4 byte %8.0g
v5 byte %8.0g
v6 byte %8.0g
v7 byte %8.0g
v8 byte %8.0g
v9 byte %8.0g
v10 byte %8.0g
v11 byte %8.0g
v12 byte %8.0g
v13 byte %8.0g
v14 byte %8.0g
v15 byte %8.0g
v16 byte %8.0g
v17 byte %8.0g
v18 byte %8.0g
v19 byte %8.0g
v20 byte %8.0g
v21 byte %8.0g
v22 byte %8.0g
v23 byte %8.0g
v24 str45 %45s
v25 byte %8.0g
-------------------------------------------------------------------------------
Sorted by:
* Round-trip dob through a string to build a second daily-date variable.
* string(dob, "%td") produces text such as "15mar1989" (day-month-year),
* so it must be parsed with the "DMY" mask; parsing it as "YMD" returns
* missing for every observation.
use Demo, clear
gen dob_str = string(dob, "%td")
gen dob_date = date(dob_str, "DMY")
* Apply the display format to the new variable (dob already carries it).
format dob_date %dM_d,_CY
(43 missing values generated)
* Make sure dob is a numeric Stata daily date, then save Demo.
* date() only accepts a string argument; when dob is already numeric the
* original unconditional call stopped the chunk with "type mismatch"
* r(109) before save was reached. The string-to-date conversion is
* therefore run only when dob is still stored as a string.
use Demo, clear
capture confirm string variable dob
if _rc == 0 {
    gen _date_ = date(dob, "YMD")
    drop dob
    rename _date_ dob
}
format dob %dM_d,_CY
save Demo, replace
type mismatch
r(109);
r(109);
Contains data from Demo.dta
Observations: 43
Variables: 22 11 Mar 2025 14:34
-------------------------------------------------------------------------------
Variable Storage Display Value
name type format label Variable label
-------------------------------------------------------------------------------
record_id byte %8.0g
redcap_survey~r byte %8.0g
demographic_d~p str19 %19s
name str12 %12s
surname str10 %10s
student_id long %12.0g
age byte %8.0g
gender byte %8.0g
other_gender byte %8.0g
race byte %8.0g
other_race byte %8.0g
home_language byte %8.0g
other_hl str5 %9s
english byte %8.0g
study_level byte %8.0g
study_field byte %8.0g
education_level byte %8.0g
employment_st~s byte %8.0g
res byte %8.0g
other_res str12 %12s
demographic_d~e byte %8.0g
dob float %dM_d,_CY
-------------------------------------------------------------------------------
Sorted by:
Merging the Demographic Dataset with the Level of Confidence Dataset
* Join each record in Demo to its record in Conf and save the result.
* record_id is the match key; with 1:1, merge stops with an error if the
* key is duplicated in either file.
* ", clear" lets the chunk run even when unsaved data are in memory,
* matching the other chunks in this document.
use Demo, clear
merge 1:1 record_id using Conf
* NOTE(review): _merge is saved in merged5.dta; drop it before merging
* this file with another dataset, because merge refuses to run while a
* variable named _merge exists.
save merged5.dta, replace
Result Number of obs
-----------------------------------------
Not matched 0
Matched 43 (_merge==3)
-----------------------------------------
file merged5.dta saved
Contains data from merged5.dta
Observations: 43
Variables: 46 11 Mar 2025 17:20
-------------------------------------------------------------------------------
Variable Storage Display Value
name type format label Variable label
-------------------------------------------------------------------------------
record_id byte %8.0g
redcap_survey~r byte %8.0g
demographic_d~p str19 %19s
name str12 %12s
surname str10 %10s
student_id long %12.0g
age byte %8.0g
gender byte %8.0g
other_gender byte %8.0g
race byte %8.0g
other_race byte %8.0g
home_language byte %8.0g
other_hl str5 %9s
english byte %8.0g
study_level byte %8.0g
study_field byte %8.0g
education_level byte %8.0g
employment_st~s byte %8.0g
res byte %8.0g
other_res str12 %12s
demographic_d~e byte %8.0g
dob float %dM_d,_CY
level_of_conf~p str19 %19s
prior_stata byte %8.0g
stata_familia~y byte %8.0g
data_management byte %8.0g
descriptive_s~s byte %8.0g
data_visualiz~s byte %8.0g
dofiles byte %8.0g
effectiveness byte %8.0g
need_support byte %8.0g
further_train~1 byte %8.0g
further_train~2 byte %8.0g
further_train~3 byte %8.0g
further_train~4 byte %8.0g
further_train~5 byte %8.0g
further_train~6 byte %8.0g
stata_ready byte %8.0g
additional_su~1 byte %8.0g
additional_su~2 byte %8.0g
additional_su~3 byte %8.0g
additional_su~4 byte %8.0g
additional_su~5 byte %8.0g
other_support str45 %45s
level_of_conf~e byte %8.0g
_merge byte %23.0g _merge Matching result from merge
-------------------------------------------------------------------------------
Sorted by: record_id
Labelling variables, creating value labels, and making notes on the dataset
record_id (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [1,43] Units: 1
Unique values: 43 Missing .: 0/43
Mean: 22
Std. dev.: 12.5565
Percentiles: 10% 25% 50% 75% 90%
5 11 22 33 39
-------------------------------------------------------------------------------
redcap_survey_identifier (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [.,.] Units: .
Unique values: 0 Missing .: 43/43
Tabulation: Freq. Value
43 .
-------------------------------------------------------------------------------
demographic_details_timestamp (unlabeled)
-------------------------------------------------------------------------------
Type: String (str19)
Unique values: 43 Missing "": 0/43
Examples: "2025-02-11 11:05:45"
"2025-02-11 21:20:27"
"2025-02-13 16:10:32"
"2025-02-15 17:37:24"
Warning: Variable has embedded blanks.
-------------------------------------------------------------------------------
name (unlabeled)
-------------------------------------------------------------------------------
Type: String (str12)
Unique values: 39 Missing "": 0/43
Examples: "Boitumelo "
"Katlie"
"Musawenkosi "
"Sylvia "
Warning: Variable has trailing blanks.
-------------------------------------------------------------------------------
surname (unlabeled)
-------------------------------------------------------------------------------
Type: String (str10)
Unique values: 34 Missing "": 0/43
Examples: "Kgobane "
"Mbele "
"Mpoto"
"Qetuka"
Warning: Variable has trailing blanks.
-------------------------------------------------------------------------------
student_id (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (long)
Range: [243577,2.152e+08] Units: 1
Unique values: 34 Missing .: 0/43
Mean: 1.7e+07
Std. dev.: 5.5e+07
Percentiles: 10% 25% 50% 75% 90%
1.1e+06 2.4e+06 2.6e+06 3.0e+06 3.0e+06
-------------------------------------------------------------------------------
age (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [20,36] Units: 1
Unique values: 14 Missing .: 0/43
Mean: 26.5814
Std. dev.: 4.34923
Percentiles: 10% 25% 50% 75% 90%
22 23 26 30 31
-------------------------------------------------------------------------------
gender (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [1,5] Units: 1
Unique values: 4 Missing .: 0/43
Tabulation: Freq. Value
8 1
33 2
1 3
1 5
-------------------------------------------------------------------------------
other_gender (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [.,.] Units: .
Unique values: 0 Missing .: 43/43
Tabulation: Freq. Value
43 .
-------------------------------------------------------------------------------
race (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [1,4] Units: 1
Unique values: 3 Missing .: 0/43
Tabulation: Freq. Value
1 1
41 2
1 4
-------------------------------------------------------------------------------
other_race (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [.,.] Units: .
Unique values: 0 Missing .: 43/43
Tabulation: Freq. Value
43 .
-------------------------------------------------------------------------------
home_language (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [2,13] Units: 1
Unique values: 11 Missing .: 0/43
Mean: 7.88372
Std. dev.: 3.33958
Percentiles: 10% 25% 50% 75% 90%
4 5 8 11 12
-------------------------------------------------------------------------------
other_hl (unlabeled)
-------------------------------------------------------------------------------
Type: String (str5)
Unique values: 1 Missing "": 42/43
Tabulation: Freq. Value
42 ""
1 "Shona"
-------------------------------------------------------------------------------
english (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [1,5] Units: 1
Unique values: 5 Missing .: 4/43
Tabulation: Freq. Value
1 1
6 2
13 3
13 4
6 5
4 .
-------------------------------------------------------------------------------
study_level (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [1,2] Units: 1
Unique values: 2 Missing .: 1/43
Tabulation: Freq. Value
16 1
26 2
1 .
-------------------------------------------------------------------------------
study_field (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [1,6] Units: 1
Unique values: 6 Missing .: 18/43
Tabulation: Freq. Value
8 1
3 2
1 3
8 4
2 5
3 6
18 .
-------------------------------------------------------------------------------
education_level (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [1,4] Units: 1
Unique values: 4 Missing .: 0/43
Tabulation: Freq. Value
22 1
3 2
12 3
6 4
-------------------------------------------------------------------------------
employment_status (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [0,1] Units: 1
Unique values: 2 Missing .: 0/43
Tabulation: Freq. Value
30 0
13 1
-------------------------------------------------------------------------------
res (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [1,4] Units: 1
Unique values: 4 Missing .: 0/43
Tabulation: Freq. Value
5 1
29 2
8 3
1 4
-------------------------------------------------------------------------------
other_res (unlabeled)
-------------------------------------------------------------------------------
Type: String (str12)
Unique values: 1 Missing "": 42/43
Tabulation: Freq. Value
42 ""
1 "Private Flat"
Warning: Variable has embedded blanks.
-------------------------------------------------------------------------------
demographic_details_complete (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [2,2] Units: 1
Unique values: 1 Missing .: 0/43
Tabulation: Freq. Value
43 2
-------------------------------------------------------------------------------
dob (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric daily date (float)
Range: [10666,16299] Units: 1
Or equivalently: [15mar1989,16aug2004] Units: days
Unique values: 35 Missing .: 0/43
Mean: 14058.2 = 28jun1998(+ 6 hours)
Std. dev.: 1564.19
Percentiles: 10% 25% 50% 75% 90%
12293 12900 14257 15541 15639
28aug1993 27apr1995 13jan1999 20jul2002 26oct2002
-------------------------------------------------------------------------------
level_of_confidence_timestamp (unlabeled)
-------------------------------------------------------------------------------
Type: String (str19)
Unique values: 41 Missing "": 2/43
Examples: "2025-02-11 11:00:41"
"2025-02-11 14:25:44"
"2025-02-13 10:05:01"
"2025-02-15 17:39:19"
Warning: Variable has embedded blanks.
-------------------------------------------------------------------------------
prior_stata (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [0,1] Units: 1
Unique values: 2 Missing .: 2/43
Tabulation: Freq. Value
20 0
21 1
2 .
-------------------------------------------------------------------------------
stata_familiarity (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [1,5] Units: 1
Unique values: 5 Missing .: 3/43
Tabulation: Freq. Value
15 1
6 2
7 3
11 4
1 5
3 .
-------------------------------------------------------------------------------
data_management (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [1,4] Units: 1
Unique values: 4 Missing .: 6/43
Tabulation: Freq. Value
11 1
9 2
8 3
9 4
6 .
-------------------------------------------------------------------------------
descriptive_stats (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [1,5] Units: 1
Unique values: 5 Missing .: 3/43
Tabulation: Freq. Value
10 1
7 2
10 3
11 4
2 5
3 .
-------------------------------------------------------------------------------
data_visualizations (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [1,5] Units: 1
Unique values: 5 Missing .: 2/43
Tabulation: Freq. Value
9 1
9 2
14 3
6 4
3 5
2 .
-------------------------------------------------------------------------------
dofiles (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [1,5] Units: 1
Unique values: 5 Missing .: 3/43
Tabulation: Freq. Value
5 1
10 2
8 3
10 4
7 5
3 .
-------------------------------------------------------------------------------
effectiveness (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [1,5] Units: 1
Unique values: 5 Missing .: 3/43
Tabulation: Freq. Value
1 1
16 2
16 3
5 4
2 5
3 .
-------------------------------------------------------------------------------
need_support (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [0,1] Units: 1
Unique values: 2 Missing .: 4/43
Tabulation: Freq. Value
5 0
34 1
4 .
-------------------------------------------------------------------------------
further_training___1 (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [0,1] Units: 1
Unique values: 2 Missing .: 0/43
Tabulation: Freq. Value
17 0
26 1
-------------------------------------------------------------------------------
further_training___2 (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [0,1] Units: 1
Unique values: 2 Missing .: 0/43
Tabulation: Freq. Value
21 0
22 1
-------------------------------------------------------------------------------
further_training___3 (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [0,1] Units: 1
Unique values: 2 Missing .: 0/43
Tabulation: Freq. Value
21 0
22 1
-------------------------------------------------------------------------------
further_training___4 (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [0,1] Units: 1
Unique values: 2 Missing .: 0/43
Tabulation: Freq. Value
20 0
23 1
-------------------------------------------------------------------------------
further_training___5 (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [0,1] Units: 1
Unique values: 2 Missing .: 0/43
Tabulation: Freq. Value
32 0
11 1
-------------------------------------------------------------------------------
further_training___6 (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [0,0] Units: 1
Unique values: 1 Missing .: 0/43
Tabulation: Freq. Value
43 0
-------------------------------------------------------------------------------
stata_ready (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [0,1] Units: 1
Unique values: 2 Missing .: 3/43
Tabulation: Freq. Value
34 0
6 1
3 .
-------------------------------------------------------------------------------
additional_support___1 (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [0,1] Units: 1
Unique values: 2 Missing .: 0/43
Tabulation: Freq. Value
17 0
26 1
-------------------------------------------------------------------------------
additional_support___2 (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [0,1] Units: 1
Unique values: 2 Missing .: 0/43
Tabulation: Freq. Value
25 0
18 1
-------------------------------------------------------------------------------
additional_support___3 (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [0,1] Units: 1
Unique values: 2 Missing .: 0/43
Tabulation: Freq. Value
21 0
22 1
-------------------------------------------------------------------------------
additional_support___4 (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [0,1] Units: 1
Unique values: 2 Missing .: 0/43
Tabulation: Freq. Value
18 0
25 1
-------------------------------------------------------------------------------
additional_support___5 (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [0,1] Units: 1
Unique values: 2 Missing .: 0/43
Tabulation: Freq. Value
42 0
1 1
-------------------------------------------------------------------------------
other_support (unlabeled)
-------------------------------------------------------------------------------
Type: String (str45)
Unique values: 1 Missing "": 42/43
Tabulation: Freq. Value
42 ""
1 "I need to work on real projects using
STATA. "
Warning: Variable has embedded and trailing blanks.
-------------------------------------------------------------------------------
level_of_confidence_complete (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [0,2] Units: 1
Unique values: 2 Missing .: 0/43
Tabulation: Freq. Value
2 0
41 2
-------------------------------------------------------------------------------
_merge Matching result from merge
-------------------------------------------------------------------------------
Type: Numeric (byte)
Label: _merge
Range: [3,3] Units: 1
Unique values: 1 Missing .: 0/43
Tabulation: Freq. Numeric Label
43 3 Matched (3)
* Attach descriptive value labels to the numeric gender codes and save.
* The variable is named gender in merged5 (there is no variable Sex), so
* the former "rename Sex Gender" stopped the chunk with r(111) and is
* removed. The label set Sex is now attached to gender; previously the
* chunk attached a label named Race, which it does not define, to race.
use merged5, clear
label define Sex 1 "Male" 2 "Female" 3 "Non-binary" 5 "Prefer not to say", replace
label values gender Sex
save merged5.dta, replace
variable Sex not found
r(111);
r(111);
Contains data from merged5.dta
Observations: 43
Variables: 46 11 Mar 2025 17:20
-------------------------------------------------------------------------------
Variable Storage Display Value
name type format label Variable label
-------------------------------------------------------------------------------
record_id byte %8.0g
redcap_survey~r byte %8.0g
demographic_d~p str19 %19s
name str12 %12s
surname str10 %10s
student_id long %12.0g
age byte %8.0g
gender byte %8.0g
other_gender byte %8.0g
race byte %8.0g
other_race byte %8.0g
home_language byte %8.0g
other_hl str5 %9s
english byte %8.0g
study_level byte %8.0g
study_field byte %8.0g
education_level byte %8.0g
employment_st~s byte %8.0g
res byte %8.0g
other_res str12 %12s
demographic_d~e byte %8.0g
dob float %dM_d,_CY
level_of_conf~p str19 %19s
prior_stata byte %8.0g
stata_familia~y byte %8.0g
data_management byte %8.0g
descriptive_s~s byte %8.0g
data_visualiz~s byte %8.0g
dofiles byte %8.0g
effectiveness byte %8.0g
need_support byte %8.0g
further_train~1 byte %8.0g
further_train~2 byte %8.0g
further_train~3 byte %8.0g
further_train~4 byte %8.0g
further_train~5 byte %8.0g
further_train~6 byte %8.0g
stata_ready byte %8.0g
additional_su~1 byte %8.0g
additional_su~2 byte %8.0g
additional_su~3 byte %8.0g
additional_su~4 byte %8.0g
additional_su~5 byte %8.0g
other_support str45 %45s
level_of_conf~e byte %8.0g
_merge byte %23.0g _merge Matching result from merge
-------------------------------------------------------------------------------
Sorted by: record_id
* Attach descriptive value labels to the numeric race codes and save.
* ", replace" keeps the chunk re-runnable: without it, label define
* fails if a label named Race is already stored in merged5.dta.
use merged5.dta, clear
label define Race 1 "White" 2 "Black" 4 "Indian", replace
label values race Race
save merged5.dta, replace
* Reload the merged dataset and print a codebook entry for every variable.
use merged5.dta, clear
codebook _all
record_id (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [1,43] Units: 1
Unique values: 43 Missing .: 0/43
Mean: 22
Std. dev.: 12.5565
Percentiles: 10% 25% 50% 75% 90%
5 11 22 33 39
-------------------------------------------------------------------------------
redcap_survey_identifier (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [.,.] Units: .
Unique values: 0 Missing .: 43/43
Tabulation: Freq. Value
43 .
-------------------------------------------------------------------------------
demographic_details_timestamp (unlabeled)
-------------------------------------------------------------------------------
Type: String (str19)
Unique values: 43 Missing "": 0/43
Examples: "2025-02-11 11:05:45"
"2025-02-11 21:20:27"
"2025-02-13 16:10:32"
"2025-02-15 17:37:24"
Warning: Variable has embedded blanks.
-------------------------------------------------------------------------------
name (unlabeled)
-------------------------------------------------------------------------------
Type: String (str12)
Unique values: 39 Missing "": 0/43
Examples: "Boitumelo "
"Katlie"
"Musawenkosi "
"Sylvia "
Warning: Variable has trailing blanks.
-------------------------------------------------------------------------------
surname (unlabeled)
-------------------------------------------------------------------------------
Type: String (str10)
Unique values: 34 Missing "": 0/43
Examples: "Kgobane "
"Mbele "
"Mpoto"
"Qetuka"
Warning: Variable has trailing blanks.
-------------------------------------------------------------------------------
student_id (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (long)
Range: [243577,2.152e+08] Units: 1
Unique values: 34 Missing .: 0/43
Mean: 1.7e+07
Std. dev.: 5.5e+07
Percentiles: 10% 25% 50% 75% 90%
1.1e+06 2.4e+06 2.6e+06 3.0e+06 3.0e+06
-------------------------------------------------------------------------------
age (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [20,36] Units: 1
Unique values: 14 Missing .: 0/43
Mean: 26.5814
Std. dev.: 4.34923
Percentiles: 10% 25% 50% 75% 90%
22 23 26 30 31
-------------------------------------------------------------------------------
gender (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [1,5] Units: 1
Unique values: 4 Missing .: 0/43
Tabulation: Freq. Value
8 1
33 2
1 3
1 5
-------------------------------------------------------------------------------
other_gender (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [.,.] Units: .
Unique values: 0 Missing .: 43/43
Tabulation: Freq. Value
43 .
-------------------------------------------------------------------------------
race (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Label: Race
Range: [1,4] Units: 1
Unique values: 3 Missing .: 0/43
Tabulation: Freq. Numeric Label
1 1 White
41 2 Black
1 4 Indian
-------------------------------------------------------------------------------
other_race (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [.,.] Units: .
Unique values: 0 Missing .: 43/43
Tabulation: Freq. Value
43 .
-------------------------------------------------------------------------------
home_language (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [2,13] Units: 1
Unique values: 11 Missing .: 0/43
Mean: 7.88372
Std. dev.: 3.33958
Percentiles: 10% 25% 50% 75% 90%
4 5 8 11 12
-------------------------------------------------------------------------------
other_hl (unlabeled)
-------------------------------------------------------------------------------
Type: String (str5)
Unique values: 1 Missing "": 42/43
Tabulation: Freq. Value
42 ""
1 "Shona"
-------------------------------------------------------------------------------
english (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [1,5] Units: 1
Unique values: 5 Missing .: 4/43
Tabulation: Freq. Value
1 1
6 2
13 3
13 4
6 5
4 .
-------------------------------------------------------------------------------
study_level (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [1,2] Units: 1
Unique values: 2 Missing .: 1/43
Tabulation: Freq. Value
16 1
26 2
1 .
-------------------------------------------------------------------------------
study_field (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [1,6] Units: 1
Unique values: 6 Missing .: 18/43
Tabulation: Freq. Value
8 1
3 2
1 3
8 4
2 5
3 6
18 .
-------------------------------------------------------------------------------
education_level (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [1,4] Units: 1
Unique values: 4 Missing .: 0/43
Tabulation: Freq. Value
22 1
3 2
12 3
6 4
-------------------------------------------------------------------------------
employment_status (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [0,1] Units: 1
Unique values: 2 Missing .: 0/43
Tabulation: Freq. Value
30 0
13 1
-------------------------------------------------------------------------------
res (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [1,4] Units: 1
Unique values: 4 Missing .: 0/43
Tabulation: Freq. Value
5 1
29 2
8 3
1 4
-------------------------------------------------------------------------------
other_res (unlabeled)
-------------------------------------------------------------------------------
Type: String (str12)
Unique values: 1 Missing "": 42/43
Tabulation: Freq. Value
42 ""
1 "Private Flat"
Warning: Variable has embedded blanks.
-------------------------------------------------------------------------------
demographic_details_complete (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [2,2] Units: 1
Unique values: 1 Missing .: 0/43
Tabulation: Freq. Value
43 2
-------------------------------------------------------------------------------
dob (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric daily date (float)
Range: [10666,16299] Units: 1
Or equivalently: [15mar1989,16aug2004] Units: days
Unique values: 35 Missing .: 0/43
Mean: 14058.2 = 28jun1998(+ 6 hours)
Std. dev.: 1564.19
Percentiles: 10% 25% 50% 75% 90%
12293 12900 14257 15541 15639
28aug1993 27apr1995 13jan1999 20jul2002 26oct2002
-------------------------------------------------------------------------------
level_of_confidence_timestamp (unlabeled)
-------------------------------------------------------------------------------
Type: String (str19)
Unique values: 41 Missing "": 2/43
Examples: "2025-02-11 11:00:41"
"2025-02-11 14:25:44"
"2025-02-13 10:05:01"
"2025-02-15 17:39:19"
Warning: Variable has embedded blanks.
-------------------------------------------------------------------------------
prior_stata (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [0,1] Units: 1
Unique values: 2 Missing .: 2/43
Tabulation: Freq. Value
20 0
21 1
2 .
-------------------------------------------------------------------------------
stata_familiarity (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [1,5] Units: 1
Unique values: 5 Missing .: 3/43
Tabulation: Freq. Value
15 1
6 2
7 3
11 4
1 5
3 .
-------------------------------------------------------------------------------
data_management (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [1,4] Units: 1
Unique values: 4 Missing .: 6/43
Tabulation: Freq. Value
11 1
9 2
8 3
9 4
6 .
-------------------------------------------------------------------------------
descriptive_stats (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [1,5] Units: 1
Unique values: 5 Missing .: 3/43
Tabulation: Freq. Value
10 1
7 2
10 3
11 4
2 5
3 .
-------------------------------------------------------------------------------
data_visualizations (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [1,5] Units: 1
Unique values: 5 Missing .: 2/43
Tabulation: Freq. Value
9 1
9 2
14 3
6 4
3 5
2 .
-------------------------------------------------------------------------------
dofiles (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [1,5] Units: 1
Unique values: 5 Missing .: 3/43
Tabulation: Freq. Value
5 1
10 2
8 3
10 4
7 5
3 .
-------------------------------------------------------------------------------
effectiveness (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [1,5] Units: 1
Unique values: 5 Missing .: 3/43
Tabulation: Freq. Value
1 1
16 2
16 3
5 4
2 5
3 .
-------------------------------------------------------------------------------
need_support (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [0,1] Units: 1
Unique values: 2 Missing .: 4/43
Tabulation: Freq. Value
5 0
34 1
4 .
-------------------------------------------------------------------------------
further_training___1 (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [0,1] Units: 1
Unique values: 2 Missing .: 0/43
Tabulation: Freq. Value
17 0
26 1
-------------------------------------------------------------------------------
further_training___2 (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [0,1] Units: 1
Unique values: 2 Missing .: 0/43
Tabulation: Freq. Value
21 0
22 1
-------------------------------------------------------------------------------
further_training___3 (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [0,1] Units: 1
Unique values: 2 Missing .: 0/43
Tabulation: Freq. Value
21 0
22 1
-------------------------------------------------------------------------------
further_training___4 (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [0,1] Units: 1
Unique values: 2 Missing .: 0/43
Tabulation: Freq. Value
20 0
23 1
-------------------------------------------------------------------------------
further_training___5 (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [0,1] Units: 1
Unique values: 2 Missing .: 0/43
Tabulation: Freq. Value
32 0
11 1
-------------------------------------------------------------------------------
further_training___6 (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [0,0] Units: 1
Unique values: 1 Missing .: 0/43
Tabulation: Freq. Value
43 0
-------------------------------------------------------------------------------
stata_ready (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [0,1] Units: 1
Unique values: 2 Missing .: 3/43
Tabulation: Freq. Value
34 0
6 1
3 .
-------------------------------------------------------------------------------
additional_support___1 (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [0,1] Units: 1
Unique values: 2 Missing .: 0/43
Tabulation: Freq. Value
17 0
26 1
-------------------------------------------------------------------------------
additional_support___2 (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [0,1] Units: 1
Unique values: 2 Missing .: 0/43
Tabulation: Freq. Value
25 0
18 1
-------------------------------------------------------------------------------
additional_support___3 (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [0,1] Units: 1
Unique values: 2 Missing .: 0/43
Tabulation: Freq. Value
21 0
22 1
-------------------------------------------------------------------------------
additional_support___4 (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [0,1] Units: 1
Unique values: 2 Missing .: 0/43
Tabulation: Freq. Value
18 0
25 1
-------------------------------------------------------------------------------
additional_support___5 (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [0,1] Units: 1
Unique values: 2 Missing .: 0/43
Tabulation: Freq. Value
42 0
1 1
-------------------------------------------------------------------------------
other_support (unlabeled)
-------------------------------------------------------------------------------
Type: String (str45)
Unique values: 1 Missing "": 42/43
Tabulation: Freq. Value
42 ""
1 "I need to work on real projects using
STATA. "
Warning: Variable has embedded and trailing blanks.
-------------------------------------------------------------------------------
level_of_confidence_complete (unlabeled)
-------------------------------------------------------------------------------
Type: Numeric (byte)
Range: [0,2] Units: 1
Unique values: 2 Missing .: 0/43
Tabulation: Freq. Value
2 0
41 2
-------------------------------------------------------------------------------
_merge Matching result from merge
-------------------------------------------------------------------------------
Type: Numeric (byte)
Label: _merge
Range: [3,3] Units: 1
Unique values: 1 Missing .: 0/43
Tabulation: Freq. Numeric Label
43 3 Matched (3)
* Label home_language and attach its value labels, then save the dataset.
* The previous `rename home_language home_language` renamed the variable to
* itself (Stata only reported "(all newnames==oldnames)"), so it is dropped.
use merged5.dta, clear
* NOTE(review): "Hlanguage" is used both as the variable label text and as the
* value-label name; confirm whether a descriptive label such as
* "Home language" was intended for the variable label.
label variable home_language "Hlanguage"
* `replace` lets this chunk be re-run after the label has been saved in the file.
label define Hlanguage 1 "Afrikaans" 2 "English" 3 "Ndebele" 4 "Pedi" 5 "Sotho" 6 "South African Sign language" 7 "Swati" 8 "Tsonga" 9 "Tswana" 10 "Venda" 11 "Xhosa" 12 "Zulu" 13 "Other" , replace
label values home_language Hlanguage
save merged5.dta, replace
(all newnames==oldnames)
file merged5.dta saved
* Label the english variable and attach its proficiency value labels, then
* save the dataset.
use merged5.dta, clear
label variable english English_Proficiency
* `replace` is required for re-runs: the value label is stored in merged5.dta
* by the `save` below, so a plain `label define` would stop with
* "label English_Proficiency already defined" the next time this chunk runs.
label define English_Proficiency 1 "Novice" 2 "Intermediate" 3 "Advanced" 4 "Superior" 5 "Distinguished", replace
label values english English_Proficiency
save merged5.dta, replace
* Label the study_level variable and attach its value labels, then save the
* dataset.
use merged5.dta, clear
* Stata variable names are case-sensitive: the variable is `study_level`
* (lower-case l). Writing `study_Level` made this chunk stop with r(111)
* "variable study_Level not found".
label variable study_level Study_Level
* `replace` keeps the chunk re-runnable once the label is stored in the file.
* NOTE(review): codes 1/2 are taken from the original definition; confirm them
* against the REDCap data dictionary.
label define Study_Level 1 "Honours" 2 "Masters", replace
* Attach the value label to the variable; defining it alone does not apply it.
label values study_level Study_Level
save merged5.dta, replace
variable study_Level not found
r(111);
r(111);