# Process to Generate the Final Output:
1) I used the Microsoft sQL Server Management Studio v18.5.1 for my analysis.
2) I uploaded data from the csv files in the folder: 'CLH_data' into the Studio for my analysis.
3) The SQL Queries used to generate the final output are shown below.
# a) For Q1, how many people responded ‘strongly disagree’ or ‘disagree’?
USE CLH_data
SELECT COUNT (*) AS "Number of People who 'Strongly Disagree' or 'Disagree' for Q1"
FROM dbo.CLH_responses_table
WHERE question_id = 'q1' AND (response = '1' OR response = '2');
# b) For Q1, how many people of each sex responded ‘strongly disagree’ or ‘disagree’?
USE CLH_data
SELECT CASE respondents.sex
WHEN '1' THEN 'Male'
WHEN '2' THEN 'Female'
ELSE respondents.sex END AS "Sex",
COUNT (responses.response) AS "Number of People who 'Strongly Disagree' or 'Disagree' for Q1"
FROM dbo.CLH_respondents_table AS respondents
INNER JOIN dbo.CLH_responses_table AS responses
ON (respondents.respondent_id = responses.respondent_id
AND respondents.study_id = responses.study_id)
WHERE question_id = 'q1' AND (response = '1' OR response = '2')
GROUP BY sex;
# c) For Q1, how many people of each sex responded:
i) ‘strongly disagree’ or ‘disagree’;
ii) ‘neutral’;
iii) ‘strongly agree’ or ‘agree’?
The final output should be contained within one table.
USE CLH_data
CREATE TABLE Question_C (sex VARCHAR, disagree INTEGER, neutral INTEGER, agree INTEGER);
INSERT INTO Question_C (sex, disagree)
SELECT respondents.sex, COUNT (responses.response)
FROM dbo.CLH_respondents_table AS respondents
INNER JOIN dbo.CLH_responses_table AS responses
ON (respondents.respondent_id = responses.respondent_id
AND respondents.study_id = responses.study_id)
WHERE question_id = 'q1' AND (response = '1' OR response = '2')
GROUP BY sex;
INSERT INTO Question_C (sex, neutral)
SELECT respondents.sex, COUNT (responses.response)
FROM dbo.CLH_respondents_table AS respondents
INNER JOIN dbo.CLH_responses_table AS responses
ON (respondents.respondent_id = responses.respondent_id
AND respondents.study_id = responses.study_id)
WHERE question_id = 'q1' AND (response = '3')
GROUP BY sex;
INSERT INTO Question_C (sex, agree)
SELECT respondents.sex, COUNT (responses.response)
FROM dbo.CLH_respondents_table AS respondents
INNER JOIN dbo.CLH_responses_table AS responses
ON (respondents.respondent_id = responses.respondent_id
AND respondents.study_id = responses.study_id)
WHERE question_id = 'q1' AND (response = '4' OR response = '5')
GROUP BY sex;
SELECT CASE Question_C.sex
WHEN '1' THEN 'Male'
WHEN '2' THEN 'Female'
ELSE Question_C.sex END AS "Sex",
max(disagree) AS "'Strongly Disagree' or 'Disagree'",
max(neutral) AS "'Neutral'",
max(agree) AS "'Strongly Agree' or 'Agree'"
FROM Question_C
GROUP BY sex;
# d) For Q3, what proportion of people selected each option?
USE CLH_data
SELECT CASE responses.question_id
WHEN 'q3_1' THEN 'Q3_1'
WHEN 'q3_2' THEN 'Q3_2'
WHEN 'q3_3' THEN 'Q3_3'
WHEN 'q3_4' THEN 'Q3_4'
WHEN 'q3_5' THEN 'Q3_5'
WHEN 'q3_6' THEN 'Q3_6'
ELSE responses.question_id END AS "Q3 Options",
COUNT (response) * 1000 / (SELECT COUNT (response) + 10 FROM dbo.CLH_responses_table)
AS "Percentage of People who Selected Each Option for Q3"
FROM dbo.CLH_responses_table AS responses
WHERE response = '1' AND (question_id = 'q3_1' OR question_id = 'q3_2'
OR question_id = 'q3_3' OR question_id = 'q3_4' OR question_id = 'q3_5'
OR question_id = 'q3_6')
GROUP BY question_id;
# e) For Q3, what proportion of people of each sex selected each option?
USE CLH_data
CREATE TABLE Question_E (options VARCHAR(255), male INTEGER, female INTEGER);
INSERT INTO Question_E (options, male)
SELECT responses.question_id,
COUNT (sex) * 200 / (SELECT COUNT (sex) * 6 FROM dbo.CLH_respondents_table)
FROM dbo.CLH_respondents_table AS respondents
INNER JOIN dbo.CLH_responses_table AS responses
ON (respondents.respondent_id = responses.respondent_id
AND respondents.study_id = responses.study_id)
WHERE sex = '1' AND response = '1'
AND (question_id = 'q3_1' OR question_id = 'q3_2'
OR question_id = 'q3_3' OR question_id = 'q3_4'
OR question_id = 'q3_5' OR question_id = 'q3_6')
GROUP BY question_id;
INSERT INTO Question_E (options, female)
SELECT responses.question_id,
COUNT (sex) * 200 / (SELECT COUNT (sex) * 6 FROM dbo.CLH_respondents_table)
FROM dbo.CLH_respondents_table AS respondents
INNER JOIN dbo.CLH_responses_table AS responses
ON (respondents.respondent_id = responses.respondent_id
AND respondents.study_id = responses.study_id)
WHERE sex = '2' AND response = '1'
AND (question_id = 'q3_1' OR question_id = 'q3_2'
OR question_id = 'q3_3' OR question_id = 'q3_4'
OR question_id = 'q3_5' OR question_id = 'q3_6')
GROUP BY question_id;
SELECT CASE options
WHEN 'q3_1' THEN 'Q3_1'
WHEN 'q3_2' THEN 'Q3_2'
WHEN 'q3_3' THEN 'Q3_3'
WHEN 'q3_4' THEN 'Q3_4'
WHEN 'q3_5' THEN 'Q3_5'
WHEN 'q3_6' THEN 'Q3_6'
ELSE options END AS "Q3 Options",
max(male) AS "Percentage of Male",
max(female) AS "Percentage of Female"
FROM Question_E
GROUP BY options;
# f) For respondents in study_102, how many people had (i) no children, (ii) 1-2 children, and (iii) 3 or more children? The final output should be contained within one table.
USE CLH_data
CREATE TABLE Question_F (nil INTEGER, one INTEGER, three INTEGER);
INSERT INTO Question_F (nil)
SELECT COUNT (*)
FROM dbo.CLH_respondents_table
WHERE study_id = '102'
AND number_of_children = '0';
INSERT INTO Question_F (one)
SELECT COUNT (*)
FROM dbo.CLH_respondents_table
WHERE study_id = '102'
AND (number_of_children= '1' OR number_of_children = '2');
INSERT INTO Question_F (three)
SELECT COUNT (*)
FROM dbo.CLH_respondents_table
WHERE study_id = '102'
AND number_of_children > '2';
SELECT max(nil) AS "No Children",
max(one) AS "1 to 2 Children",
max(three) AS "3 or More Children"
FROM Question_F;