This document provides generic code examples in SQL, Python, VBA, and R (using tidyverse) that can be used in an interview to assess a candidate’s understanding of basic data manipulation and querying skills.
The following SQL queries test basic understanding of SELECT, JOIN, WHERE, GROUP BY, and aggregate functions:
-- Assuming a hypothetical database with tables: 'Children', 'Hospitals', and 'Births'.
-- 1. Select all details of children born in 2023
SELECT *
FROM Children
WHERE YearOfBirth = 2023;
-- 2. Get the count of children born in each hospital in 2023
SELECT h.HospitalName, COUNT(b.ChildID) AS NumberOfBirths
FROM Hospitals h
JOIN Births b ON h.HospitalID = b.HospitalID
WHERE b.YearOfBirth = 2023
GROUP BY h.HospitalName;
-- 3. Find all children born in a hospital with the name 'City General Hospital'
SELECT c.ChildName, c.DateOfBirth
FROM Children c
JOIN Births b ON c.ChildID = b.ChildID
JOIN Hospitals h ON b.HospitalID = h.HospitalID
WHERE h.HospitalName = 'City General Hospital';
-- 4. Calculate the average weight of children born in 2023
SELECT AVG(c.BirthWeight) AS AverageBirthWeight
FROM Children c
JOIN Births b ON c.ChildID = b.ChildID
WHERE b.YearOfBirth = 2023;
The Python code below assumes the data is in pandas DataFrames and tests basic data manipulation:
import pandas as pd
# Assuming DataFrames: children_df, hospitals_df, births_df
# 1. Select all details of children born in 2023
children_2023 = children_df[children_df['YearOfBirth'] == 2023]
print(children_2023)
# 2. Get the count of children born in each hospital in 2023
births_2023 = births_df[births_df['YearOfBirth'] == 2023]
births_count_by_hospital = births_2023.groupby('HospitalID')['ChildID'].count()
births_count_by_hospital_df = births_count_by_hospital.reset_index()
births_count_by_hospital_df = births_count_by_hospital_df.merge(hospitals_df, on='HospitalID')
print(births_count_by_hospital_df[['HospitalName', 'ChildID']])
# 3. Find all children born in a hospital with the name 'City General Hospital'
city_general_hospital = hospitals_df[hospitals_df['HospitalName'] == 'City General Hospital']
children_city_general = births_df[births_df['HospitalID'].isin(city_general_hospital['HospitalID'])]
children_city_general_details = children_city_general.merge(children_df, on='ChildID')
print(children_city_general_details[['ChildName', 'DateOfBirth']])
# 4. Calculate the average weight of children born in 2023
average_weight_2023 = children_2023['BirthWeight'].mean()
print(f'Average birth weight in 2023: {average_weight_2023}')
This VBA code assumes data is stored in Excel sheets named “Children”, “Hospitals”, and “Births”. It tests basic manipulation and data extraction.
Sub BasicSQLVBA()
' 1. Select all details of children born in 2023
Dim wsChildren As Worksheet
Set wsChildren = ThisWorkbook.Sheets("Children")
Dim lastRow As Long
lastRow = wsChildren.Cells(wsChildren.Rows.Count, "A").End(xlUp).Row
Dim i As Long
For i = 2 To lastRow
If wsChildren.Cells(i, 3).Value = 2023 Then
Debug.Print wsChildren.Cells(i, 1).Value & " " & wsChildren.Cells(i, 2).Value
End If
Next i
' 2. Get the count of children born in each hospital in 2023
Dim wsBirths As Worksheet
Dim wsHospitals As Worksheet
Set wsBirths = ThisWorkbook.Sheets("Births")
Set wsHospitals = ThisWorkbook.Sheets("Hospitals")
Dim hospitalID As String
Dim countDict As Object
Set countDict = CreateObject("Scripting.Dictionary")
lastRow = wsBirths.Cells(wsBirths.Rows.Count, "A").End(xlUp).Row
For i = 2 To lastRow
If wsBirths.Cells(i, 3).Value = 2023 Then
hospitalID = wsBirths.Cells(i, 2).Value
If countDict.exists(hospitalID) Then
countDict(hospitalID) = countDict(hospitalID) + 1
Else
countDict.Add hospitalID, 1
End If
End If
Next i
For Each key In countDict.Keys
Debug.Print "Hospital ID: " & key & " Number of Births: " & countDict(key)
Next key
' 3. Find all children born in a hospital with the name 'City General Hospital'
Dim hospitalName As String
hospitalName = "City General Hospital"
lastRow = wsHospitals.Cells(wsHospitals.Rows.Count, "A").End(xlUp).Row
For i = 2 To lastRow
If wsHospitals.Cells(i, 2).Value = hospitalName Then
hospitalID = wsHospitals.Cells(i, 1).Value
End If
Next i
lastRow = wsBirths.Cells(wsBirths.Rows.Count, "A").End(xlUp).Row
For i = 2 To lastRow
If wsBirths.Cells(i, 2).Value = hospitalID Then
Dim childID As String
childID = wsBirths.Cells(i, 1).Value
Debug.Print "Child ID: " & childID
End If
Next i
' 4. Calculate the average weight of children born in 2023
Dim totalWeight As Double
Dim count As Long
totalWeight = 0
count = 0
lastRow = wsChildren.Cells(wsChildren.Rows.Count, "A").End(xlUp).Row
For i = 2 To lastRow
If wsChildren.Cells(i, 3).Value = 2023 Then
totalWeight = totalWeight + wsChildren.Cells(i, 4).Value
count = count + 1
End If
Next i
If count > 0 Then
Debug.Print "Average Birth Weight in 2023: " & (totalWeight / count)
Else
Debug.Print "No children born in 2023"
End If
End Sub
Here’s the R code using the tidyverse package, assuming
the data is stored in data frames named children,
hospitals, and births.
library(tidyverse)
# Assuming data frames: children, hospitals, births
# 1. Select all details of children born in 2023
children_2023 <- children %>%
filter(YearOfBirth == 2023)
print(children_2023)
# 2. Get the count of children born in each hospital in 2023
births_2023 <- births %>%
filter(YearOfBirth == 2023) %>%
group_by(HospitalID) %>%
summarise(NumberOfBirths = n()) %>%
left_join(hospitals, by = "HospitalID")
print(births_2023 %>% select(HospitalName, NumberOfBirths))
# 3. Find all children born in a hospital with the name 'City General Hospital'
children_city_general <- hospitals %>%
filter(HospitalName == "City General Hospital") %>%
left_join(births, by = "HospitalID") %>%
left_join(children, by = "ChildID")
print(children_city_general %>% select(ChildName, DateOfBirth))
# 4. Calculate the average weight of children born in 2023
average_weight_2023 <- children_2023 %>%
summarise(AverageBirthWeight = mean(BirthWeight, na.rm = TRUE))
print(average_weight_2023)