Install/run required libraries

library(devtools)
library(DBI)
library(tidyverse)
library(RSQLite)
library(DescTools)
library(DT)
library(Datatablethemes)
SQL_Connection <- dbConnect(SQLite(), "SQLite Test.db")
dbListTables(SQL_Connection)
dbListFields(SQL_Connection, "table_to_join")
## [1] "dataset_1"      "table_to_join"  "table_to_union"
## [1] "time"        "part_of_day"
dataSet_1 <- tbl(SQL_Connection, "dataset_1")
table_to_join <- tbl(SQL_Connection, "table_to_join")
table_to_union <- tbl(SQL_Connection, "table_to_union")

dataSet_1.df <- data.frame(dataSet_1)
table_to_join.df <- data.frame(table_to_join)
table_to_union.df <- data.frame(table_to_union)

R Code

Selecting specific variables / limiting to 10 observations

dataSet_1.df %>% 
  select(destination, passanger) %>% 
  Datatablethemes::Datatable_red()

Unique Passanger values

unique(dataSet_1.df$passanger)
## [1] "Alone"     "Friend(s)" "Kid(s)"    "Partner"

Filtering where passanger is = ‘Alone’ or time is = ‘2PM’ | Then arrange time by descending

dataSet_1.df %>% 
  filter(passanger == 'Alone' | time == '2PM') %>% 
  arrange(desc(time)) %>% 
  Datatable_green()

Changing the variable name of time to “The Time”

dataSet_1.df %>% 
  select(destination, passanger, time) %>% 
  filter(passanger == 'Alone' | time == '2PM') %>% 
  arrange(desc(time)) %>% 
  mutate(The_Time = time) %>% 
  select(-time) %>% 
  Datatablethemes::Datatable_blue()

Aggrogate average temperature by destination & time

dataSet_1.df %>%
  group_by(destination, time) %>% 
  summarise(AVG_Temp = mean(temperature), 
            SUM_Temp = sum(temperature),
            Count_Temp = length(temperature),
            Count.distinct_Temp = length(unique(temperature))) %>% 
  filter(time != '10PM') %>% 
  arrange(AVG_Temp) %>% 
  Datatablethemes::Datatable_purple()

Union 2 tables(Stacking) | Then checking unique values of destination to make sure it worked

table_to_union.df$age <- as.character(table_to_union.df$age)
union_table <- union(dataSet_1.df, table_to_union.df)

unique(union_table$destination)
## [1] "No Urgent Place" "Home"            "Work"            "UNION"

Left Join on dataset_1 table by time to include part of day

left_join(dataSet_1.df, table_to_join.df, by = "time") %>% 
  select(destination, time, part_of_day) %>% 
  Datatablethemes::Datatable_red()

Advanced filtering

dataSet_1 %>% 
  filter(time %like% "%P%") %>% 
  as.data.frame() %>% 
  Datatablethemes::Datatable_green()

Between example:

dataSet_1.df %>% 
  select(temperature) %>% 
  unique() %>% 
  filter(between(temperature, 29, 75))
##   temperature
## 1          55
## 2          30

Over & partition by example:

dataSet_1.df %>% 
  group_by(weather) %>% 
  summarise(avg_temp_by_weather = mean(temperature)) %>% 
  left_join(dataSet_1.df, ., by = "weather") %>% 
  select(destination, weather, avg_temp_by_weather) %>% 
  arrange(weather) %>% 
  Datatablethemes::Datatable_purple()

SQL Code

Selecting specific variables / limiting to 10 observations

-- Hello
SELECT destination, passanger
FROM dataset_1
LIMIT 10;
Displaying records 1 - 10
destination passanger
No Urgent Place Alone
No Urgent Place Friend(s)
No Urgent Place Friend(s)
No Urgent Place Friend(s)
No Urgent Place Friend(s)
No Urgent Place Friend(s)
No Urgent Place Friend(s)
No Urgent Place Kid(s)
No Urgent Place Kid(s)
No Urgent Place Kid(s)

Unique Passanger values

SELECT DISTINCT passanger
FROM dataset_1;
4 records
passanger
Alone
Friend(s)
Kid(s)
Partner

Filtering where passanger is = ‘Alone’ or time is = ‘2PM’ | Then arrange time by descending

SELECT *
FROM dataset_1
WHERE passanger = 'Alone'
OR time = '2PM'
ORDER BY time DESC;
Displaying records 1 - 10
destination passanger weather temperature time coupon expiration gender age maritalStatus has_children education occupation income car Bar CoffeeHouse CarryAway RestaurantLessThan20 Restaurant20To50 toCoupon_GEQ5min toCoupon_GEQ15min toCoupon_GEQ25min direction_same direction_opp Y row_count
Work Alone Sunny 55 7AM Coffee House 2h Female 21 Unmarried partner 1 Some college - no degree Unemployed $37500 - $49999 never never 4~8 1~3 1 1 1 0 1 1 17
Work Alone Sunny 55 7AM Bar 1d Female 21 Unmarried partner 1 Some college - no degree Unemployed $37500 - $49999 never never 4~8 1~3 1 1 1 0 1 0 18
Work Alone Sunny 80 7AM Restaurant(20-50) 1d Female 21 Unmarried partner 1 Some college - no degree Unemployed $37500 - $49999 never never 4~8 1~3 1 1 0 0 1 1 19
Work Alone Sunny 80 7AM Carry out & Take away 2h Female 21 Unmarried partner 1 Some college - no degree Unemployed $37500 - $49999 never never 4~8 1~3 1 0 0 1 0 1 20
Work Alone Sunny 55 7AM Restaurant(<20) 1d Female 21 Unmarried partner 1 Some college - no degree Unemployed $37500 - $49999 never never 4~8 1~3 1 0 0 0 1 0 21
Work Alone Sunny 55 7AM Coffee House 2h Female 21 Unmarried partner 1 Some college - no degree Unemployed $37500 - $49999 never never 4~8 1~3 1 1 0 0 1 1 22
Work Alone Sunny 55 7AM Coffee House 2h Male 21 Single 0 Bachelors degree Architecture & Engineering $62500 - $74999 never less1 4~8 4~8 less1 1 1 1 0 1 0 39
Work Alone Sunny 55 7AM Bar 1d Male 21 Single 0 Bachelors degree Architecture & Engineering $62500 - $74999 never less1 4~8 4~8 less1 1 1 1 0 1 1 40
Work Alone Sunny 80 7AM Restaurant(20-50) 1d Male 21 Single 0 Bachelors degree Architecture & Engineering $62500 - $74999 never less1 4~8 4~8 less1 1 1 0 0 1 0 41
Work Alone Sunny 80 7AM Carry out & Take away 2h Male 21 Single 0 Bachelors degree Architecture & Engineering $62500 - $74999 never less1 4~8 4~8 less1 1 0 0 1 0 1 42

Changing the variable name of time to “The Time”

SELECT
destination ,
passanger ,
time as 'The Time'
FROM dataset_1
WHERE passanger = 'Alone'
OR time = '2PM'
ORDER BY time DESC;
Displaying records 1 - 10
destination passanger The Time
Work Alone 7AM
Work Alone 7AM
Work Alone 7AM
Work Alone 7AM
Work Alone 7AM
Work Alone 7AM
Work Alone 7AM
Work Alone 7AM
Work Alone 7AM
Work Alone 7AM

Aggrogate average temperature by destination & time

SELECT 
destination, 
time,
AVG(temperature),
SUM(temperature),
count(temperature),
COUNT(DISTINCT temperature)
FROM dataset_1 d
WHERE time <> '10PM'
GROUP BY destination, time
ORDER BY AVG(temperature);
5 records
destination time AVG(temperature) SUM(temperature) count(temperature) COUNT(DISTINCT temperature)
Work 7AM 59.94627 189670 3164 3
Home 6PM 64.50306 136940 2123 3
No Urgent Place 2PM 65.14186 130870 2009 3
No Urgent Place 10AM 68.75824 156425 2275 3
No Urgent Place 6PM 69.99548 77485 1107 3

Union 2 tables(Stacking) | Then checking unique values of destination to make sure it worked

SELECT DISTINCT destination 
FROM
(
SELECT *
FROM dataset_1 d 
UNION
SELECT *
FROM table_to_union ttu);
4 records
destination
Home
No Urgent Place
UNION
Work

Left Join on dataset_1 table by time to include part of day

SELECT destination, 
d.time, 
ttj.part_of_day
FROM dataset_1 d 
LEFT JOIN table_to_join ttj 
ON d.time = ttj.time;
Displaying records 1 - 10
destination time part_of_day
No Urgent Place 2PM Afternoon
No Urgent Place 10AM Morning
No Urgent Place 10AM Morning
No Urgent Place 2PM Afternoon
No Urgent Place 2PM Afternoon
No Urgent Place 6PM Evening
No Urgent Place 2PM Afternoon
No Urgent Place 10AM Morning
No Urgent Place 10AM Morning
No Urgent Place 10AM Morning

Advanced filtering

SELECT *
FROM dataset_1 d
WHERE d.time LIKE '%p%';
Displaying records 1 - 10
destination passanger weather temperature time coupon expiration gender age maritalStatus has_children education occupation income car Bar CoffeeHouse CarryAway RestaurantLessThan20 Restaurant20To50 toCoupon_GEQ5min toCoupon_GEQ15min toCoupon_GEQ25min direction_same direction_opp Y row_count
No Urgent Place Alone Sunny 55 2PM Restaurant(<20) 1d Female 21 Unmarried partner 1 Some college - no degree Unemployed $37500 - $49999 never never 4~8 1~3 1 0 0 0 1 1 1
No Urgent Place Friend(s) Sunny 80 2PM Coffee House 2h Female 21 Unmarried partner 1 Some college - no degree Unemployed $37500 - $49999 never never 4~8 1~3 1 1 0 0 1 0 4
No Urgent Place Friend(s) Sunny 80 2PM Coffee House 1d Female 21 Unmarried partner 1 Some college - no degree Unemployed $37500 - $49999 never never 4~8 1~3 1 1 0 0 1 0 5
No Urgent Place Friend(s) Sunny 80 6PM Restaurant(<20) 2h Female 21 Unmarried partner 1 Some college - no degree Unemployed $37500 - $49999 never never 4~8 1~3 1 1 0 0 1 1 6
No Urgent Place Friend(s) Sunny 55 2PM Carry out & Take away 1d Female 21 Unmarried partner 1 Some college - no degree Unemployed $37500 - $49999 never never 4~8 1~3 1 1 0 0 1 1 7
No Urgent Place Kid(s) Sunny 80 2PM Restaurant(<20) 1d Female 21 Unmarried partner 1 Some college - no degree Unemployed $37500 - $49999 never never 4~8 1~3 1 0 0 0 1 1 11
No Urgent Place Kid(s) Sunny 55 2PM Restaurant(<20) 1d Female 21 Unmarried partner 1 Some college - no degree Unemployed $37500 - $49999 never never 4~8 1~3 1 1 0 0 1 1 12
No Urgent Place Kid(s) Sunny 55 6PM Coffee House 2h Female 21 Unmarried partner 1 Some college - no degree Unemployed $37500 - $49999 never never 4~8 1~3 1 1 0 0 1 1 13
Home Alone Sunny 55 6PM Bar 1d Female 21 Unmarried partner 1 Some college - no degree Unemployed $37500 - $49999 never never 4~8 1~3 1 0 0 1 0 1 14
Home Alone Sunny 55 6PM Restaurant(20-50) 1d Female 21 Unmarried partner 1 Some college - no degree Unemployed $37500 - $49999 never never 4~8 1~3 1 1 0 0 1 1 15

Between example:

SELECT distinct temperature
FROM dataset_1 d
WHERE temperature BETWEEN 29 AND 75;
2 records
temperature
55
30

Over & partition by example:

SELECT 
destination,
weather,
AVG(temperature) OVER (PARTITION BY weather) AS 'avg_temp_by_weather' 
FROM dataset_1 d;
Displaying records 1 - 10
destination weather avg_temp_by_weather
No Urgent Place Rainy 55
No Urgent Place Rainy 55
No Urgent Place Rainy 55
Work Rainy 55
No Urgent Place Rainy 55
No Urgent Place Rainy 55
No Urgent Place Rainy 55
Work Rainy 55
No Urgent Place Rainy 55
No Urgent Place Rainy 55

Using Rank()

SELECT 
destination,
weather,
Rank() OVER (PARTITION BY weather ORDER BY destination)
FROM dataset_1 d;
Displaying records 1 - 10
destination weather Rank() OVER (PARTITION BY weather ORDER BY destination)
Home Rainy 1
Home Rainy 1
Home Rainy 1
Home Rainy 1
Home Rainy 1
Home Rainy 1
Home Rainy 1
Home Rainy 1
Home Rainy 1
Home Rainy 1