Install/run required
libraries
| library(devtools) |
| library(DBI) |
| library(tidyverse) |
| library(RSQLite) |
| library(DescTools) |
| library(DT) |
| library(Datatablethemes) |
SQL_Connection <- dbConnect(SQLite(), "SQLite Test.db")
dbListTables(SQL_Connection)
dbListFields(SQL_Connection, "table_to_join")
## [1] "dataset_1" "table_to_join" "table_to_union"
## [1] "time" "part_of_day"
dataSet_1 <- tbl(SQL_Connection, "dataset_1")
table_to_join <- tbl(SQL_Connection, "table_to_join")
table_to_union <- tbl(SQL_Connection, "table_to_union")
dataSet_1.df <- data.frame(dataSet_1)
table_to_join.df <- data.frame(table_to_join)
table_to_union.df <- data.frame(table_to_union)
R Code
Selecting specific variables / limiting to
10 observations
dataSet_1.df %>%
select(destination, passanger) %>%
Datatablethemes::Datatable_red()
Unique Passanger values
unique(dataSet_1.df$passanger)
## [1] "Alone" "Friend(s)" "Kid(s)" "Partner"
Filtering where passanger is = ‘Alone’ or
time is = ‘2PM’ | Then arrange time by descending
dataSet_1.df %>%
filter(passanger == 'Alone' | time == '2PM') %>%
arrange(desc(time)) %>%
Datatable_green()
Changing the variable name of time to “The
Time”
dataSet_1.df %>%
select(destination, passanger, time) %>%
filter(passanger == 'Alone' | time == '2PM') %>%
arrange(desc(time)) %>%
mutate(The_Time = time) %>%
select(-time) %>%
Datatablethemes::Datatable_blue()
Aggrogate average temperature by
destination & time
dataSet_1.df %>%
group_by(destination, time) %>%
summarise(AVG_Temp = mean(temperature),
SUM_Temp = sum(temperature),
Count_Temp = length(temperature),
Count.distinct_Temp = length(unique(temperature))) %>%
filter(time != '10PM') %>%
arrange(AVG_Temp) %>%
Datatablethemes::Datatable_purple()
Union 2 tables(Stacking) | Then checking
unique values of destination to make sure it worked
table_to_union.df$age <- as.character(table_to_union.df$age)
union_table <- union(dataSet_1.df, table_to_union.df)
unique(union_table$destination)
## [1] "No Urgent Place" "Home" "Work" "UNION"
Left Join on dataset_1 table by time to
include part of day
left_join(dataSet_1.df, table_to_join.df, by = "time") %>%
select(destination, time, part_of_day) %>%
Datatablethemes::Datatable_red()
Advanced filtering
dataSet_1 %>%
filter(time %like% "%P%") %>%
as.data.frame() %>%
Datatablethemes::Datatable_green()
Between example:
dataSet_1.df %>%
select(temperature) %>%
unique() %>%
filter(between(temperature, 29, 75))
## temperature
## 1 55
## 2 30
Over & partition by
example:
dataSet_1.df %>%
group_by(weather) %>%
summarise(avg_temp_by_weather = mean(temperature)) %>%
left_join(dataSet_1.df, ., by = "weather") %>%
select(destination, weather, avg_temp_by_weather) %>%
arrange(weather) %>%
Datatablethemes::Datatable_purple()
SQL Code
Selecting specific variables / limiting to
10 observations
-- Hello
SELECT destination, passanger
FROM dataset_1
LIMIT 10;
Displaying records 1 - 10
| No Urgent Place |
Alone |
| No Urgent Place |
Friend(s) |
| No Urgent Place |
Friend(s) |
| No Urgent Place |
Friend(s) |
| No Urgent Place |
Friend(s) |
| No Urgent Place |
Friend(s) |
| No Urgent Place |
Friend(s) |
| No Urgent Place |
Kid(s) |
| No Urgent Place |
Kid(s) |
| No Urgent Place |
Kid(s) |
Unique Passanger values
SELECT DISTINCT passanger
FROM dataset_1;
4 records
| Alone |
| Friend(s) |
| Kid(s) |
| Partner |
Filtering where passanger is = ‘Alone’ or
time is = ‘2PM’ | Then arrange time by descending
SELECT *
FROM dataset_1
WHERE passanger = 'Alone'
OR time = '2PM'
ORDER BY time DESC;
Displaying records 1 - 10
| Work |
Alone |
Sunny |
55 |
7AM |
Coffee House |
2h |
Female |
21 |
Unmarried partner |
1 |
Some college - no degree |
Unemployed |
$37500 - $49999 |
|
never |
never |
|
4~8 |
1~3 |
1 |
1 |
1 |
0 |
1 |
1 |
17 |
| Work |
Alone |
Sunny |
55 |
7AM |
Bar |
1d |
Female |
21 |
Unmarried partner |
1 |
Some college - no degree |
Unemployed |
$37500 - $49999 |
|
never |
never |
|
4~8 |
1~3 |
1 |
1 |
1 |
0 |
1 |
0 |
18 |
| Work |
Alone |
Sunny |
80 |
7AM |
Restaurant(20-50) |
1d |
Female |
21 |
Unmarried partner |
1 |
Some college - no degree |
Unemployed |
$37500 - $49999 |
|
never |
never |
|
4~8 |
1~3 |
1 |
1 |
0 |
0 |
1 |
1 |
19 |
| Work |
Alone |
Sunny |
80 |
7AM |
Carry out & Take away |
2h |
Female |
21 |
Unmarried partner |
1 |
Some college - no degree |
Unemployed |
$37500 - $49999 |
|
never |
never |
|
4~8 |
1~3 |
1 |
0 |
0 |
1 |
0 |
1 |
20 |
| Work |
Alone |
Sunny |
55 |
7AM |
Restaurant(<20) |
1d |
Female |
21 |
Unmarried partner |
1 |
Some college - no degree |
Unemployed |
$37500 - $49999 |
|
never |
never |
|
4~8 |
1~3 |
1 |
0 |
0 |
0 |
1 |
0 |
21 |
| Work |
Alone |
Sunny |
55 |
7AM |
Coffee House |
2h |
Female |
21 |
Unmarried partner |
1 |
Some college - no degree |
Unemployed |
$37500 - $49999 |
|
never |
never |
|
4~8 |
1~3 |
1 |
1 |
0 |
0 |
1 |
1 |
22 |
| Work |
Alone |
Sunny |
55 |
7AM |
Coffee House |
2h |
Male |
21 |
Single |
0 |
Bachelors degree |
Architecture & Engineering |
$62500 - $74999 |
|
never |
less1 |
4~8 |
4~8 |
less1 |
1 |
1 |
1 |
0 |
1 |
0 |
39 |
| Work |
Alone |
Sunny |
55 |
7AM |
Bar |
1d |
Male |
21 |
Single |
0 |
Bachelors degree |
Architecture & Engineering |
$62500 - $74999 |
|
never |
less1 |
4~8 |
4~8 |
less1 |
1 |
1 |
1 |
0 |
1 |
1 |
40 |
| Work |
Alone |
Sunny |
80 |
7AM |
Restaurant(20-50) |
1d |
Male |
21 |
Single |
0 |
Bachelors degree |
Architecture & Engineering |
$62500 - $74999 |
|
never |
less1 |
4~8 |
4~8 |
less1 |
1 |
1 |
0 |
0 |
1 |
0 |
41 |
| Work |
Alone |
Sunny |
80 |
7AM |
Carry out & Take away |
2h |
Male |
21 |
Single |
0 |
Bachelors degree |
Architecture & Engineering |
$62500 - $74999 |
|
never |
less1 |
4~8 |
4~8 |
less1 |
1 |
0 |
0 |
1 |
0 |
1 |
42 |
Changing the variable name of time to “The
Time”
SELECT
destination ,
passanger ,
time as 'The Time'
FROM dataset_1
WHERE passanger = 'Alone'
OR time = '2PM'
ORDER BY time DESC;
Displaying records 1 - 10
| Work |
Alone |
7AM |
| Work |
Alone |
7AM |
| Work |
Alone |
7AM |
| Work |
Alone |
7AM |
| Work |
Alone |
7AM |
| Work |
Alone |
7AM |
| Work |
Alone |
7AM |
| Work |
Alone |
7AM |
| Work |
Alone |
7AM |
| Work |
Alone |
7AM |
Aggrogate average temperature by
destination & time
SELECT
destination,
time,
AVG(temperature),
SUM(temperature),
count(temperature),
COUNT(DISTINCT temperature)
FROM dataset_1 d
WHERE time <> '10PM'
GROUP BY destination, time
ORDER BY AVG(temperature);
5 records
| Work |
7AM |
59.94627 |
189670 |
3164 |
3 |
| Home |
6PM |
64.50306 |
136940 |
2123 |
3 |
| No Urgent Place |
2PM |
65.14186 |
130870 |
2009 |
3 |
| No Urgent Place |
10AM |
68.75824 |
156425 |
2275 |
3 |
| No Urgent Place |
6PM |
69.99548 |
77485 |
1107 |
3 |
Union 2 tables(Stacking) | Then checking
unique values of destination to make sure it worked
SELECT DISTINCT destination
FROM
(
SELECT *
FROM dataset_1 d
UNION
SELECT *
FROM table_to_union ttu);
4 records
| Home |
| No Urgent Place |
| UNION |
| Work |
Left Join on dataset_1 table by time to
include part of day
SELECT destination,
d.time,
ttj.part_of_day
FROM dataset_1 d
LEFT JOIN table_to_join ttj
ON d.time = ttj.time;
Displaying records 1 - 10
| No Urgent Place |
2PM |
Afternoon |
| No Urgent Place |
10AM |
Morning |
| No Urgent Place |
10AM |
Morning |
| No Urgent Place |
2PM |
Afternoon |
| No Urgent Place |
2PM |
Afternoon |
| No Urgent Place |
6PM |
Evening |
| No Urgent Place |
2PM |
Afternoon |
| No Urgent Place |
10AM |
Morning |
| No Urgent Place |
10AM |
Morning |
| No Urgent Place |
10AM |
Morning |
Advanced filtering
SELECT *
FROM dataset_1 d
WHERE d.time LIKE '%p%';
Displaying records 1 - 10
| No Urgent Place |
Alone |
Sunny |
55 |
2PM |
Restaurant(<20) |
1d |
Female |
21 |
Unmarried partner |
1 |
Some college - no degree |
Unemployed |
$37500 - $49999 |
|
never |
never |
|
4~8 |
1~3 |
1 |
0 |
0 |
0 |
1 |
1 |
1 |
| No Urgent Place |
Friend(s) |
Sunny |
80 |
2PM |
Coffee House |
2h |
Female |
21 |
Unmarried partner |
1 |
Some college - no degree |
Unemployed |
$37500 - $49999 |
|
never |
never |
|
4~8 |
1~3 |
1 |
1 |
0 |
0 |
1 |
0 |
4 |
| No Urgent Place |
Friend(s) |
Sunny |
80 |
2PM |
Coffee House |
1d |
Female |
21 |
Unmarried partner |
1 |
Some college - no degree |
Unemployed |
$37500 - $49999 |
|
never |
never |
|
4~8 |
1~3 |
1 |
1 |
0 |
0 |
1 |
0 |
5 |
| No Urgent Place |
Friend(s) |
Sunny |
80 |
6PM |
Restaurant(<20) |
2h |
Female |
21 |
Unmarried partner |
1 |
Some college - no degree |
Unemployed |
$37500 - $49999 |
|
never |
never |
|
4~8 |
1~3 |
1 |
1 |
0 |
0 |
1 |
1 |
6 |
| No Urgent Place |
Friend(s) |
Sunny |
55 |
2PM |
Carry out & Take away |
1d |
Female |
21 |
Unmarried partner |
1 |
Some college - no degree |
Unemployed |
$37500 - $49999 |
|
never |
never |
|
4~8 |
1~3 |
1 |
1 |
0 |
0 |
1 |
1 |
7 |
| No Urgent Place |
Kid(s) |
Sunny |
80 |
2PM |
Restaurant(<20) |
1d |
Female |
21 |
Unmarried partner |
1 |
Some college - no degree |
Unemployed |
$37500 - $49999 |
|
never |
never |
|
4~8 |
1~3 |
1 |
0 |
0 |
0 |
1 |
1 |
11 |
| No Urgent Place |
Kid(s) |
Sunny |
55 |
2PM |
Restaurant(<20) |
1d |
Female |
21 |
Unmarried partner |
1 |
Some college - no degree |
Unemployed |
$37500 - $49999 |
|
never |
never |
|
4~8 |
1~3 |
1 |
1 |
0 |
0 |
1 |
1 |
12 |
| No Urgent Place |
Kid(s) |
Sunny |
55 |
6PM |
Coffee House |
2h |
Female |
21 |
Unmarried partner |
1 |
Some college - no degree |
Unemployed |
$37500 - $49999 |
|
never |
never |
|
4~8 |
1~3 |
1 |
1 |
0 |
0 |
1 |
1 |
13 |
| Home |
Alone |
Sunny |
55 |
6PM |
Bar |
1d |
Female |
21 |
Unmarried partner |
1 |
Some college - no degree |
Unemployed |
$37500 - $49999 |
|
never |
never |
|
4~8 |
1~3 |
1 |
0 |
0 |
1 |
0 |
1 |
14 |
| Home |
Alone |
Sunny |
55 |
6PM |
Restaurant(20-50) |
1d |
Female |
21 |
Unmarried partner |
1 |
Some college - no degree |
Unemployed |
$37500 - $49999 |
|
never |
never |
|
4~8 |
1~3 |
1 |
1 |
0 |
0 |
1 |
1 |
15 |
Between example:
SELECT distinct temperature
FROM dataset_1 d
WHERE temperature BETWEEN 29 AND 75;
Over & partition by
example:
SELECT
destination,
weather,
AVG(temperature) OVER (PARTITION BY weather) AS 'avg_temp_by_weather'
FROM dataset_1 d;
Displaying records 1 - 10
| No Urgent Place |
Rainy |
55 |
| No Urgent Place |
Rainy |
55 |
| No Urgent Place |
Rainy |
55 |
| Work |
Rainy |
55 |
| No Urgent Place |
Rainy |
55 |
| No Urgent Place |
Rainy |
55 |
| No Urgent Place |
Rainy |
55 |
| Work |
Rainy |
55 |
| No Urgent Place |
Rainy |
55 |
| No Urgent Place |
Rainy |
55 |
Using Rank()
SELECT
destination,
weather,
Rank() OVER (PARTITION BY weather ORDER BY destination)
FROM dataset_1 d;
Displaying records 1 - 10
| Home |
Rainy |
1 |
| Home |
Rainy |
1 |
| Home |
Rainy |
1 |
| Home |
Rainy |
1 |
| Home |
Rainy |
1 |
| Home |
Rainy |
1 |
| Home |
Rainy |
1 |
| Home |
Rainy |
1 |
| Home |
Rainy |
1 |
| Home |
Rainy |
1 |