options(width=100)
knitr::opts_chunk$set(out.width='1000px',dpi=200,message=FALSE,warning=FALSE)
#load packages and csv file
library(ggplot2)
library(dplyr)
library(gridExtra)
library(grid)
library(corrplot)
library(fivethirtyeight)
library(ggthemes)
library(scales)
library(igraph)
library(ggraph)
library(plyr)

1 Introduction

This dataset represents the details of each painting of Bob Ross. Bob Ross was an american painter and host of the The Joy of Painting, an instructional television program that aired from 1983 to 1994 on PBS in the United States (wikipedia).

The original and detailed analysis of this data has been published on 538 by W. Hickey. In this notebook I will look at the following features :

2 Data preparation

2.1 Cleaning

As explained in the 538 article, there is a minor cleaning to do :

  • there are episodes where Bob Ross wasn’t the painter.
  • there are episodes for which the author wasn’t able to quantify the full painting description.

Basically when a feature is present in a painting, it will be flagged by 1.

df <- bob_ross
#define incomplete paintings
incomplete <-c("PURPLE MOUNTAIN RANGE","COUNTRY CHARM","PEACEFUL REFLECTIONS")
df <- df %>% filter(guest==0 & !(title %in% incomplete))

After cleaning, there are 378 paintings and 67 features to analyze.

2.2 Imputing Date values

I found the exact dates of all episodes on this website : http://thetvdb.com/?tab=seasonall&id=79167

So to use them in this kernel I have just copied the 3 columns directly and extract the year and month number related to each episode. The idea is then to join this dataset based on the season, episode_num values.

seasonCode<-c("1 x 1","1 x 2","1 x 3","1 x 4","1 x 5","1 x 6","1 x 7","1 x 8","1 x 9","1 x 10","1 x 11","1 x 12","1 x 13","2 x 1","2 x 2","2 x 3","2 x 4","2 x 5","2 x 6","2 x 7","2 x 8","2 x 9","2 x 10","2 x 11","2 x 12","2 x 13","3 x 1","3 x 2","3 x 3","3 x 4","3 x 5","3 x 6","3 x 7","3 x 8","3 x 9","3 x 10","3 x 11","3 x 12","3 x 13","4 x 1","4 x 2","4 x 3","4 x 4","4 x 5","4 x 6","4 x 7","4 x 8","4 x 9","4 x 10","4 x 11","4 x 12","4 x 13","5 x 1","5 x 2","5 x 3","5 x 4","5 x 5","5 x 6","5 x 7","5 x 8","5 x 9","5 x 10","5 x 11","5 x 12","5 x 13","6 x 1","6 x 2","6 x 3","6 x 4","6 x 5","6 x 6","6 x 7","6 x 8","6 x 9","6 x 10","6 x 11","6 x 12","6 x 13","7 x 1","7 x 2","7 x 3","7 x 4","7 x 5","7 x 6","7 x 7","7 x 8","7 x 9","7 x 10","7 x 11","7 x 12","7 x 13","8 x 1","8 x 2","8 x 3","8 x 4","8 x 5","8 x 6","8 x 7","8 x 8","8 x 9","8 x 10","8 x 11","8 x 12","8 x 13","9 x 1","9 x 2","9 x 3","9 x 4","9 x 5","9 x 6","9 x 7","9 x 8","9 x 9","9 x 10","9 x 11","9 x 12","9 x 13","10 x 1","10 x 2","10 x 3","10 x 4","10 x 5","10 x 6","10 x 7","10 x 8","10 x 9","10 x 10","10 x 11","10 x 12","10 x 13","11 x 1","11 x 2","11 x 3","11 x 4","11 x 5","11 x 6","11 x 7","11 x 8","11 x 9","11 x 10","11 x 11","11 x 12","11 x 13","12 x 1","12 x 2","12 x 3","12 x 4","12 x 5","12 x 6","12 x 7","12 x 8","12 x 9","12 x 10","12 x 11","12 x 12","12 x 13","13 x 1","13 x 2","13 x 3","13 x 4","13 x 5","13 x 6","13 x 7","13 x 8","13 x 9","13 x 10","13 x 11","13 x 12","13 x 13","14 x 1","14 x 2","14 x 3","14 x 4","14 x 5","14 x 6","14 x 7","14 x 8","14 x 9","14 x 10","14 x 11","14 x 12","14 x 13","15 x 1","15 x 2","15 x 3","15 x 4","15 x 5","15 x 6","15 x 7","15 x 8","15 x 9","15 x 10","15 x 11","15 x 12","15 x 13","16 x 1","16 x 2","16 x 3","16 x 4","16 x 5","16 x 6","16 x 7","16 x 8","16 x 9","16 x 10","16 x 11","16 x 12","16 x 13","17 x 1","17 x 2","17 x 3","17 x 4","17 x 5","17 x 6","17 x 7","17 x 8","17 x 9","17 x 10","17 x 11","17 x 12","17 x 13","18 x 1","18 x 2","18 x 3","18 x 4","18 x 5","18 x 6","18 x 7","18 x 8","18 x 9","18 x 10","18 x 11","18 x 12","18 x 13","19 x 1","19 x 2","19 x 3","19 x 4","19 x 5","19 x 6","19 x 7","19 x 8","19 x 9","19 x 10","19 x 11","19 x 12","19 x 13","20 x 1","20 x 2","20 x 3","20 x 4","20 x 5","20 x 6","20 x 7","20 x 8","20 x 9","20 x 10","20 x 11","20 x 12","20 x 13","21 x 1","21 x 2","21 x 3","21 x 4","21 x 5","21 x 6","21 x 7","21 x 8","21 x 9","21 x 10","21 x 11","21 x 12","21 x 13","22 x 1","22 x 2","22 x 3","22 x 4","22 x 5","22 x 6","22 x 7","22 x 8","22 x 9","22 x 10","22 x 11","22 x 12","22 x 13","23 x 1","23 x 2","23 x 3","23 x 4","23 x 5","23 x 6","23 x 7","23 x 8","23 x 9","23 x 10","23 x 11","23 x 12","23 x 13","24 x 1","24 x 2","24 x 3","24 x 4","24 x 5","24 x 6","24 x 7","24 x 8","24 x 9","24 x 10","24 x 11","24 x 12","24 x 13","25 x 1","25 x 2","25 x 3","25 x 4","25 x 5","25 x 6","25 x 7","25 x 8","25 x 9","25 x 10","25 x 11","25 x 12","25 x 13","26 x 1","26 x 2","26 x 3","26 x 4","26 x 5","26 x 6","26 x 7","26 x 8","26 x 9","26 x 10","26 x 11","26 x 12","26 x 13","27 x 1","27 x 2","27 x 3","27 x 4","27 x 5","27 x 6","27 x 7","27 x 8","27 x 9","27 x 10","27 x 11","27 x 12","27 x 13","28 x 1","28 x 2","28 x 3","28 x 4","28 x 5","28 x 6","28 x 7","28 x 8","28 x 9","28 x 10","28 x 11","28 x 12","28 x 13","29 x 1","29 x 2","29 x 3","29 x 4","29 x 5","29 x 6","29 x 7","29 x 8","29 x 9","29 x 10","29 x 11","29 x 12","29 x 13","30 x 1","30 x 2","30 x 3","30 x 4","30 x 5","30 x 6","30 x 7","30 x 8","30 x 9","30 x 10","30 x 11","30 x 12","30 x 13","31 x 1","31 x 2","31 x 3","31 x 4","31 x 5","31 x 6","31 x 7","31 x 8","31 x 9","31 x 10","31 x 11","31 x 12","31 x 13")
Title<-c("A Walk In The Woods","Mt McKinley","Ebony Sunset","Winter Mist","Quiet Stream","Winter Moon","Autumn Mountain","Peaceful Valley","Seascape","Mountain Lake","Winter Glow","Snow Fall","Final Reflections","Meadow Lake","Winter Sun","Ebony Sea","Shades of Grey","Autumn Splendor","Black River","Brown Mountain","Reflections","Black and White Seascape","Lazy River","Black Waterfall","Mountain Waterfall","Final Grace","Mountain Retreat","Blue Moon","Bubbling Stream","Winter Night","Distant Hills","Covered Bridge","Quiet Inlet","Night Light","The Old Mill","Campfire","Rustic Barn","Hidden Lake","Peaceful Waters","Purple Splendor","Tranquil Valley","Majestic Mountains","Winter Sawscape","Evening Seascape","Warm Summer Day","Cabin in the Woods","Wetlands","Cool Waters","Quiet Woods","Northwest Majesty","Autumn Days","Mountain Challenge","Mountain Waterfall","Twilight Meadow","Mountain Blossoms","Winter Stillness","Quiet Pond","Ocean Sunrise","Bubbling Brook","Arizona Splendor","Anatomy of a Wave","The Windmill","Autumn Glory","Indian Girl","Meadow Stream","Blue River","Nature's Edge","Morning Mist","Whispering Stream","Secluded Forest","Snow Trail","Arctic Beauty","Horizons West","High Chateau","Country Life","Western Expanse","Marshlands","Blaze of Color","Winter Cabin","Secluded Lake","Evergreens at Sunset","Mountain Cabin","Portrait of Sally","Misty Waterfall","Barn at Sunset","Mountain Splendor","Lake by Mountain","Mountain Glory","Grey Winter","Dock Scene","Dark Waterfall","Misty Rolling Hills","Lakeside Cabin","Warm Winter Day","Waterside Way","Hunter's Haven","Bubbling Mountain Brook","Winter Hideaway","Foot of the Mountain","Majestic Pine","Cactus at Sunset","Mountain Range","Lonely Retreat","Northern Lights","Winter Evergreens","Surf's Up","Red Sunset","Meadow Road","Winter Oval","Secluded Beach","Forest Hills","Little House by the Road","Mountain Path","Country Charm","Nature's Paradise","Mountain by the Sea","Mountain Hideaway","Towering Peaks","Cabin at Sunset","Twin Falls","Secluded Bridge","Ocean Breeze","Autumn Woods","Winter Solitude","Golden Sunset","Mountain Oval","Ocean Sunset","Triple View","Winter Frost","Lakeside Cabin","Mountain Stream","Country Cabin","Daisy Delight","Hidden Stream","Towering Glacier","Oval Barn","Lakeside Path","Sunset Oval","Winter Barn","Sunset over the Waves","Golden Glow","Roadside Barn","Happy Accident","Golden Knoll","Mountain Reflections","Secluded Mountain","Bright Autumn Trees","Black Seascape","Steep Mountains","Quiet Mountains River","Evening Waterfall","Tropical Seascape","Mountain at Sunset","Soft Mountain Glow","Mountain in an Oval","Winter Mountain","Rolling Hills","Frozen Solitude","Meadow Brook","Evening at Sunset","Mountain View","Hidden Creek","Peaceful Haven","Mountain Exhibition","Emerald Waters","Mountain Summit","Cabin Hideaway","Oval Essence","Lost Lake","Distant Mountains","Meadow Brook Surprise","Mountain Moonlight Oval","Snowy Solitude","Mountain River","Graceful Mountains","Windy Waves","On a Clear Day","Riverside Escape Oval","Surprising Falls","Shadow Pond","Misty Forest Oval","Natural Wonder","Splendor of Winter","Colors of Nature","Grandpa's Barn","Peaceful Reflections","Hidden Winter Moon Oval","Waves of Wonder","Cabin by the Pond","Fall Stream","Christmas Eve Snow","Forest Down Oval","Pathway to Autumn","Deep Forest Lake","Peaks of Majesty","Two Seasons","Nestled Cabin","Wintertime Discovery","Mountain Mirage Wood Shape","Double Oval Fantasy","Contemplative Lady","Deep Woods","High Tide","Barn In Snow Oval","That Time Of Year","Waterfall Wonder","Mighty Mountain Lake","Wooded Stream Oval","Golden Mist Oval","The Old Home Place","Soothing Vista","Stormy Seas","Country Time","A Mild Winter's Day","Spectacular Waterfall","View From The Park","Lake View","Old Country Mill","Morning Walk","Nature's Splendor","Mountain Beauty","Half-Oval Vignette","Absolutely Autumn","Mountain Seclusion","Crimson Oval","Autumn Exhibition","Majestic Peaks","Golden Morning Mist","Winter Lace","Seascape Fantasy","Double Oval Stream","Enchanted Forest","Southwest Serenity","Rippling Waters","Snowfall Magic","Quiet Mountain Lake","Final Embers of Sunlight","Snowy Morn","Camper's Haven","Waterfall in the Woods","Covered Bridge Oval","Scenic Seclusion","Ebb Tide","After the Rain","Winter Elegance","Evening's Peace","Valley of Tranquility","Mystic Mountain","New Day's Dawn","Winter in Pastel","Hazy Day","Divine Elegance","Cliffside","Autumn Fantasy","The Old Oak Tree","Winter Paradise","Days Gone By","Change of Seasons","Hidden Delight","Double Take","Valley View","Tranquil Dawn","Royal Majesty","Serenity","Cabin at Trails End","Mountain Rhapsody","Wilderness Cabin","By The Sea","Indian Summer","Blue Winter","Desert Glow","Lone Mountain","Florida's Glory","Autumn Images","Hint of Springtime","Around the Bend","Countryside Oval","Russet Winter","Purple Haze","Dimensions","Deep Wilderness Home","Haven in the Valley","Wintertime Blues","Pastel Seascape","Country Creek","Silent Forest","Frosty Winter Morn","Forest Edge","Mountain Ridge Lake","Reflections of Gold","Quiet Cove","Rivers Peace","At Dawn's Light","Valley Waterfall","Toward Days End","Falls in the Glen","Frozen Beauty in Vignette","Crimson Tide","Winter Bliss","Gray Mountain","Wayside Pond","Teton Winter","Little Home in the Meadow","A Pretty Autumn Day","Mirrored Images","Back-Country Path","Graceful Waterfall","Icy Lake","Rowboat on the Beach","Portrait of Winter","The Footbridge","Snowbound Cabin","Hide A Way Cove","Enchanted Falls Oval","Not Quite Spring","Splashes Of Autumn","Summer In The Mountain","Oriental Falls","Autumn Palette","Cypress Swamp","Downstream View","Just Before The Storm","Fisherman's Paradise","Desert Hues","The Property Line","In the Stillness of Morning","Delightful Meadow Home","First Snow","Lake in the Valley","A Trace of Spring","An Arctic Winter Day","Snow Birch","Early Autumn","Tranquil Wooded Stream","Purple Mountain Range","Storm's A Comin","Sunset Aglow","Evening at the Falls","Twilight Beauty","Angler's Haven","Rustic Winter Woods","Wilderness Falls","Winter at the Farm","Daisies at Dawn","A Spectacular View","Daybreak","Island Paradise","Sunlight in the Shadows","Splendor of a Snowy Winter","Forest River","Golden Glow of Morning","Fisherman's Trail","A Warm Winter","Under Pastel Skies","Golden Rays of Sunshine","The Magic of Fall","Glacier Lake","The Old Weathered Barn","Deep Forest Falls","Winter's Grace","Splendor of Autumn","Tranquil Seas","Mountain Serenity","Home before Nightfall","Island in the Wilderness","Autumn Oval","Seasonal Progression","Light at the Summit","Countryside Barn","Mountain Lake Falls","Cypress Creek","Trapper's Cabin","Storm on the Horizon","Pot O' Posies","A Perfect Winter Day","Aurora's Dance","Woodman's Retreat","Babbling Brook","Woodgrain's View","Winter's Peace","Wilderness Trail","A Copper Winter","Misty Foothills","Through the Window","Home in the Valley","Mountains of Grace","Seaside Harmony","A Cold Spring Day","Evening's Glow","Blue Ridge Falls","Reflections of Calm","Before the Snowfall","Winding Stream","Tranquility Cove","Cabin in the Hollow","View from Clear Creek","Bridge to Autumn","Trail's End","Evergreen Valley","Balmy Beach","Lake at the Ridge","In the Midst of Winter","Wilderness Day")
Aired<-c("1/11/83","1/11/83","1/18/83","1/25/83","2/1/83","2/8/83","2/15/83","2/22/83","3/1/83","3/8/83","3/15/83","3/22/83","3/29/83","8/31/83","9/7/83","9/14/83","9/21/83","9/28/83","10/5/83","10/12/83","10/19/83","10/26/83","11/2/83","11/9/83","11/16/83","11/23/83","1/4/84","1/11/84","1/18/84","1/25/84","2/1/84","2/8/84","2/15/84","2/22/84","2/29/84","3/7/84","3/14/84","3/21/84","3/28/84","9/5/84","9/12/84","9/19/84","9/26/84","10/3/84","10/10/84","10/17/84","10/24/84","10/31/84","11/7/84","11/14/84","11/21/84","11/28/84","1/2/85","1/9/85","1/16/85","1/23/85","1/30/85","2/6/85","2/13/85","2/20/85","2/27/85","3/6/85","3/13/85","3/20/85","3/27/85","5/1/85","5/8/85","5/15/85","5/22/85","5/29/85","6/5/85","6/12/85","6/19/85","6/26/85","7/2/85","7/9/85","7/16/85","7/23/85","10/2/85","10/9/85","10/16/85","10/23/85","10/30/85","11/6/85","11/13/85","11/20/85","11/27/85","12/4/85","12/11/85","12/18/85","12/25/85","1/2/86","1/9/86","1/16/86","1/23/86","1/30/86","2/6/86","2/13/86","2/20/86","2/27/86","3/6/86","3/13/86","3/20/86","3/27/86","4/30/86","5/7/86","5/14/86","5/21/86","5/28/86","6/4/86","6/11/86","6/18/86","6/25/86","7/2/86","7/9/86","7/16/86","7/23/86","9/3/86","9/10/86","9/17/86","9/24/86","10/1/86","10/8/86","10/15/86","10/22/86","10/29/86","11/5/86","11/12/86","11/19/86","11/26/86","12/30/86","1/7/86","1/14/86","1/21/86","1/28/86","2/4/86","2/11/86","2/18/86","2/25/86","3/4/86","3/11/86","3/18/86","3/25/86","4/29/87","5/6/87","5/13/87","5/20/87","5/27/87","6/3/87","6/10/87","6/17/87","6/24/87","7/1/87","7/8/87","7/15/87","7/22/87","9/2/87","9/9/87","9/16/87","9/23/87","9/30/87","10/7/87","10/14/87","10/21/87","10/28/87","11/4/87","11/11/87","11/18/87","11/25/87","12/30/87","1/6/88","1/13/88","1/20/88","1/27/88","2/3/88","2/10/88","2/17/88","2/24/88","3/2/88","3/9/88","3/16/88","3/23/88","4/27/88","5/4/88","5/11/88","5/18/88","5/25/88","6/1/88","6/8/88","6/15/88","6/22/88","6/29/88","7/6/88","7/13/88","7/20/88","7/17/88","7/24/88","7/31/88","8/7/88","8/14/88","8/21/88","8/28/88","9/4/88","9/11/88","9/18/88","9/25/88","10/2/88","10/9/88","1/4/89","1/11/89","1/18/89","1/25/89","2/1/89","2/8/89","2/15/89","2/22/89","3/1/89","3/8/89","3/15/89","3/22/89","3/29/89","7/5/89","7/12/89","7/19/89","7/26/89","8/2/89","8/9/89","8/16/89","8/23/89","8/30/89","9/6/89","9/13/89","9/20/89","9/27/89","1/3/90","1/10/90","1/17/90","1/24/90","1/31/90","2/7/90","2/14/90","2/21/90","2/28/90","3/7/90","3/14/90","3/21/90","3/28/90","4/4/90","4/11/90","4/18/90","4/25/90","5/2/90","5/9/90","5/16/90","5/23/90","5/30/90","6/6/90","6/13/90","6/20/90","6/27/90","9/5/90","9/12/90","9/19/90","9/26/90","10/3/90","10/10/90","10/17/90","10/24/90","10/31/90","11/7/90","11/14/90","11/21/90","11/28/90","1/1/91","1/8/91","1/15/91","1/22/91","1/29/91","2/5/91","2/12/91","2/19/91","2/26/91","3/5/91","3/12/91","3/19/91","3/26/91","9/3/91","9/10/91","9/17/91","9/24/91","10/1/91","10/8/91","10/15/91","10/22/91","10/29/91","11/5/91","11/12/91","11/19/91","11/26/91","1/7/92","1/14/92","1/21/92","1/28/92","2/4/92","2/11/92","2/18/92","2/25/92","3/3/92","3/10/92","3/17/92","3/24/92","3/31/92","8/25/92","9/1/92","9/8/92","9/15/92","9/22/92","9/29/92","10/6/92","10/13/92","10/20/92","10/27/92","11/3/92","11/10/92","11/17/92","12/1/92","12/8/92","12/15/92","12/22/92","12/29/92","1/5/93","1/12/93","1/19/93","1/26/93","2/2/93","2/9/93","2/16/93","2/23/93","3/2/93","3/9/93","3/16/93","3/23/93","3/30/93","4/6/93","4/13/93","4/20/93","4/27/93","5/4/93","5/11/93","5/18/93","5/20/93","5/25/93","6/1/93","6/8/93","6/15/93","6/22/93","6/29/93","7/6/93","7/13/93","7/20/93","7/27/93","8/3/93","8/10/93","8/17/93","8/24/93","8/31/93","9/7/93","9/14/93","9/21/93","9/28/93","10/5/93","10/12/93","10/19/93","10/26/93","11/2/93","11/9/93","11/16/93","11/23/93","11/30/93","12/7/93","12/14/93","12/21/93","12/28/93","1/4/94","1/11/94","1/18/94","1/25/94","2/1/94","2/8/94","2/15/94","2/22/94","3/1/94","3/8/94","3/15/94","3/22/94","3/29/94","4/5/94","4/12/94","4/19/94","4/26/94","5/3/94","5/10/94","5/17/94")

listEp<-data.frame("seasonCode"=seasonCode,"Title"=Title,"Aired"=Aired)
str(listEp)
## 'data.frame':    403 obs. of  3 variables:
##  $ seasonCode: Factor w/ 403 levels "1 x 1","1 x 10",..: 1 6 7 8 9 10 11 12 13 2 ...
##  $ Title     : Factor w/ 401 levels "A Cold Spring Day",..: 8 231 104 385 269 386 25 255 286 211 ...
##  $ Aired     : Factor w/ 402 levels "1/1/91","1/10/90",..: 3 3 15 29 146 190 157 170 195 243 ...
listEp$seasonCode<-as.character(listEp$seasonCode)

#function to extract 1st and last character (a digit)from a string a convert them to numeric

decode<-function(x,num){
  mystr<-strsplit(x, "[^[:digit:]]")
  return(as.numeric(unlist(mystr))[num])
}

listEp$season<-sapply(listEp$seasonCode,FUN=decode,num=1)
listEp$episode_num<-sapply(listEp$seasonCode,FUN=decode,num=4)
listEp$Aired<-as.character(listEp$Aired)
listEp$Date<-as.Date(listEp$Aired, format = "%m/%d/%y")
listEp$year<-as.integer(format(as.Date(listEp$Date,'%m/%d/%Y'),'%Y'))
listEp$month<-as.integer(format(as.Date(listEp$Date,'%m/%d/%Y'),'%m'))
mymonths <- c("January","February","March","April","May","June","July","August","September","October","November","December")
listEp$MonthAbb <- mymonths[ listEp$month ]
listEp$month_ordered <- factor(listEp$MonthAbb, levels = month.name)

head(listEp) 
##   seasonCode               Title   Aired season episode_num       Date year month MonthAbb
## 1      1 x 1 A Walk In The Woods 1/11/83      1           1 1983-01-11 1983     1  January
## 2      1 x 2         Mt McKinley 1/11/83      1           2 1983-01-11 1983     1  January
## 3      1 x 3        Ebony Sunset 1/18/83      1           3 1983-01-18 1983     1  January
## 4      1 x 4         Winter Mist 1/25/83      1           4 1983-01-25 1983     1  January
## 5      1 x 5        Quiet Stream  2/1/83      1           5 1983-02-01 1983     2 February
## 6      1 x 6         Winter Moon  2/8/83      1           6 1983-02-08 1983     2 February
##   month_ordered
## 1       January
## 2       January
## 3       January
## 4       January
## 5      February
## 6      February

3 Global plots

We can make:

x<-data.frame('number'= colSums(df[,5:71],dims=1))
features<-rownames(x)
rownames(x)<-1:nrow(x)
x$features<-features
totEpisode<-nrow(df)
totFeatures<-sum(x$number)

x$PercentageEp <- x$number / totEpisode * 100
x$PercentEp <-paste0(round(x$PercentageEp,1),"")
x$PercentageFeat <- x$number / totFeatures * 100
x$PercentFeat <-paste0(round(x$PercentageFeat,1),"")

3.1 Feature frequency over all episodes

ggplot(data=filter(x,number>10),aes(x=reorder(features,PercentageEp),y=PercentageEp,fill=PercentageEp)) + 
  geom_bar(stat='identity') + geom_text(aes(label=PercentEp), position=position_dodge(width=0.9), vjust=.5,hjust=0.1,size=2.5,color='#F21A00') +
  scale_fill_gradient(low="#EBCC2A",high="#3B9AB2") + 
  ylim(0,100)+ xlab('') + ylab('') + coord_polar(theta="y") + 
  geom_text(data = filter(x,number>10), hjust = 1, size = 2.75, aes(x = features, y = 0, label = features)) + 
  ggtitle("Feature's frequency over all episodes") + theme(legend.position="none",
panel.grid.major = element_blank(),panel.grid.minor = element_blank(),axis.line = element_blank(),axis.text.y = element_blank(),axis.text.x = element_blank(),axis.ticks = element_blank(),plot.title = element_text(hjust = 0.5,family="sans", face="bold",size=12))

ggplot(data=x,aes(x=reorder(features,-PercentageEp),y=PercentageEp,fill=PercentageEp)) + 
  geom_bar(stat='identity') + scale_fill_gradient(low="#EBCC2A",high="#3B9AB2") +
  theme(axis.text.x = element_text(angle=90, hjust=1),legend.position="none") + 
  xlab('') + ylab('') 

  • the 2 most frequent features are tree and trees with respectively 91% and 85% (for a grand total of 378 painting)
  • the 2 most painted features (over the 67 in the labelled in the dataset) are again tree and trees with respectively ~11% and ~10% of all features

3.2 Features percentage over all paintings

ggplot(data=filter(x,number>10),aes(x=reorder(features,PercentageFeat),y=PercentageFeat,fill=PercentageFeat)) + 
  geom_bar(stat='identity') + geom_text(aes(label=PercentFeat), position=position_dodge(width=0.9), vjust=.5,hjust=0.1,size=2.5,color='#F21A00') +
  scale_fill_gradient(low="#EBCC2A",high="#3B9AB2") + 
  ylim(0,13)+ xlab('') + ylab('') + coord_polar(theta="y") + 
  geom_text(data = filter(x,number>10), hjust = 1, size = 2.75, aes(x = features, y = 0, label = features)) + 
  ggtitle("Feature's percentage over all paintings") + theme(legend.position="none",
panel.grid.major = element_blank(),panel.grid.minor = element_blank(),axis.line = element_blank(),axis.text.y = element_blank(),axis.text.x = element_blank(),axis.ticks = element_blank(),plot.title = element_text(hjust = 0.5,family="sans", face="bold",size=12))

ggplot(data=x,aes(x=reorder(features,-PercentageFeat),y=PercentageFeat,fill=PercentageFeat)) + 
  geom_bar(stat='identity') + scale_fill_gradient(low="#EBCC2A",high="#3B9AB2") +
  theme(axis.text.x = element_text(angle=90, hjust=1),legend.position="none") + 
  xlab('') + ylab('') 

#df %>% mutate(sumVar = rowSums(.[5:71])) -> df2

3.3 Correlation

For the following plots, since a row with no entries causes a standard deviation = 0, I select the features based on their frequency and remove the features that do no appear at all

#remove feature with 0 entries causing a standard deviation = 0 
posFeatures <- x %>% filter(number>0) %>% arrange(-number)
pos <- posFeatures$features
df2 <- select_(df, .dots = pos)
#df2 <- df %>% select(-c(lakes))
num.cols <- sapply(df2, is.numeric)
cor.data <- cor(df2[,num.cols])
corrPLOT<-corrplot(cor.data,method='square',order="AOE")

comments next tab

3.4 Correlation of principal features (n>1)

topFeatures <- x %>% filter(number>1) %>% arrange(-number)
top <- topFeatures$features
df3 <- select_(df, .dots = top)
num2.cols <- sapply(df3, is.numeric)
cor2.data <- cor(df3[,num2.cols])
corrPLOT2<-corrplot(cor2.data,method='square',order="AOE")

comments :

  • there are some obvious positive correlation , like between tree and trees
  • there are some obvious negative correlation , like between tree and ocean, or tree and beach

3.5 Paintings with the most and less features

We can make a new column that will hold the sum (over painting) of all its features

summaryPainting <- df %>% mutate(featuresSum = rowSums(.[5:71])) %>% arrange(-featuresSum) %>% select(episode,season,episode_num,title,featuresSum)
head(summaryPainting,3)
## # A tibble: 3 x 5
##   episode season episode_num                   title featuresSum
##     <chr>  <dbl>       <dbl>                   <chr>       <dbl>
## 1  S02E04      2           4          SHADES OF GREY          14
## 2  S12E12     12          12    MOUNTAIN IN AND OVAL          14
## 3  S15E05     15           5 HIDDEN WINTER MOON OVAL          14
tail(summaryPainting,3)
## # A tibble: 3 x 5
##   episode season episode_num         title featuresSum
##     <chr>  <dbl>       <dbl>         <chr>       <dbl>
## 1  S24E02     24           2  WAYSIDE POND           4
## 2  S28E05     28           5 MAGIC OF FALL           4
## 3  S25E08     25           8 CYPRESS SWAMP           3

3.5.1 Most features

The painting with the most features is :

summaryPainting[1,]
## # A tibble: 1 x 5
##   episode season episode_num          title featuresSum
##     <chr>  <dbl>       <dbl>          <chr>       <dbl>
## 1  S02E04      2           4 SHADES OF GREY          14

This episode was very special because the painting was done for a blind man that Bob Ross met and told him that he could not paint hence could only see gray tones (source)

3.5.2 Less features

The painting with the less number of features is :

summaryPainting[nrow(summaryPainting),]
## # A tibble: 1 x 5
##   episode season episode_num         title featuresSum
##     <chr>  <dbl>       <dbl>         <chr>       <dbl>
## 1  S25E08     25           8 CYPRESS SWAMP           3

4 Time dependant analysis

With the dataframe containing the date information, we can make some further analysis. For exmaples :

Let’s first look at all features :

timeData<-inner_join(summaryPainting,listEp, by=c('season','episode_num'))
head(timeData)
## # A tibble: 6 x 13
##   episode season episode_num                   title featuresSum seasonCode                   Title
##     <chr>  <dbl>       <dbl>                   <chr>       <dbl>      <chr>                  <fctr>
## 1  S02E04      2           4          SHADES OF GREY          14      2 x 4          Shades of Grey
## 2  S12E12     12          12    MOUNTAIN IN AND OVAL          14    12 x 12     Mountain in an Oval
## 3  S15E05     15           5 HIDDEN WINTER MOON OVAL          14     15 x 5 Hidden Winter Moon Oval
## 4  S01E06      1           6             WINTER MOON          13      1 x 6             Winter Moon
## 5  S08E13      8          13         NORTHERN LIGHTS          13     8 x 13         Northern Lights
## 6  S10E11     10          11             TRIPLE VIEW          13    10 x 11             Triple View
## # ... with 6 more variables: Aired <chr>, Date <date>, year <int>, month <int>, MonthAbb <chr>,
## #   month_ordered <fctr>
ggplot(data = timeData, aes(x=factor(year),y=month_ordered)) + geom_tile(aes(fill = featuresSum),colour = "white")+xlab('') + ylab('') + scale_fill_gradient(low="gray",high="#C93312") + theme_fivethirtyeight() 

comments :

4.1 Seasonality

In the next plots I will select only features evoquing Summer or Winter.

timeDataAll<-inner_join(df,listEp, by=c('season','episode_num'))
winterLike <- timeDataAll %>% select(year, month_ordered,snow,snowy_mountain,winter) %>% mutate(featuresWinter = rowSums(.[3:5]))
summerLike <- timeDataAll %>% select(year, month_ordered,beach,lighthouse,palm_trees,ocean) %>% mutate(featuresSummer = rowSums(.[3:5]))

g1<-ggplot(data = winterLike, aes(x=factor(year),y=month_ordered)) + geom_tile(aes(fill = featuresWinter),colour = "white")+xlab('') + ylab('') + scale_fill_gradient(low="gray",high="#3B9AB2")
g2<-ggplot(data = summerLike, aes(x=factor(year),y=month_ordered)) + geom_tile(aes(fill = featuresSummer),colour = "white")+xlab('') + ylab('') + scale_fill_gradient(low="gray",high="#EBCC2A")

grid.draw(rbind(ggplotGrob(g1), ggplotGrob(g2), size = "last"))

5 Network analysis

The idea of a graph is to show, per paintings, the connections between the features paint. Technically, it means to keep only the features that have a value 1, and then loop over these entries and save all the combinations

#function to loop an array of X features and return a DF with feautre_1 | feature_2
makeConnection<-function(x){
    feature_1<-c()
    feature_2<-c()
    cnt<-1
    for(i in 1:(nrow(x)-1)){
        for(j in (i+1):(nrow(x))){
            #cat(x[i,2]," ",x[j,2],"\n")
            feature_1[cnt]<-(x[i,2])
            feature_2[cnt]<-(x[j,2])
            cnt<-cnt+1
            }
    }
    res<-data.frame("feature_1"=feature_1,"feature_2"=feature_2)
    return(res)
}

Test with the first 2 rows :

#create empty DF to store the results
tempo<-data.frame("feature_1"= character(),"feature_2"=character())

#loop over the fist 2 rows-paintings
for(i in 1:2){
    current<-data.frame(df[i,5:71])
    x<-data.frame(t(current))
    colnames(x)<-'number'
    features<-rownames(x)
    rownames(x)<-1:nrow(x)  
    x$features<-features
    xpos<-x %>% filter(number>0)
    res<-makeConnection(xpos)
    tempo<-rbind(tempo,res)
}

The interesting thing is that we can apply some weights to the graph. The weights are based on the frequency of the connection between 2 features

xx<-plyr::count(tempo, vars = c("feature_1","feature_2"))
colnames(xx)[3]<-'weight'

The example above has 51 entries initially but only the connection tree -- trees has a count(frequency) = 2, so we can pass this as a weight for a better visualization.

yy<-graph.data.frame(xx)
#E(yy)$weight
plot(yy, vertex.shape="none", edge.arrow.size=.4, edge.curved=.1, vertex.label.font=1, vertex.label.color="blue",vertex.label.cex=.8, edge.color="black",edge.width=E(yy)$weight)

comments :


History :