options(width=100)
knitr::opts_chunk$set(out.width='1000px',dpi=200,message=FALSE,warning=FALSE)
#load packages and csv file
library(ggplot2)
library(dplyr)
library(gridExtra)
library(grid)
library(corrplot)
library(fivethirtyeight)
library(ggthemes)
library(scales)
library(igraph)
library(ggraph)
library(plyr)
This dataset represents the details of each painting of Bob Ross. Bob Ross was an american painter and host of the The Joy of Painting, an instructional television program that aired from 1983 to 1994 on PBS in the United States (wikipedia).
The original and detailed analysis of this data has been published on 538 by W. Hickey. In this notebook I will look at the following features :
As explained in the 538
article, there is a minor cleaning to do :
Basically when a feature is present in a painting, it will be flagged by 1
.
df <- bob_ross
#define incomplete paintings
incomplete <-c("PURPLE MOUNTAIN RANGE","COUNTRY CHARM","PEACEFUL REFLECTIONS")
df <- df %>% filter(guest==0 & !(title %in% incomplete))
After cleaning, there are 378 paintings and 67 features to analyze.
I found the exact dates of all episodes on this website : http://thetvdb.com/?tab=seasonall&id=79167
So to use them in this kernel I have just copied the 3 columns directly and extract the year
and month
number related to each episode. The idea is then to join this dataset based on the season
, episode_num
values.
seasonCode<-c("1 x 1","1 x 2","1 x 3","1 x 4","1 x 5","1 x 6","1 x 7","1 x 8","1 x 9","1 x 10","1 x 11","1 x 12","1 x 13","2 x 1","2 x 2","2 x 3","2 x 4","2 x 5","2 x 6","2 x 7","2 x 8","2 x 9","2 x 10","2 x 11","2 x 12","2 x 13","3 x 1","3 x 2","3 x 3","3 x 4","3 x 5","3 x 6","3 x 7","3 x 8","3 x 9","3 x 10","3 x 11","3 x 12","3 x 13","4 x 1","4 x 2","4 x 3","4 x 4","4 x 5","4 x 6","4 x 7","4 x 8","4 x 9","4 x 10","4 x 11","4 x 12","4 x 13","5 x 1","5 x 2","5 x 3","5 x 4","5 x 5","5 x 6","5 x 7","5 x 8","5 x 9","5 x 10","5 x 11","5 x 12","5 x 13","6 x 1","6 x 2","6 x 3","6 x 4","6 x 5","6 x 6","6 x 7","6 x 8","6 x 9","6 x 10","6 x 11","6 x 12","6 x 13","7 x 1","7 x 2","7 x 3","7 x 4","7 x 5","7 x 6","7 x 7","7 x 8","7 x 9","7 x 10","7 x 11","7 x 12","7 x 13","8 x 1","8 x 2","8 x 3","8 x 4","8 x 5","8 x 6","8 x 7","8 x 8","8 x 9","8 x 10","8 x 11","8 x 12","8 x 13","9 x 1","9 x 2","9 x 3","9 x 4","9 x 5","9 x 6","9 x 7","9 x 8","9 x 9","9 x 10","9 x 11","9 x 12","9 x 13","10 x 1","10 x 2","10 x 3","10 x 4","10 x 5","10 x 6","10 x 7","10 x 8","10 x 9","10 x 10","10 x 11","10 x 12","10 x 13","11 x 1","11 x 2","11 x 3","11 x 4","11 x 5","11 x 6","11 x 7","11 x 8","11 x 9","11 x 10","11 x 11","11 x 12","11 x 13","12 x 1","12 x 2","12 x 3","12 x 4","12 x 5","12 x 6","12 x 7","12 x 8","12 x 9","12 x 10","12 x 11","12 x 12","12 x 13","13 x 1","13 x 2","13 x 3","13 x 4","13 x 5","13 x 6","13 x 7","13 x 8","13 x 9","13 x 10","13 x 11","13 x 12","13 x 13","14 x 1","14 x 2","14 x 3","14 x 4","14 x 5","14 x 6","14 x 7","14 x 8","14 x 9","14 x 10","14 x 11","14 x 12","14 x 13","15 x 1","15 x 2","15 x 3","15 x 4","15 x 5","15 x 6","15 x 7","15 x 8","15 x 9","15 x 10","15 x 11","15 x 12","15 x 13","16 x 1","16 x 2","16 x 3","16 x 4","16 x 5","16 x 6","16 x 7","16 x 8","16 x 9","16 x 10","16 x 11","16 x 12","16 x 13","17 x 1","17 x 2","17 x 3","17 x 4","17 x 5","17 x 6","17 x 7","17 x 8","17 x 9","17 x 10","17 x 11","17 x 12","17 x 13","18 x 1","18 x 2","18 x 3","18 x 4","18 x 5","18 x 6","18 x 7","18 x 8","18 x 9","18 x 10","18 x 11","18 x 12","18 x 13","19 x 1","19 x 2","19 x 3","19 x 4","19 x 5","19 x 6","19 x 7","19 x 8","19 x 9","19 x 10","19 x 11","19 x 12","19 x 13","20 x 1","20 x 2","20 x 3","20 x 4","20 x 5","20 x 6","20 x 7","20 x 8","20 x 9","20 x 10","20 x 11","20 x 12","20 x 13","21 x 1","21 x 2","21 x 3","21 x 4","21 x 5","21 x 6","21 x 7","21 x 8","21 x 9","21 x 10","21 x 11","21 x 12","21 x 13","22 x 1","22 x 2","22 x 3","22 x 4","22 x 5","22 x 6","22 x 7","22 x 8","22 x 9","22 x 10","22 x 11","22 x 12","22 x 13","23 x 1","23 x 2","23 x 3","23 x 4","23 x 5","23 x 6","23 x 7","23 x 8","23 x 9","23 x 10","23 x 11","23 x 12","23 x 13","24 x 1","24 x 2","24 x 3","24 x 4","24 x 5","24 x 6","24 x 7","24 x 8","24 x 9","24 x 10","24 x 11","24 x 12","24 x 13","25 x 1","25 x 2","25 x 3","25 x 4","25 x 5","25 x 6","25 x 7","25 x 8","25 x 9","25 x 10","25 x 11","25 x 12","25 x 13","26 x 1","26 x 2","26 x 3","26 x 4","26 x 5","26 x 6","26 x 7","26 x 8","26 x 9","26 x 10","26 x 11","26 x 12","26 x 13","27 x 1","27 x 2","27 x 3","27 x 4","27 x 5","27 x 6","27 x 7","27 x 8","27 x 9","27 x 10","27 x 11","27 x 12","27 x 13","28 x 1","28 x 2","28 x 3","28 x 4","28 x 5","28 x 6","28 x 7","28 x 8","28 x 9","28 x 10","28 x 11","28 x 12","28 x 13","29 x 1","29 x 2","29 x 3","29 x 4","29 x 5","29 x 6","29 x 7","29 x 8","29 x 9","29 x 10","29 x 11","29 x 12","29 x 13","30 x 1","30 x 2","30 x 3","30 x 4","30 x 5","30 x 6","30 x 7","30 x 8","30 x 9","30 x 10","30 x 11","30 x 12","30 x 13","31 x 1","31 x 2","31 x 3","31 x 4","31 x 5","31 x 6","31 x 7","31 x 8","31 x 9","31 x 10","31 x 11","31 x 12","31 x 13")
Title<-c("A Walk In The Woods","Mt McKinley","Ebony Sunset","Winter Mist","Quiet Stream","Winter Moon","Autumn Mountain","Peaceful Valley","Seascape","Mountain Lake","Winter Glow","Snow Fall","Final Reflections","Meadow Lake","Winter Sun","Ebony Sea","Shades of Grey","Autumn Splendor","Black River","Brown Mountain","Reflections","Black and White Seascape","Lazy River","Black Waterfall","Mountain Waterfall","Final Grace","Mountain Retreat","Blue Moon","Bubbling Stream","Winter Night","Distant Hills","Covered Bridge","Quiet Inlet","Night Light","The Old Mill","Campfire","Rustic Barn","Hidden Lake","Peaceful Waters","Purple Splendor","Tranquil Valley","Majestic Mountains","Winter Sawscape","Evening Seascape","Warm Summer Day","Cabin in the Woods","Wetlands","Cool Waters","Quiet Woods","Northwest Majesty","Autumn Days","Mountain Challenge","Mountain Waterfall","Twilight Meadow","Mountain Blossoms","Winter Stillness","Quiet Pond","Ocean Sunrise","Bubbling Brook","Arizona Splendor","Anatomy of a Wave","The Windmill","Autumn Glory","Indian Girl","Meadow Stream","Blue River","Nature's Edge","Morning Mist","Whispering Stream","Secluded Forest","Snow Trail","Arctic Beauty","Horizons West","High Chateau","Country Life","Western Expanse","Marshlands","Blaze of Color","Winter Cabin","Secluded Lake","Evergreens at Sunset","Mountain Cabin","Portrait of Sally","Misty Waterfall","Barn at Sunset","Mountain Splendor","Lake by Mountain","Mountain Glory","Grey Winter","Dock Scene","Dark Waterfall","Misty Rolling Hills","Lakeside Cabin","Warm Winter Day","Waterside Way","Hunter's Haven","Bubbling Mountain Brook","Winter Hideaway","Foot of the Mountain","Majestic Pine","Cactus at Sunset","Mountain Range","Lonely Retreat","Northern Lights","Winter Evergreens","Surf's Up","Red Sunset","Meadow Road","Winter Oval","Secluded Beach","Forest Hills","Little House by the Road","Mountain Path","Country Charm","Nature's Paradise","Mountain by the Sea","Mountain Hideaway","Towering Peaks","Cabin at Sunset","Twin Falls","Secluded Bridge","Ocean Breeze","Autumn Woods","Winter Solitude","Golden Sunset","Mountain Oval","Ocean Sunset","Triple View","Winter Frost","Lakeside Cabin","Mountain Stream","Country Cabin","Daisy Delight","Hidden Stream","Towering Glacier","Oval Barn","Lakeside Path","Sunset Oval","Winter Barn","Sunset over the Waves","Golden Glow","Roadside Barn","Happy Accident","Golden Knoll","Mountain Reflections","Secluded Mountain","Bright Autumn Trees","Black Seascape","Steep Mountains","Quiet Mountains River","Evening Waterfall","Tropical Seascape","Mountain at Sunset","Soft Mountain Glow","Mountain in an Oval","Winter Mountain","Rolling Hills","Frozen Solitude","Meadow Brook","Evening at Sunset","Mountain View","Hidden Creek","Peaceful Haven","Mountain Exhibition","Emerald Waters","Mountain Summit","Cabin Hideaway","Oval Essence","Lost Lake","Distant Mountains","Meadow Brook Surprise","Mountain Moonlight Oval","Snowy Solitude","Mountain River","Graceful Mountains","Windy Waves","On a Clear Day","Riverside Escape Oval","Surprising Falls","Shadow Pond","Misty Forest Oval","Natural Wonder","Splendor of Winter","Colors of Nature","Grandpa's Barn","Peaceful Reflections","Hidden Winter Moon Oval","Waves of Wonder","Cabin by the Pond","Fall Stream","Christmas Eve Snow","Forest Down Oval","Pathway to Autumn","Deep Forest Lake","Peaks of Majesty","Two Seasons","Nestled Cabin","Wintertime Discovery","Mountain Mirage Wood Shape","Double Oval Fantasy","Contemplative Lady","Deep Woods","High Tide","Barn In Snow Oval","That Time Of Year","Waterfall Wonder","Mighty Mountain Lake","Wooded Stream Oval","Golden Mist Oval","The Old Home Place","Soothing Vista","Stormy Seas","Country Time","A Mild Winter's Day","Spectacular Waterfall","View From The Park","Lake View","Old Country Mill","Morning Walk","Nature's Splendor","Mountain Beauty","Half-Oval Vignette","Absolutely Autumn","Mountain Seclusion","Crimson Oval","Autumn Exhibition","Majestic Peaks","Golden Morning Mist","Winter Lace","Seascape Fantasy","Double Oval Stream","Enchanted Forest","Southwest Serenity","Rippling Waters","Snowfall Magic","Quiet Mountain Lake","Final Embers of Sunlight","Snowy Morn","Camper's Haven","Waterfall in the Woods","Covered Bridge Oval","Scenic Seclusion","Ebb Tide","After the Rain","Winter Elegance","Evening's Peace","Valley of Tranquility","Mystic Mountain","New Day's Dawn","Winter in Pastel","Hazy Day","Divine Elegance","Cliffside","Autumn Fantasy","The Old Oak Tree","Winter Paradise","Days Gone By","Change of Seasons","Hidden Delight","Double Take","Valley View","Tranquil Dawn","Royal Majesty","Serenity","Cabin at Trails End","Mountain Rhapsody","Wilderness Cabin","By The Sea","Indian Summer","Blue Winter","Desert Glow","Lone Mountain","Florida's Glory","Autumn Images","Hint of Springtime","Around the Bend","Countryside Oval","Russet Winter","Purple Haze","Dimensions","Deep Wilderness Home","Haven in the Valley","Wintertime Blues","Pastel Seascape","Country Creek","Silent Forest","Frosty Winter Morn","Forest Edge","Mountain Ridge Lake","Reflections of Gold","Quiet Cove","Rivers Peace","At Dawn's Light","Valley Waterfall","Toward Days End","Falls in the Glen","Frozen Beauty in Vignette","Crimson Tide","Winter Bliss","Gray Mountain","Wayside Pond","Teton Winter","Little Home in the Meadow","A Pretty Autumn Day","Mirrored Images","Back-Country Path","Graceful Waterfall","Icy Lake","Rowboat on the Beach","Portrait of Winter","The Footbridge","Snowbound Cabin","Hide A Way Cove","Enchanted Falls Oval","Not Quite Spring","Splashes Of Autumn","Summer In The Mountain","Oriental Falls","Autumn Palette","Cypress Swamp","Downstream View","Just Before The Storm","Fisherman's Paradise","Desert Hues","The Property Line","In the Stillness of Morning","Delightful Meadow Home","First Snow","Lake in the Valley","A Trace of Spring","An Arctic Winter Day","Snow Birch","Early Autumn","Tranquil Wooded Stream","Purple Mountain Range","Storm's A Comin","Sunset Aglow","Evening at the Falls","Twilight Beauty","Angler's Haven","Rustic Winter Woods","Wilderness Falls","Winter at the Farm","Daisies at Dawn","A Spectacular View","Daybreak","Island Paradise","Sunlight in the Shadows","Splendor of a Snowy Winter","Forest River","Golden Glow of Morning","Fisherman's Trail","A Warm Winter","Under Pastel Skies","Golden Rays of Sunshine","The Magic of Fall","Glacier Lake","The Old Weathered Barn","Deep Forest Falls","Winter's Grace","Splendor of Autumn","Tranquil Seas","Mountain Serenity","Home before Nightfall","Island in the Wilderness","Autumn Oval","Seasonal Progression","Light at the Summit","Countryside Barn","Mountain Lake Falls","Cypress Creek","Trapper's Cabin","Storm on the Horizon","Pot O' Posies","A Perfect Winter Day","Aurora's Dance","Woodman's Retreat","Babbling Brook","Woodgrain's View","Winter's Peace","Wilderness Trail","A Copper Winter","Misty Foothills","Through the Window","Home in the Valley","Mountains of Grace","Seaside Harmony","A Cold Spring Day","Evening's Glow","Blue Ridge Falls","Reflections of Calm","Before the Snowfall","Winding Stream","Tranquility Cove","Cabin in the Hollow","View from Clear Creek","Bridge to Autumn","Trail's End","Evergreen Valley","Balmy Beach","Lake at the Ridge","In the Midst of Winter","Wilderness Day")
Aired<-c("1/11/83","1/11/83","1/18/83","1/25/83","2/1/83","2/8/83","2/15/83","2/22/83","3/1/83","3/8/83","3/15/83","3/22/83","3/29/83","8/31/83","9/7/83","9/14/83","9/21/83","9/28/83","10/5/83","10/12/83","10/19/83","10/26/83","11/2/83","11/9/83","11/16/83","11/23/83","1/4/84","1/11/84","1/18/84","1/25/84","2/1/84","2/8/84","2/15/84","2/22/84","2/29/84","3/7/84","3/14/84","3/21/84","3/28/84","9/5/84","9/12/84","9/19/84","9/26/84","10/3/84","10/10/84","10/17/84","10/24/84","10/31/84","11/7/84","11/14/84","11/21/84","11/28/84","1/2/85","1/9/85","1/16/85","1/23/85","1/30/85","2/6/85","2/13/85","2/20/85","2/27/85","3/6/85","3/13/85","3/20/85","3/27/85","5/1/85","5/8/85","5/15/85","5/22/85","5/29/85","6/5/85","6/12/85","6/19/85","6/26/85","7/2/85","7/9/85","7/16/85","7/23/85","10/2/85","10/9/85","10/16/85","10/23/85","10/30/85","11/6/85","11/13/85","11/20/85","11/27/85","12/4/85","12/11/85","12/18/85","12/25/85","1/2/86","1/9/86","1/16/86","1/23/86","1/30/86","2/6/86","2/13/86","2/20/86","2/27/86","3/6/86","3/13/86","3/20/86","3/27/86","4/30/86","5/7/86","5/14/86","5/21/86","5/28/86","6/4/86","6/11/86","6/18/86","6/25/86","7/2/86","7/9/86","7/16/86","7/23/86","9/3/86","9/10/86","9/17/86","9/24/86","10/1/86","10/8/86","10/15/86","10/22/86","10/29/86","11/5/86","11/12/86","11/19/86","11/26/86","12/30/86","1/7/86","1/14/86","1/21/86","1/28/86","2/4/86","2/11/86","2/18/86","2/25/86","3/4/86","3/11/86","3/18/86","3/25/86","4/29/87","5/6/87","5/13/87","5/20/87","5/27/87","6/3/87","6/10/87","6/17/87","6/24/87","7/1/87","7/8/87","7/15/87","7/22/87","9/2/87","9/9/87","9/16/87","9/23/87","9/30/87","10/7/87","10/14/87","10/21/87","10/28/87","11/4/87","11/11/87","11/18/87","11/25/87","12/30/87","1/6/88","1/13/88","1/20/88","1/27/88","2/3/88","2/10/88","2/17/88","2/24/88","3/2/88","3/9/88","3/16/88","3/23/88","4/27/88","5/4/88","5/11/88","5/18/88","5/25/88","6/1/88","6/8/88","6/15/88","6/22/88","6/29/88","7/6/88","7/13/88","7/20/88","7/17/88","7/24/88","7/31/88","8/7/88","8/14/88","8/21/88","8/28/88","9/4/88","9/11/88","9/18/88","9/25/88","10/2/88","10/9/88","1/4/89","1/11/89","1/18/89","1/25/89","2/1/89","2/8/89","2/15/89","2/22/89","3/1/89","3/8/89","3/15/89","3/22/89","3/29/89","7/5/89","7/12/89","7/19/89","7/26/89","8/2/89","8/9/89","8/16/89","8/23/89","8/30/89","9/6/89","9/13/89","9/20/89","9/27/89","1/3/90","1/10/90","1/17/90","1/24/90","1/31/90","2/7/90","2/14/90","2/21/90","2/28/90","3/7/90","3/14/90","3/21/90","3/28/90","4/4/90","4/11/90","4/18/90","4/25/90","5/2/90","5/9/90","5/16/90","5/23/90","5/30/90","6/6/90","6/13/90","6/20/90","6/27/90","9/5/90","9/12/90","9/19/90","9/26/90","10/3/90","10/10/90","10/17/90","10/24/90","10/31/90","11/7/90","11/14/90","11/21/90","11/28/90","1/1/91","1/8/91","1/15/91","1/22/91","1/29/91","2/5/91","2/12/91","2/19/91","2/26/91","3/5/91","3/12/91","3/19/91","3/26/91","9/3/91","9/10/91","9/17/91","9/24/91","10/1/91","10/8/91","10/15/91","10/22/91","10/29/91","11/5/91","11/12/91","11/19/91","11/26/91","1/7/92","1/14/92","1/21/92","1/28/92","2/4/92","2/11/92","2/18/92","2/25/92","3/3/92","3/10/92","3/17/92","3/24/92","3/31/92","8/25/92","9/1/92","9/8/92","9/15/92","9/22/92","9/29/92","10/6/92","10/13/92","10/20/92","10/27/92","11/3/92","11/10/92","11/17/92","12/1/92","12/8/92","12/15/92","12/22/92","12/29/92","1/5/93","1/12/93","1/19/93","1/26/93","2/2/93","2/9/93","2/16/93","2/23/93","3/2/93","3/9/93","3/16/93","3/23/93","3/30/93","4/6/93","4/13/93","4/20/93","4/27/93","5/4/93","5/11/93","5/18/93","5/20/93","5/25/93","6/1/93","6/8/93","6/15/93","6/22/93","6/29/93","7/6/93","7/13/93","7/20/93","7/27/93","8/3/93","8/10/93","8/17/93","8/24/93","8/31/93","9/7/93","9/14/93","9/21/93","9/28/93","10/5/93","10/12/93","10/19/93","10/26/93","11/2/93","11/9/93","11/16/93","11/23/93","11/30/93","12/7/93","12/14/93","12/21/93","12/28/93","1/4/94","1/11/94","1/18/94","1/25/94","2/1/94","2/8/94","2/15/94","2/22/94","3/1/94","3/8/94","3/15/94","3/22/94","3/29/94","4/5/94","4/12/94","4/19/94","4/26/94","5/3/94","5/10/94","5/17/94")
listEp<-data.frame("seasonCode"=seasonCode,"Title"=Title,"Aired"=Aired)
str(listEp)
## 'data.frame': 403 obs. of 3 variables:
## $ seasonCode: Factor w/ 403 levels "1 x 1","1 x 10",..: 1 6 7 8 9 10 11 12 13 2 ...
## $ Title : Factor w/ 401 levels "A Cold Spring Day",..: 8 231 104 385 269 386 25 255 286 211 ...
## $ Aired : Factor w/ 402 levels "1/1/91","1/10/90",..: 3 3 15 29 146 190 157 170 195 243 ...
listEp$seasonCode<-as.character(listEp$seasonCode)
#function to extract 1st and last character (a digit)from a string a convert them to numeric
decode<-function(x,num){
mystr<-strsplit(x, "[^[:digit:]]")
return(as.numeric(unlist(mystr))[num])
}
listEp$season<-sapply(listEp$seasonCode,FUN=decode,num=1)
listEp$episode_num<-sapply(listEp$seasonCode,FUN=decode,num=4)
listEp$Aired<-as.character(listEp$Aired)
listEp$Date<-as.Date(listEp$Aired, format = "%m/%d/%y")
listEp$year<-as.integer(format(as.Date(listEp$Date,'%m/%d/%Y'),'%Y'))
listEp$month<-as.integer(format(as.Date(listEp$Date,'%m/%d/%Y'),'%m'))
mymonths <- c("January","February","March","April","May","June","July","August","September","October","November","December")
listEp$MonthAbb <- mymonths[ listEp$month ]
listEp$month_ordered <- factor(listEp$MonthAbb, levels = month.name)
head(listEp)
## seasonCode Title Aired season episode_num Date year month MonthAbb
## 1 1 x 1 A Walk In The Woods 1/11/83 1 1 1983-01-11 1983 1 January
## 2 1 x 2 Mt McKinley 1/11/83 1 2 1983-01-11 1983 1 January
## 3 1 x 3 Ebony Sunset 1/18/83 1 3 1983-01-18 1983 1 January
## 4 1 x 4 Winter Mist 1/25/83 1 4 1983-01-25 1983 1 January
## 5 1 x 5 Quiet Stream 2/1/83 1 5 1983-02-01 1983 2 February
## 6 1 x 6 Winter Moon 2/8/83 1 6 1983-02-08 1983 2 February
## month_ordered
## 1 January
## 2 January
## 3 January
## 4 January
## 5 February
## 6 February
We can make:
colSum()
. Indeed as each row correponds to 1 episode and as each cell is filled by 1
(present) ot 0
(not present), the sum will indicate how often this feature appeared among all episodescolSum()
and summing all them.x<-data.frame('number'= colSums(df[,5:71],dims=1))
features<-rownames(x)
rownames(x)<-1:nrow(x)
x$features<-features
totEpisode<-nrow(df)
totFeatures<-sum(x$number)
x$PercentageEp <- x$number / totEpisode * 100
x$PercentEp <-paste0(round(x$PercentageEp,1),"")
x$PercentageFeat <- x$number / totFeatures * 100
x$PercentFeat <-paste0(round(x$PercentageFeat,1),"")
ggplot(data=filter(x,number>10),aes(x=reorder(features,PercentageEp),y=PercentageEp,fill=PercentageEp)) +
geom_bar(stat='identity') + geom_text(aes(label=PercentEp), position=position_dodge(width=0.9), vjust=.5,hjust=0.1,size=2.5,color='#F21A00') +
scale_fill_gradient(low="#EBCC2A",high="#3B9AB2") +
ylim(0,100)+ xlab('') + ylab('') + coord_polar(theta="y") +
geom_text(data = filter(x,number>10), hjust = 1, size = 2.75, aes(x = features, y = 0, label = features)) +
ggtitle("Feature's frequency over all episodes") + theme(legend.position="none",
panel.grid.major = element_blank(),panel.grid.minor = element_blank(),axis.line = element_blank(),axis.text.y = element_blank(),axis.text.x = element_blank(),axis.ticks = element_blank(),plot.title = element_text(hjust = 0.5,family="sans", face="bold",size=12))
ggplot(data=x,aes(x=reorder(features,-PercentageEp),y=PercentageEp,fill=PercentageEp)) +
geom_bar(stat='identity') + scale_fill_gradient(low="#EBCC2A",high="#3B9AB2") +
theme(axis.text.x = element_text(angle=90, hjust=1),legend.position="none") +
xlab('') + ylab('')
tree
and trees
with respectively 91% and 85% (for a grand total of 378 painting)tree
and trees
with respectively ~11% and ~10% of all featuresggplot(data=filter(x,number>10),aes(x=reorder(features,PercentageFeat),y=PercentageFeat,fill=PercentageFeat)) +
geom_bar(stat='identity') + geom_text(aes(label=PercentFeat), position=position_dodge(width=0.9), vjust=.5,hjust=0.1,size=2.5,color='#F21A00') +
scale_fill_gradient(low="#EBCC2A",high="#3B9AB2") +
ylim(0,13)+ xlab('') + ylab('') + coord_polar(theta="y") +
geom_text(data = filter(x,number>10), hjust = 1, size = 2.75, aes(x = features, y = 0, label = features)) +
ggtitle("Feature's percentage over all paintings") + theme(legend.position="none",
panel.grid.major = element_blank(),panel.grid.minor = element_blank(),axis.line = element_blank(),axis.text.y = element_blank(),axis.text.x = element_blank(),axis.ticks = element_blank(),plot.title = element_text(hjust = 0.5,family="sans", face="bold",size=12))
ggplot(data=x,aes(x=reorder(features,-PercentageFeat),y=PercentageFeat,fill=PercentageFeat)) +
geom_bar(stat='identity') + scale_fill_gradient(low="#EBCC2A",high="#3B9AB2") +
theme(axis.text.x = element_text(angle=90, hjust=1),legend.position="none") +
xlab('') + ylab('')
#df %>% mutate(sumVar = rowSums(.[5:71])) -> df2
For the following plots, since a row with no entries causes a standard deviation = 0, I select the features based on their frequency and remove the features that do no appear at all
#remove feature with 0 entries causing a standard deviation = 0
posFeatures <- x %>% filter(number>0) %>% arrange(-number)
pos <- posFeatures$features
df2 <- select_(df, .dots = pos)
#df2 <- df %>% select(-c(lakes))
num.cols <- sapply(df2, is.numeric)
cor.data <- cor(df2[,num.cols])
corrPLOT<-corrplot(cor.data,method='square',order="AOE")
comments next tab
topFeatures <- x %>% filter(number>1) %>% arrange(-number)
top <- topFeatures$features
df3 <- select_(df, .dots = top)
num2.cols <- sapply(df3, is.numeric)
cor2.data <- cor(df3[,num2.cols])
corrPLOT2<-corrplot(cor2.data,method='square',order="AOE")
comments :
tree
and trees
tree
and ocean
, or tree
and beach
We can make a new column that will hold the sum (over painting) of all its features
summaryPainting <- df %>% mutate(featuresSum = rowSums(.[5:71])) %>% arrange(-featuresSum) %>% select(episode,season,episode_num,title,featuresSum)
head(summaryPainting,3)
## # A tibble: 3 x 5
## episode season episode_num title featuresSum
## <chr> <dbl> <dbl> <chr> <dbl>
## 1 S02E04 2 4 SHADES OF GREY 14
## 2 S12E12 12 12 MOUNTAIN IN AND OVAL 14
## 3 S15E05 15 5 HIDDEN WINTER MOON OVAL 14
tail(summaryPainting,3)
## # A tibble: 3 x 5
## episode season episode_num title featuresSum
## <chr> <dbl> <dbl> <chr> <dbl>
## 1 S24E02 24 2 WAYSIDE POND 4
## 2 S28E05 28 5 MAGIC OF FALL 4
## 3 S25E08 25 8 CYPRESS SWAMP 3
The painting with the most features is :
summaryPainting[1,]
## # A tibble: 1 x 5
## episode season episode_num title featuresSum
## <chr> <dbl> <dbl> <chr> <dbl>
## 1 S02E04 2 4 SHADES OF GREY 14
This episode was very special because the painting was done for a blind man that Bob Ross met and told him that he could not paint hence could only see gray tones (source)
The painting with the less number of features is :
summaryPainting[nrow(summaryPainting),]
## # A tibble: 1 x 5
## episode season episode_num title featuresSum
## <chr> <dbl> <dbl> <chr> <dbl>
## 1 S25E08 25 8 CYPRESS SWAMP 3
With the dataframe containing the date information, we can make some further analysis. For exmaples :
snow
, or mountains
features, as opposed during Summer where paintings having beach
feature may be predominant.Let’s first look at all features :
timeData<-inner_join(summaryPainting,listEp, by=c('season','episode_num'))
head(timeData)
## # A tibble: 6 x 13
## episode season episode_num title featuresSum seasonCode Title
## <chr> <dbl> <dbl> <chr> <dbl> <chr> <fctr>
## 1 S02E04 2 4 SHADES OF GREY 14 2 x 4 Shades of Grey
## 2 S12E12 12 12 MOUNTAIN IN AND OVAL 14 12 x 12 Mountain in an Oval
## 3 S15E05 15 5 HIDDEN WINTER MOON OVAL 14 15 x 5 Hidden Winter Moon Oval
## 4 S01E06 1 6 WINTER MOON 13 1 x 6 Winter Moon
## 5 S08E13 8 13 NORTHERN LIGHTS 13 8 x 13 Northern Lights
## 6 S10E11 10 11 TRIPLE VIEW 13 10 x 11 Triple View
## # ... with 6 more variables: Aired <chr>, Date <date>, year <int>, month <int>, MonthAbb <chr>,
## # month_ordered <fctr>
ggplot(data = timeData, aes(x=factor(year),y=month_ordered)) + geom_tile(aes(fill = featuresSum),colour = "white")+xlab('') + ylab('') + scale_fill_gradient(low="gray",high="#C93312") + theme_fivethirtyeight()
comments :
In the next plots I will select only features evoquing Summer
or Winter
.
timeDataAll<-inner_join(df,listEp, by=c('season','episode_num'))
winterLike <- timeDataAll %>% select(year, month_ordered,snow,snowy_mountain,winter) %>% mutate(featuresWinter = rowSums(.[3:5]))
summerLike <- timeDataAll %>% select(year, month_ordered,beach,lighthouse,palm_trees,ocean) %>% mutate(featuresSummer = rowSums(.[3:5]))
g1<-ggplot(data = winterLike, aes(x=factor(year),y=month_ordered)) + geom_tile(aes(fill = featuresWinter),colour = "white")+xlab('') + ylab('') + scale_fill_gradient(low="gray",high="#3B9AB2")
g2<-ggplot(data = summerLike, aes(x=factor(year),y=month_ordered)) + geom_tile(aes(fill = featuresSummer),colour = "white")+xlab('') + ylab('') + scale_fill_gradient(low="gray",high="#EBCC2A")
grid.draw(rbind(ggplotGrob(g1), ggplotGrob(g2), size = "last"))
The idea of a graph is to show, per paintings, the connections between the features paint. Technically, it means to keep only the features that have a value 1
, and then loop over these entries and save all the combinations
#function to loop an array of X features and return a DF with feautre_1 | feature_2
makeConnection<-function(x){
feature_1<-c()
feature_2<-c()
cnt<-1
for(i in 1:(nrow(x)-1)){
for(j in (i+1):(nrow(x))){
#cat(x[i,2]," ",x[j,2],"\n")
feature_1[cnt]<-(x[i,2])
feature_2[cnt]<-(x[j,2])
cnt<-cnt+1
}
}
res<-data.frame("feature_1"=feature_1,"feature_2"=feature_2)
return(res)
}
Test with the first 2 rows :
#create empty DF to store the results
tempo<-data.frame("feature_1"= character(),"feature_2"=character())
#loop over the fist 2 rows-paintings
for(i in 1:2){
current<-data.frame(df[i,5:71])
x<-data.frame(t(current))
colnames(x)<-'number'
features<-rownames(x)
rownames(x)<-1:nrow(x)
x$features<-features
xpos<-x %>% filter(number>0)
res<-makeConnection(xpos)
tempo<-rbind(tempo,res)
}
The interesting thing is that we can apply some weights to the graph. The weights are based on the frequency of the connection between 2 features
xx<-plyr::count(tempo, vars = c("feature_1","feature_2"))
colnames(xx)[3]<-'weight'
The example above has 51 entries initially but only the connection tree -- trees
has a count(frequency) = 2
, so we can pass this as a weight for a better visualization.
yy<-graph.data.frame(xx)
#E(yy)$weight
plot(yy, vertex.shape="none", edge.arrow.size=.4, edge.curved=.1, vertex.label.font=1, vertex.label.color="blue",vertex.label.cex=.8, edge.color="black",edge.width=E(yy)$weight)
comments :
tree
and trees
are the dominant features.History :