This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
Use control+Enter to run the code chunks on PC. Use command+Enter to run the code chunks on MAC.
In this section, we install and load the necessary packages.
In this section, we import the necessary data for this lab.
You are hired as a Business Analyst to help the manager to find out if spending millions of dollars on Super Bowl ads creates social media buzz.
As a first step, carry out the analysis using DPLYR using superbowl.csv data. There are some questions below that you need to answer.
# Use five functions dim(), colnames(), str(), head() and tail() to explore all the superbowl table
# observe different columns of the table and values in the rows
# type your answer below this line
dim(superbowl)
## [1] 45 12
colnames(superbowl)
## [1] "id" "time" "new_brand" "week_of" "adspend" "month"
## [7] "year" "volume" "pos" "neg" "mixed" "superbowl"
str(superbowl)
## 'data.frame': 45 obs. of 12 variables:
## $ id : int 1 1 1 1 1 1 1 1 1 1 ...
## $ time : int 1 2 3 4 5 6 7 8 9 10 ...
## $ new_brand: chr "Beetle" "Beetle" "Beetle" "Beetle" ...
## $ week_of : chr "2-Jan-12" "9-Jan-12" "16-Jan-12" "23-Jan-12" ...
## $ adspend : num 2.7 4 1.3 0.7 7105 ...
## $ month : int 1 1 1 1 1 2 2 2 2 3 ...
## $ year : int 2012 2012 2012 2012 2012 2012 2012 2012 2012 2012 ...
## $ volume : int 2661 3620 4138 3255 5144 8021 3728 4396 4297 4326 ...
## $ pos : int 533 677 753 674 1199 2150 881 1021 865 895 ...
## $ neg : int 62 87 98 99 172 435 145 172 158 148 ...
## $ mixed : int 2066 2856 3287 2482 3773 5436 2702 3203 3274 3283 ...
## $ superbowl: int 0 0 0 0 1 1 1 1 1 1 ...
head(superbowl)
## id time new_brand week_of adspend month year volume pos neg mixed
## 1 1 1 Beetle 2-Jan-12 2.7 1 2012 2661 533 62 2066
## 2 1 2 Beetle 9-Jan-12 4.0 1 2012 3620 677 87 2856
## 3 1 3 Beetle 16-Jan-12 1.3 1 2012 4138 753 98 3287
## 4 1 4 Beetle 23-Jan-12 0.7 1 2012 3255 674 99 2482
## 5 1 5 Beetle 30-Jan-12 7105.0 1 2012 5144 1199 172 3773
## 6 1 6 Beetle 6-Feb-12 2576.2 2 2012 8021 2150 435 5436
## superbowl
## 1 0
## 2 0
## 3 0
## 4 0
## 5 1
## 6 1
tail(superbowl)
## id time new_brand week_of adspend month year volume pos neg mixed
## 40 3 10 Camaro 5-Mar-12 202.5 3 2012 108438 26236 5582 76620
## 41 3 11 Camaro 12-Mar-12 132.3 3 2012 104014 26190 5010 72814
## 42 3 12 Camaro 19-Mar-12 105.5 3 2012 92904 23048 4944 64912
## 43 3 13 Camaro 26-Mar-12 534.0 3 2012 107044 26418 5742 74884
## 44 3 14 Camaro 2-Apr-12 165.3 4 2012 85274 19522 4000 61752
## 45 3 15 Camaro 9-Apr-12 82.4 4 2012 30078 5732 924 23422
## superbowl
## 40 1
## 41 1
## 42 1
## 43 1
## 44 1
## 45 1
# tell R, new_brand, month and superbowl columns are factor
superbowl$new_brand<- as.factor(superbowl$new_brand)
superbowl$month<- as.factor(superbowl$month)
superbowl$superbowl= as.factor(superbowl$superbowl)
# tell R, week_of is date of format "%d-%b-%y"
superbowl$week_of=as.Date(superbowl$week_of, "%d-%b-%y")
# you can try and experiment with different date formats here:
# https://campus.datacamp.com/courses/intermediate-r-for-finance/dates?ex=6
# Query 1. Filter all rows where new_brand == 'Beetle'
# type your answer below this line
filter(superbowl,new_brand == 'Beetle')
## id time new_brand week_of adspend month year volume pos neg mixed
## 1 1 1 Beetle 2012-01-02 2.7 1 2012 2661 533 62 2066
## 2 1 2 Beetle 2012-01-09 4.0 1 2012 3620 677 87 2856
## 3 1 3 Beetle 2012-01-16 1.3 1 2012 4138 753 98 3287
## 4 1 4 Beetle 2012-01-23 0.7 1 2012 3255 674 99 2482
## 5 1 5 Beetle 2012-01-30 7105.0 1 2012 5144 1199 172 3773
## 6 1 6 Beetle 2012-02-06 2576.2 2 2012 8021 2150 435 5436
## 7 1 7 Beetle 2012-02-13 41.2 2 2012 3728 881 145 2702
## 8 1 8 Beetle 2012-02-20 42.8 2 2012 4396 1021 172 3203
## 9 1 9 Beetle 2012-02-27 19.8 2 2012 4297 865 158 3274
## 10 1 10 Beetle 2012-03-05 0.3 3 2012 4326 895 148 3283
## 11 1 11 Beetle 2012-03-12 1.4 3 2012 3870 873 113 2884
## 12 1 12 Beetle 2012-03-19 2.5 3 2012 3840 956 135 2749
## 13 1 13 Beetle 2012-03-26 4.3 3 2012 4240 878 109 3253
## 14 1 14 Beetle 2012-04-02 2.3 4 2012 3760 589 84 3087
## 15 1 15 Beetle 2012-04-09 0.9 4 2012 1885 311 44 1530
## superbowl
## 1 0
## 2 0
## 3 0
## 4 0
## 5 1
## 6 1
## 7 1
## 8 1
## 9 1
## 10 1
## 11 1
## 12 1
## 13 1
## 14 1
## 15 1
superbowl %>% filter(new_brand == 'Beetle')
## id time new_brand week_of adspend month year volume pos neg mixed
## 1 1 1 Beetle 2012-01-02 2.7 1 2012 2661 533 62 2066
## 2 1 2 Beetle 2012-01-09 4.0 1 2012 3620 677 87 2856
## 3 1 3 Beetle 2012-01-16 1.3 1 2012 4138 753 98 3287
## 4 1 4 Beetle 2012-01-23 0.7 1 2012 3255 674 99 2482
## 5 1 5 Beetle 2012-01-30 7105.0 1 2012 5144 1199 172 3773
## 6 1 6 Beetle 2012-02-06 2576.2 2 2012 8021 2150 435 5436
## 7 1 7 Beetle 2012-02-13 41.2 2 2012 3728 881 145 2702
## 8 1 8 Beetle 2012-02-20 42.8 2 2012 4396 1021 172 3203
## 9 1 9 Beetle 2012-02-27 19.8 2 2012 4297 865 158 3274
## 10 1 10 Beetle 2012-03-05 0.3 3 2012 4326 895 148 3283
## 11 1 11 Beetle 2012-03-12 1.4 3 2012 3870 873 113 2884
## 12 1 12 Beetle 2012-03-19 2.5 3 2012 3840 956 135 2749
## 13 1 13 Beetle 2012-03-26 4.3 3 2012 4240 878 109 3253
## 14 1 14 Beetle 2012-04-02 2.3 4 2012 3760 589 84 3087
## 15 1 15 Beetle 2012-04-09 0.9 4 2012 1885 311 44 1530
## superbowl
## 1 0
## 2 0
## 3 0
## 4 0
## 5 1
## 6 1
## 7 1
## 8 1
## 9 1
## 10 1
## 11 1
## 12 1
## 13 1
## 14 1
## 15 1
# interpret the results:
################################################################################
# Query 2.Select all the columns related to buzz metrics (volume, positive and negative mentions)
# type your answer below this line
superbowl %>% select(volume, pos, neg)
## volume pos neg
## 1 2661 533 62
## 2 3620 677 87
## 3 4138 753 98
## 4 3255 674 99
## 5 5144 1199 172
## 6 8021 2150 435
## 7 3728 881 145
## 8 4396 1021 172
## 9 4297 865 158
## 10 4326 895 148
## 11 3870 873 113
## 12 3840 956 135
## 13 4240 878 109
## 14 3760 589 84
## 15 1885 311 44
## 16 1014 246 54
## 17 970 232 42
## 18 1554 348 110
## 19 1148 246 36
## 20 3342 446 72
## 21 2038 580 72
## 22 1534 264 56
## 23 1324 494 42
## 24 1434 432 66
## 25 1268 302 36
## 26 1228 282 34
## 27 1230 298 76
## 28 1396 282 50
## 29 988 262 40
## 30 566 88 4
## 31 78030 18528 4036
## 32 84868 19966 4348
## 33 91640 23230 4698
## 34 95844 24004 4752
## 35 98910 25140 4908
## 36 100034 27568 5022
## 37 94030 24234 4640
## 38 101664 26762 5454
## 39 107356 27746 5242
## 40 108438 26236 5582
## 41 104014 26190 5010
## 42 92904 23048 4944
## 43 107044 26418 5742
## 44 85274 19522 4000
## 45 30078 5732 924
# interpret the results:
################################################################################
# Query 3. Arrange the adspend column in descending order and see which brand has the highest ad spend and in which week_of
# type your answer below this line
superbowl %>% arrange(-adspend)
## id time new_brand week_of adspend month year volume pos neg mixed
## 1 1 5 Beetle 2012-01-30 7105.0 1 2012 5144 1199 172 3773
## 2 3 5 Camaro 2012-01-30 3977.6 1 2012 98910 25140 4908 68862
## 3 1 6 Beetle 2012-02-06 2576.2 2 2012 8021 2150 435 5436
## 4 3 13 Camaro 2012-03-26 534.0 3 2012 107044 26418 5742 74884
## 5 3 6 Camaro 2012-02-06 272.1 2 2012 100034 27568 5022 67444
## 6 3 10 Camaro 2012-03-05 202.5 3 2012 108438 26236 5582 76620
## 7 3 14 Camaro 2012-04-02 165.3 4 2012 85274 19522 4000 61752
## 8 3 11 Camaro 2012-03-12 132.3 3 2012 104014 26190 5010 72814
## 9 3 12 Camaro 2012-03-19 105.5 3 2012 92904 23048 4944 64912
## 10 3 9 Camaro 2012-02-27 88.9 2 2012 107356 27746 5242 74368
## 11 3 3 Camaro 2012-01-16 86.9 1 2012 91640 23230 4698 63712
## 12 3 15 Camaro 2012-04-09 82.4 4 2012 30078 5732 924 23422
## 13 3 1 Camaro 2012-01-02 75.3 1 2012 78030 18528 4036 55466
## 14 3 8 Camaro 2012-02-20 72.7 2 2012 101664 26762 5454 69448
## 15 3 4 Camaro 2012-01-23 61.2 1 2012 95844 24004 4752 67088
## 16 3 2 Camaro 2012-01-09 61.0 1 2012 84868 19966 4348 60554
## 17 3 7 Camaro 2012-02-13 59.9 2 2012 94030 24234 4640 65156
## 18 1 8 Beetle 2012-02-20 42.8 2 2012 4396 1021 172 3203
## 19 1 7 Beetle 2012-02-13 41.2 2 2012 3728 881 145 2702
## 20 1 9 Beetle 2012-02-27 19.8 2 2012 4297 865 158 3274
## 21 1 13 Beetle 2012-03-26 4.3 3 2012 4240 878 109 3253
## 22 1 2 Beetle 2012-01-09 4.0 1 2012 3620 677 87 2856
## 23 1 1 Beetle 2012-01-02 2.7 1 2012 2661 533 62 2066
## 24 1 12 Beetle 2012-03-19 2.5 3 2012 3840 956 135 2749
## 25 2 4 CR-Z 2012-01-23 2.4 1 2012 1148 246 36 866
## 26 1 14 Beetle 2012-04-02 2.3 4 2012 3760 589 84 3087
## 27 1 11 Beetle 2012-03-12 1.4 3 2012 3870 873 113 2884
## 28 1 3 Beetle 2012-01-16 1.3 1 2012 4138 753 98 3287
## 29 2 3 CR-Z 2012-01-16 1.2 1 2012 1554 348 110 1096
## 30 2 7 CR-Z 2012-02-13 1.2 2 2012 1534 264 56 1214
## 31 2 1 CR-Z 2012-01-02 1.0 1 2012 1014 246 54 714
## 32 1 15 Beetle 2012-04-09 0.9 4 2012 1885 311 44 1530
## 33 2 5 CR-Z 2012-01-30 0.9 1 2012 3342 446 72 2824
## 34 2 8 CR-Z 2012-02-20 0.9 2 2012 1324 494 42 788
## 35 2 9 CR-Z 2012-02-27 0.8 2 2012 1434 432 66 936
## 36 2 11 CR-Z 2012-03-12 0.8 3 2012 1228 282 34 912
## 37 1 4 Beetle 2012-01-23 0.7 1 2012 3255 674 99 2482
## 38 2 6 CR-Z 2012-02-06 0.6 2 2012 2038 580 72 1386
## 39 2 12 CR-Z 2012-03-19 0.6 3 2012 1230 298 76 856
## 40 2 10 CR-Z 2012-03-05 0.5 3 2012 1268 302 36 930
## 41 2 13 CR-Z 2012-03-26 0.5 3 2012 1396 282 50 1064
## 42 2 14 CR-Z 2012-04-02 0.5 4 2012 988 262 40 686
## 43 2 15 CR-Z 2012-04-09 0.5 4 2012 566 88 4 474
## 44 2 2 CR-Z 2012-01-09 0.4 1 2012 970 232 42 696
## 45 1 10 Beetle 2012-03-05 0.3 3 2012 4326 895 148 3283
## superbowl
## 1 1
## 2 1
## 3 1
## 4 1
## 5 1
## 6 1
## 7 1
## 8 1
## 9 1
## 10 1
## 11 0
## 12 1
## 13 0
## 14 1
## 15 0
## 16 0
## 17 1
## 18 1
## 19 1
## 20 1
## 21 1
## 22 0
## 23 0
## 24 1
## 25 0
## 26 1
## 27 1
## 28 0
## 29 0
## 30 1
## 31 0
## 32 1
## 33 1
## 34 1
## 35 1
## 36 1
## 37 0
## 38 1
## 39 1
## 40 1
## 41 1
## 42 1
## 43 1
## 44 0
## 45 1
# interpret the results:
################################################################################
# Query 4. Create a new column called sentiment which is a sum of positive (column name is pos) and negative (column name is neg) mentions,
# and select only three columns for display, week_of, new_brand and sentiment
# type your answer below this line
superbowl %>% mutate(sentiment=pos + neg) %>%
select(week_of, new_brand, sentiment)
## week_of new_brand sentiment
## 1 2012-01-02 Beetle 595
## 2 2012-01-09 Beetle 764
## 3 2012-01-16 Beetle 851
## 4 2012-01-23 Beetle 773
## 5 2012-01-30 Beetle 1371
## 6 2012-02-06 Beetle 2585
## 7 2012-02-13 Beetle 1026
## 8 2012-02-20 Beetle 1193
## 9 2012-02-27 Beetle 1023
## 10 2012-03-05 Beetle 1043
## 11 2012-03-12 Beetle 986
## 12 2012-03-19 Beetle 1091
## 13 2012-03-26 Beetle 987
## 14 2012-04-02 Beetle 673
## 15 2012-04-09 Beetle 355
## 16 2012-01-02 CR-Z 300
## 17 2012-01-09 CR-Z 274
## 18 2012-01-16 CR-Z 458
## 19 2012-01-23 CR-Z 282
## 20 2012-01-30 CR-Z 518
## 21 2012-02-06 CR-Z 652
## 22 2012-02-13 CR-Z 320
## 23 2012-02-20 CR-Z 536
## 24 2012-02-27 CR-Z 498
## 25 2012-03-05 CR-Z 338
## 26 2012-03-12 CR-Z 316
## 27 2012-03-19 CR-Z 374
## 28 2012-03-26 CR-Z 332
## 29 2012-04-02 CR-Z 302
## 30 2012-04-09 CR-Z 92
## 31 2012-01-02 Camaro 22564
## 32 2012-01-09 Camaro 24314
## 33 2012-01-16 Camaro 27928
## 34 2012-01-23 Camaro 28756
## 35 2012-01-30 Camaro 30048
## 36 2012-02-06 Camaro 32590
## 37 2012-02-13 Camaro 28874
## 38 2012-02-20 Camaro 32216
## 39 2012-02-27 Camaro 32988
## 40 2012-03-05 Camaro 31818
## 41 2012-03-12 Camaro 31200
## 42 2012-03-19 Camaro 27992
## 43 2012-03-26 Camaro 32160
## 44 2012-04-02 Camaro 23522
## 45 2012-04-09 Camaro 6656
# interpret the results: