Getting & Cleanning Dataset

URL: https://www.census.gov/data/tables/2021/econ/tech-stats/2021-exhibit-16.html URL: https://www.census.gov/foreign-trade/Press-Release/2019pr/12/exh16a.pdf

library(xlsx)

dt1 <- read.xlsx("C:/Users/Anhuynh/Desktop/Data Science Project/Trends In Advanced Tech Products 2021/exh16a.xlsx", sheetIndex = 1, header = TRUE, startRow = 6)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
sub_dt1 <- subset(dt1, select = c(1:7))
sub_dt1 <- sub_dt1[2:12,]
colnames(sub_dt1) <- c("Technology.Group", "Balance_05.2021", "Exports_05.2021", "Imports_05.2021", "Balance_04.2021", "Exports_04.2021", "Imports_04.2021")
sub_dt1$Exports_05.2021 <- as.numeric(sub_dt1$Exports_05.2021)
sub_dt1$Exports_04.2021 <- as.numeric(sub_dt1$Exports_04.2021)
sub_dt1$Imports_05.2021 <- as.numeric(sub_dt1$Imports_05.2021)
sub_dt1$Imports_04.2021 <- as.numeric(sub_dt1$Imports_04.2021)
sub_dt1 <- sub_dt1[3:12,] %>%
    mutate(Total.Export = Exports_05.2021 + Exports_04.2021) %>%
    mutate(Total.Import = Imports_05.2021 + Imports_04.2021) %>%
    mutate(Change.Ex = (Exports_05.2021 - Exports_04.2021)/Exports_05.2021) %>%
    mutate(Change.Imp = (Imports_05.2021 - Imports_04.2021)/Imports_05.2021)

sub_dt1$Up_DownEx <- ifelse(sub_dt1$Change.Ex > 0, "Up" , "Down")
sub_dt1$Up_DownImp <- ifelse(sub_dt1$Change.Imp > 0, "Up" , "Down")
sub_dt1 <- na.omit(sub_dt1)
library(ggplot2)

ggplot(sub_dt1, aes(x = Total.Export, y = Technology.Group, fill = Up_DownEx, label = Total.Export)) +
  geom_col() +
  scale_fill_manual(values = c("#3db5ff", "#0099f9")) +
  geom_text(position = position_stack(vjust = 0.5), size = 4, color = "#ffffff") +
  labs(title="US Tech Products Export Q2 2021 (Millions of Dollars)")

ggplot(sub_dt1, aes(x = Total.Import, y = Technology.Group, fill = Up_DownImp, label = Total.Import)) +
  geom_col() +
  scale_fill_manual(values = c("#3db5ff", "#0099f9")) +
  geom_text(position = position_stack(vjust = 0.5), size = 4, color = "#ffffff") +
  labs(title="US Tech Products Import Q2 2021 (Millions of Dollars)")

sub_dt1$Balance_05.2021 <- as.numeric(sub_dt1$Balance_05.2021)
sub_dt1$Balance_04.2021 <- as.numeric(sub_dt1$Balance_04.2021)

cor.dt <- cor(sub_dt1[,c("Balance_05.2021", "Exports_05.2021", "Imports_05.2021", "Balance_04.2021", "Exports_04.2021", "Imports_04.2021")])
head(round(cor.dt,2))
##                 Balance_05.2021 Exports_05.2021 Imports_05.2021 Balance_04.2021
## Balance_05.2021            1.00           -0.38           -0.93            0.99
## Exports_05.2021           -0.38            1.00            0.69           -0.34
## Imports_05.2021           -0.93            0.69            1.00           -0.91
## Balance_04.2021            0.99           -0.34           -0.91            1.00
## Exports_04.2021           -0.35            0.99            0.67           -0.31
## Imports_04.2021           -0.93            0.68            1.00           -0.92
##                 Exports_04.2021 Imports_04.2021
## Balance_05.2021           -0.35           -0.93
## Exports_05.2021            0.99            0.68
## Imports_05.2021            0.67            1.00
## Balance_04.2021           -0.31           -0.92
## Exports_04.2021            1.00            0.66
## Imports_04.2021            0.66            1.00
library(lares)

sub_cor <- subset(sub_dt1, select = c("Balance_05.2021", "Exports_05.2021", "Imports_05.2021", "Balance_04.2021", "Exports_04.2021", "Imports_04.2021"))
corr_cross(sub_cor, max_pvalue = 0.05, top = 10)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## Warning in theme_lares(legend = "top"): Font 'Arial Narrow' is not installed,
## has other name, or can't be found

sub_dt2 <- subset(dt1, select = c(1:7))
sub_dt2 <- sub_dt2[16:46,]
colnames(sub_dt2) <- c("Selected Countries and Areas", "Balance_05.2021", "Exports_05.2021", "Imports_05.2021", "Balance_04.2021", "Exports_04.2021", "Imports_04.2021")
sub_dt2$Exports_05.2021 <- as.numeric(sub_dt2$Exports_05.2021)
sub_dt2$Exports_04.2021 <- as.numeric(sub_dt2$Exports_04.2021)
sub_dt2$Imports_05.2021 <- as.numeric(sub_dt2$Imports_05.2021)
sub_dt2$Imports_04.2021 <- as.numeric(sub_dt2$Imports_04.2021)
sub_dt2 <- sub_dt2[17:46,] %>%
    mutate(Total.Export = Exports_05.2021 + Exports_04.2021) %>%
    mutate(Total.Import = Imports_05.2021 + Imports_04.2021) %>%
    mutate(Change.Ex = (Exports_05.2021 - Exports_04.2021)/Exports_05.2021) %>%
    mutate(Change.Imp = (Imports_05.2021 - Imports_04.2021)/Imports_05.2021)

sub_dt2$Up_DownEx <- ifelse(sub_dt2$Change.Ex > 0, "Up" , "Down")
sub_dt2$Up_DownImp <- ifelse(sub_dt2$Change.Imp > 0, "Up" , "Down")

sub_dt2[6, 1] <- 'Other_Pacific Rim Countries'
sub_dt2[9, 1] <- 'Other_South/Central America'

sub_dt2 <- na.omit(sub_dt2)
# Sorting data for creating graph chart.

subEx = sub_dt2[order(sub_dt2$Change.Ex, decreasing = TRUE), ]
sort_subEx <- head(subEx, 10)

subImp = sub_dt2[order(sub_dt2$Change.Imp, decreasing = TRUE), ]
sort_subImp <- head(subImp, 10)


# Data visualization

sort_subEx$value <- round(sort_subEx$Change.Ex)
sort_subEx <- sort_subEx %>%
  group_by(Up_DownEx) %>%
  mutate(cum_val = cumsum(Change.Ex)) %>%
  mutate(sum_val = sum(Change.Ex) )

sort_subImp$value <- round(sort_subImp$Change.Imp)
sort_subImp <- sort_subImp %>%
  group_by(Up_DownImp) %>%
  mutate(cum_val2 = cumsum(Change.Imp)) %>%
  mutate(sum_val2 = sum(Change.Imp) )

ggplot(data=sort_subEx, aes(x = reorder(`Selected Countries and Areas`, -sum_val), y=Total.Export, fill=Up_DownEx, label=Total.Export)) +
geom_bar(stat="identity", width=0.8) +
geom_text(aes(y=cum_val, label=""), vjust=1, color="black", size=2) +
scale_fill_manual(values = c("#3db5ff", "#0099f9")) +
labs(title="Top 10 Export Countries For Advanced Tech Products In Q2 2021") +
labs(x="Countries and Areas",y="Total Export (millions)") +
theme(legend.title = element_blank(), axis.text.x=element_text(angle=45,hjust=1,vjust=1))

ggplot(data=sort_subImp, aes(x = reorder(`Selected Countries and Areas`, -sum_val2), y=Total.Import, fill=Up_DownImp, label=Total.Import)) +
geom_bar(stat="identity", width=0.8) +
geom_text(aes(y=cum_val2, label=""), vjust=1, color="black", size=2) +
scale_fill_manual(values = c("#3db5ff", "#0099f9")) +
labs(title="Top 10 Import Countries For Advanced Tech Products In Q2 2021") +
labs(x="Countries and Areas",y="Total Import (millions)") +
theme(legend.title = element_blank(), axis.text.x=element_text(angle=45,hjust=1,vjust=1))

Part II: Technology/SaaS For Marketing Case Study

URL: https://data.world/databrett/martech-5000-from-the-2017-marketing-technology-conference/workspace/project-summary?agentid=databrett&datasetid=martech-5000-from-the-2017-marketing-technology-conference

URL: https://chiefmartec.com/2017/05/marketing-techniology-landscape-supergraphic-2017/

library("httr")
library("readxl")

GET("https://query.data.world/s/yspclintwoqawwpqovf2f4mvwznqku", write_disk(tf <- tempfile(fileext = ".xlsx")))
## Response [https://download.data.world/file_download/databrett/martech-5000-from-the-2017-marketing-technology-conference/martech2017.xlsx?auth=eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiJwcm9kLXVzZXItY2xpZW50OmFubmFodXluaCIsImlzcyI6ImFnZW50OmFubmFodXluaDo6M2M2NjlhZWUtYmZkMi00MDg2LWFhYjItMjg0MWNhZWJkYmI0IiwiaWF0IjoxNjI1NzE1NjEyLCJyb2xlIjpbInVzZXIiLCJ1c2VyX2FwaV9hZG1pbiIsInVzZXJfYXBpX2VudGVycHJpc2VfYWRtaW4iLCJ1c2VyX2FwaV9yZWFkIiwidXNlcl9hcGlfd3JpdGUiXSwiZ2VuZXJhbC1wdXJwb3NlIjpmYWxzZSwidXJsIjoiMjc2Y2RkYzUyZjVlMTZkMzhkYjM0N2QwMTZhNzRiNmJhMzQwYTRhYSJ9.FycDN1J2MrsM9mCWYUwGkS2K2RC0U5QSHNSevGd_fR2Qef0NfvdmSsO-CYr-fN53MdJSToIil-iAY5nsQvDsfQ]
##   Date: 2021-07-08 16:52
##   Status: 200
##   Content-Type: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
##   Size: 383 kB
## <ON DISK>  C:\Users\Anhuynh\AppData\Local\Temp\Rtmpiul98w\file1de462fd7f30.xlsx
df <- read_excel(tf)
library(dplyr)

# Count on Sub_Category
sub <- matrix(table(df$`Sub-Category`))
df_sub <- data.frame(`Sub-Category` = c("ABM", "Advocacy, Loyalty & Referrals", "Affiliate Marketing & Management", "Agile & Lean Management", "Audience/Marketing Data & Data Enhancement", "Budgeting & Finance", "Business/Customer Intelligence & Data Science", "Call Analytics & Management", "Channel, Partner & Local Marketing", "CMS & Web Experience Management", "Collaboration", "Community & Reviews", "Content Marketing", "CRM", "Customer Data Platform", "Customer Experience, Service & Success", "DAM & MRM", "Dashboards & Data Visualization", "Display & Programmatic Advertising", "DMP", "Ecommerce Marketing", "Ecommerce Platforms & Carts", "Email Marketing", "Events, Meetings & Webinars", "Feedback & Chat", "Influencers", "Interactive Content", "iPaaS, Cloud/Data Integration & Tag Management", "Marketing Analytics, Performance & Attribution", "Marketing Automation & Campaign/Lead Management", "Mobile & Web Analytics", "Mobile Apps", " Mobile Marketing", "Native/Content Advertising", "Optimization, Personalization & Testing", "PR", "Predictive Analytics", "Print", "Product Management", "Projects & Workflow", "Retail, Proximity & IoT Marketing", "Sales Automation, Enablement & Intelligence", "Search & Social Advertising", "SEO", "Social Media Marketing & Monitoring", "Talent Management", "Vendor Analysis", "Video Advertising", "Video Marketing" ), count_sub = sub[,1])

cat <- matrix(table(df$Category))
df_cat <- data.frame(Category = c("Advertising & Promotion", " Commerce & Sales", "Content & Experience", "Data", "Management", "Social & Relationships"), count_cat = cat[,1])

df_fin <- merge(df_cat, df_sub)

df_fin <- df_fin %>%
    group_by(Category) %>%
    mutate(sum_by_category = sum(count_cat))

df_fin <- df_fin %>%
    group_by(Sub.Category) %>%
    mutate(sum_by_sub = sum(count_sub))
library(ggplot2)

subcat = df_sub[order(df_sub$count_sub, decreasing = TRUE), ]
sort_subcat <- head(subcat, 5)

sort_sub <- sort_subcat %>%
  group_by(Sub.Category) %>%
  mutate(cum_val3 = cumsum(count_sub)) %>%
  mutate(sum_val3 = sum(count_sub) )

ggplot(data=sort_sub, aes(x = reorder(Sub.Category, -sum_val3), y=count_sub, fill=Sub.Category)) +
geom_bar(stat="identity", width=0.8) +
geom_text(aes(y=cum_val3, label=""), vjust=1, color="black", size=1.5) +
labs(title="Top 5 Sub-Categories Supported by Tech Products in Marketing") +
labs(x="Sub-Category",y="Count of products") +
theme(legend.title = element_blank() )

library(ggthemes)

# Need to make a new transformed data-set for this visualization
 
(
  classe_table <- df %>%
    count(Category = factor(Category)) %>% 
    mutate(pct = prop.table(n)) %>%
    arrange(-pct) %>% 
    tibble()
)
## # A tibble: 6 x 3
##   Category                    n    pct
##   <fct>                   <int>  <dbl>
## 1 Content & Experience     1317 0.253 
## 2 Social & Relationships   1104 0.212 
## 3 Commerce & Sales          893 0.172 
## 4 Data                      816 0.157 
## 5 Advertising & Promotion   693 0.133 
## 6 Management                382 0.0734
ggplot(
  classe_table %>% filter(Category != "NA"),
  mapping = aes(
    x = reorder(Category, n),
    y = pct,
    group = 1,
    label = scales::percent(pct)
  )
) +
  theme_fivethirtyeight() +
  geom_bar(stat = "identity",
           fill = "#0099f9") +
  geom_text(position = position_dodge(width = 0.9),
            # move to center of bars
            hjust = -0.05,
            #Have Text just above bars
            size = 2.5) +
  labs(x = "Categories",
       y = "Proportion") +
  theme(axis.text.x = element_text(
    angle = 90,
    vjust = 0.5,
    hjust = 1
  )) +
  ggtitle("Categories Supported By Tech Products") +
  scale_y_continuous(labels = scales::percent) +                
  coord_flip()