Homework assignment No.4

Describe Your Data:

  1. 4 erroneous visualizations and correct versions.
  2. 4 lying/misleading visualizations and correct versions.

Visualizations

The graphs are presented in the following order: the first graph is incorrect, and the graph after it (the second one) is what I believe to be the correct way to visualize the data.

## [1] "LC_COLLATE=English_United States.1252;LC_CTYPE=English_United States.1252;LC_MONETARY=English_United States.1252;LC_NUMERIC=C;LC_TIME=English_United States.1252"

Erroneous visualizations

1. Pie charts are difficult to read when many different categories are present

# Pie chart of cargo counts per unit type (the deliberately hard-to-read
# version): one slice per Var1 level, sized by prop and labelled with Freq.
ggplot(data = data3, mapping = aes(x = "", y = prop, fill = Var1)) +
  geom_col(width = 1, color = "white") +
  # Wrapping the single stacked bar around the y axis turns it into a pie.
  coord_polar(theta = "y", start = 0) +
  theme_void() +
  # x = 1.35 moves the count labels away from the centre of the pie.
  geom_text(
    mapping = aes(x = 1.35, y = ypos, label = Freq),
    color = "black",
    size = 4
  ) +
  scale_fill_brewer(name = "Unit type ID", palette = "Set2") +
  ggtitle("Number of different types of cargo")


# Bar chart of the same counts: one bar per Var1 level with its Freq printed
# on the bar, so all categories share a common baseline.
ggplot(data = data3, mapping = aes(x = Var1, y = Freq, fill = Var1)) +
  geom_col() +
  # vjust = 1.2 draws each count just below the top edge of its bar.
  geom_text(
    mapping = aes(label = Freq),
    color = "black",
    size = 4,
    vjust = 1.2
  ) +
  scale_fill_brewer(name = "Unit type ID", palette = "Set2") +
  ggtitle("Number of different types of cargo") +
  xlab("Unit type ID") +
  ylab("Count") +
  theme_light()

2. Unknown values presented

# Pie chart of data1 with the legend suppressed: slices carry only the raw
# Var1 value as a label, and the fill scale has an empty name.
ggplot(data = data1, mapping = aes(x = "", y = prop, fill = Var1)) +
  geom_col(width = 1, color = "white") +
  coord_polar(theta = "y", start = 0) +
  theme_void() +
  geom_text(
    mapping = aes(y = ypos, label = Var1),
    color = "white",
    size = 6
  ) +
  scale_fill_brewer(name = "", palette = "Dark2") +
  ggtitle("Percentage of cargo in regards to redirecting each to a terminal") +
  # Applied after theme_void() so the complete theme does not override it.
  theme(legend.position = "none")



# Same pie chart with a titled legend; each slice is labelled with the proc
# column instead of the raw Var1 value.
ggplot(data = data1, mapping = aes(x = "", y = prop, fill = Var1)) +
  geom_col(width = 1, color = "white") +
  coord_polar(theta = "y", start = 0) +
  theme_void() +
  geom_text(
    mapping = aes(y = ypos, label = proc),
    color = "white",
    size = 6
  ) +
  scale_fill_brewer(name = "Terminal in the sender's city", palette = "Dark2") +
  ggtitle("Percentage of cargo in regards to redirecting each to a terminal")

3. Missing a legend

# Dodged bar chart of request counts per weekday, one bar per `week`, with the
# legend hidden (the deliberately incomplete version: the fill colours cannot
# be decoded by the reader).
# after_stat(count) replaces the `..count..` notation, which is deprecated as
# of ggplot2 3.4.0; after_stat() requires ggplot2 >= 3.3.0.
ggplot(data, aes(x = day, y = after_stat(count))) +
  geom_bar(aes(fill = week), position = "dodge") +
  theme_minimal() +
  # Applied after theme_minimal() so the complete theme does not override it.
  theme(legend.position = "none") +
  labs(
    title = "Number of cargo requests per day each week",
    x = "Day of the week",
    y = "Count"
  )


# Dodged bar chart of request counts per weekday, one bar per `week`, with the
# default legend kept so the fill colours can be decoded.
# after_stat(count) replaces the `..count..` notation, which is deprecated as
# of ggplot2 3.4.0; after_stat() requires ggplot2 >= 3.3.0.
ggplot(data, aes(x = day, y = after_stat(count))) +
  geom_bar(aes(fill = week), position = "dodge") +
  theme_minimal() +
  labs(
    title = "Number of cargo requests per day each week",
    x = "Day of the week",
    y = "Count"
  )

4. Wrong graph used

# Request counts per day drawn as bars on polar coordinates (the deliberately
# ill-suited chart type). Earlier experiments with ylim() and a blanked-out
# theme were disabled in the original and are not applied here.
ggplot(data = df, mapping = aes(x = day, y = number, fill = day)) +
  geom_col() +
  coord_polar(start = 0) +
  theme_minimal() +
  ggtitle("Number of cargo requests per day")


# Plain bar chart of datecount per day on Cartesian axes, with labelled axes.
ggplot(data = data, mapping = aes(x = day, y = datecount, fill = day)) +
  geom_col() +
  theme_minimal() +
  ggtitle("Number of cargo requests per day") +
  xlab("Day of the week") +
  ylab("Count")

Misleading visualizations

1. Misleading proportions

# Misleading pie: slice sizes are hard-coded to 50/50 instead of using the
# prop column, while the labels still print the proc column.
ggplot(data = data1, mapping = aes(x = "", y = c(50, 50), fill = Var1)) +
  geom_col(width = 1, color = "white") +
  coord_polar(theta = "y", start = 0) +
  theme_void() +
  geom_text(
    mapping = aes(y = ypos, label = proc),
    color = "white",
    size = 6
  ) +
  scale_fill_brewer(name = "Terminal in the sender's city", palette = "Dark2") +
  ggtitle("Percentage of cargo in regards to redirecting each to a terminal")


# Corrected pie: slice sizes come from the prop column, so they agree with
# the proc labels printed on the slices.
ggplot(data = data1, mapping = aes(x = "", y = prop, fill = Var1)) +
  geom_col(width = 1, color = "white") +
  coord_polar(theta = "y", start = 0) +
  theme_void() +
  geom_text(
    mapping = aes(y = ypos, label = proc),
    color = "white",
    size = 6
  ) +
  scale_fill_brewer(name = "Terminal in the sender's city", palette = "Dark2") +
  ggtitle("Percentage of cargo in regards to redirecting each to a terminal")

2. Increasing significance by changing the y-axis

# Misleading bar chart: the visible y range is zoomed to 20-45, so the bars
# no longer start at zero and differences between them look larger.
# NOTE(review): x maps Terminal_s but the axis label reads "Unit type ID" --
# possibly copied from another chart; confirm the intended label.
ggplot(
  data = data,
  mapping = aes(x = Terminal_s, y = datecount, fill = Terminal_s)
) +
  geom_col() +
  coord_cartesian(ylim = c(20, 45)) +
  ggtitle(
    "Number of cargo that was redirected to a terminal in the sender country"
  ) +
  xlab("Unit type ID") +
  ylab("Count") +
  theme_light()


# Corrected bar chart: the y axis starts at zero so bar heights are
# proportional to the values they represent.
# coord_cartesian() is used instead of ylim(): ylim() sets scale limits, which
# would drop any bar whose (stacked) top exceeds 45, whereas coord_cartesian()
# only zooms the view. It also mirrors the truncated-axis version of this
# chart, so the two plots differ in the lower limit alone.
# NOTE(review): x maps Terminal_s but the axis label reads "Unit type ID" --
# possibly copied from another chart; confirm the intended label.
ggplot(data, aes(x = Terminal_s, y = datecount, fill = Terminal_s)) +
  geom_col() +
  coord_cartesian(ylim = c(0, 45)) +
  labs(
    title = "Number of cargo that was redirected to a terminal in the sender country",
    x = "Unit type ID",
    y = "Count"
  ) +
  theme_light()

3. Omitting significant values

# Misleading bar chart: row 9 of df is excluded before plotting, so that
# observation never appears in the figure.
ggplot(data = df[-9, ], mapping = aes(x = day, y = number, fill = day)) +
  geom_col() +
  theme_light() +
  ggtitle("Number of cargo requests per day") +
  xlab("") +
  ylab("Number of cargo")

  
# Corrected bar chart: every row of df is plotted, nothing is left out.
ggplot(data = df, mapping = aes(x = day, y = number, fill = day)) +
  geom_col() +
  theme_light() +
  ggtitle("Number of cargo requests per day") +
  xlab("") +
  ylab("Number of cargo")

4. Using the average where outliers are present

# Misleading summary plot: one point per row of data5, showing Value against
# UnitTypeID and coloured by unit type.
# The original also passed fill = "Unit Type" to labs(), but no layer maps
# `fill` (only `color` is mapped), so that label was dead and is removed; the
# legend title comes from scale_color_brewer().
ggplot(data5, aes(x = UnitTypeID, y = Value, color = UnitTypeID)) +
  geom_point(size = 4) +
  scale_color_brewer(palette = "Dark2", name = "Unit Type") +
  labs(
    title = "Average revenue in different unit types",
    x = "Unit Type",
    y = "Revenue (€)"
  ) +
  theme_minimal() +
  # Applied after theme_minimal(); hides the x tick labels, leaving the colour
  # legend as the only key to the unit types.
  theme(axis.text.x = element_blank())


# Corrected plot: a boxplot of Value per UnitTypeID, with outliers drawn as
# solid black points so they stay visible.
# NOTE(review): the title says "Average", but a boxplot marks the median and
# quartiles -- confirm whether the title should be reworded.
ggplot(data, aes(x = UnitTypeID, y = Value, fill = UnitTypeID)) +
  geom_boxplot(
    outlier.colour = "black",
    outlier.shape = 16,
    outlier.size = 3,
    # Spelled out as FALSE: `F` is an ordinary variable that can be reassigned.
    notch = FALSE
  ) +
  scale_fill_brewer(palette = "Dark2", name = "Unit Type") +
  labs(
    title = "Average revenue in different unit types",
    x = "",
    y = "Revenue (€)"
  ) +
  theme_minimal()