2 Data visualization

2.1 Introduction

using PalmerPenguins, DataFrames
using AlgebraOfGraphics, CairoMakie
import AlgebraOfGraphics.density
using TidierData, TidierPlots
using Pipe: @pipe
# Axis attributes shared by every AlgebraOfGraphics `draw` call below.
axis = (; width = 1000, height = 600)
(width = 1000, height = 600)
# Load the Palmer penguins table as a DataFrame and drop every row that
# contains a missing value, then preview the first six rows.
penguins = PalmerPenguins.load() |> DataFrame |> dropmissing
first(penguins, 6)
6×7 DataFrame
Row species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
String15 String15 Float64 Float64 Int64 Int64 String7
1 Adelie Torgersen 39.1 18.7 181 3750 male
2 Adelie Torgersen 39.5 17.4 186 3800 female
3 Adelie Torgersen 40.3 18.0 195 3250 female
4 Adelie Torgersen 36.7 19.3 193 3450 female
5 Adelie Torgersen 39.3 20.6 190 3650 male
6 Adelie Torgersen 38.9 17.8 181 3625 female
# Per-column summary statistics of the cleaned table.
penguins |> describe
7×7 DataFrame
Row variable mean min median max nmissing eltype
Symbol Union… Any Union… Any Int64 DataType
1 species Adelie Gentoo 0 String15
2 island Biscoe Torgersen 0 String15
3 bill_length_mm 43.9928 32.1 44.5 59.6 0 Float64
4 bill_depth_mm 17.1649 13.1 17.3 21.5 0 Float64
5 flipper_length_mm 200.967 172 197.0 231 0 Int64
6 body_mass_g 4207.06 2700 4050.0 6300 0 Int64
7 sex female male 0 String7

2.2 First steps

# AlgebraOfGraphics: flipper length on x, body mass on y. No `visual` is given,
# so the default plot type is used. `pp` is reused by the following figures.
pp = data(penguins) * mapping(:flipper_length_mm, :body_mass_g)
draw(pp; axis)

# TidierPlots: the same figure as a point geometry.
@ggplot(data = penguins) +
  @geom_point(aes(x = flipper_length_mm, y = body_mass_g));

# AlgebraOfGraphics: extend the base layer `pp` with a color mapping on species.
pp2 = pp * mapping(; color = :species)
draw(pp2; axis)

# TidierPlots: color is part of the plot-level aesthetics.
@ggplot(
  penguins,
  aes(x = flipper_length_mm, y = body_mass_g, color = species)
) +
  @geom_point();

# AlgebraOfGraphics: sum two layers, a linear fit (`linear()`) and the plain
# mapping (`mapping()`), then apply the species color mapping to both of them.
layers = linear() + mapping()
pp2 = pp * layers * mapping(; color = :species)
draw(pp2; axis)

# TidierPlots: color sits in the plot-level aesthetics, so it is shared by the
# point and the smooth geometries.
@ggplot(
  penguins,
  aes(x = flipper_length_mm, y = body_mass_g, color = species)
) +
  @geom_point() +
  @geom_smooth(method = "lm");

# AlgebraOfGraphics: only the first layer carries the species color mapping;
# the linear-fit layer is built from `pp` alone.
pp2 = (pp * mapping(; color = :species)) + (pp * linear())
draw(pp2; axis)

# TidierPlots: color is set on the point geometry only, not at plot level.
@ggplot(
  penguins,
  aes(x = flipper_length_mm, y = body_mass_g)
) +
  @geom_point(aes(color = species)) +
  @geom_smooth(method = "lm");

# AlgebraOfGraphics: species is mapped to both color and marker in the first
# layer; the linear-fit layer is built from `pp` alone.
pp2 = (pp * mapping(; color = :species, marker = :species)) + (pp * linear())
draw(pp2; axis)

# TidierPlots: species drives both color and shape of the point geometry.
@ggplot(
  penguins,
  aes(x = flipper_length_mm, y = body_mass_g)
) +
  @geom_point(aes(color = species, shape = species)) +
  @geom_smooth(method = "lm");

# AlgebraOfGraphics: same layers as the previous figure, plus the labels that
# the TidierPlots version below sets through `@labs`.
#   * legend title: the `=> "Species"` pair on the color and marker mappings;
#   * title, subtitle, x/y labels: Axis attributes merged into the shared
#     `axis` settings (merged locally so the global `axis` stays untouched).
pp2 = pp *
  mapping(color = :species => "Species", marker = :species => "Species") +
  pp * linear()
draw(
  pp2;
  axis = merge(
    axis,
    (
      title = "Body mass and flipper length",
      subtitle = "Dimensions for Adelie, Chinstrap, and Gentoo Penguins",
      xlabel = "Flipper length (mm)",
      ylabel = "Body mass (g)",
    ),
  ),
)

# TidierPlots: points colored and shaped by species, a linear smooth, and
# explicit title, subtitle, axis and legend labels.
@ggplot(
  penguins,
  aes(x = flipper_length_mm, y = body_mass_g)
) +
  @geom_point(aes(color = species, shape = species)) + 
  @geom_smooth(method = "lm") + 
  @labs(     
    title = "Body mass and flipper length",
    subtitle = "Dimensions for Adelie, Chinstrap, and Gentoo Penguins",
    x = "Flipper length (mm)", y = "Body mass (g)",
    color = "Species", shape = "Species"  
  );

2.3 Visualizing distributions

# AlgebraOfGraphics: `frequency()` applied to the species column.
pp = data(penguins) *
  frequency() *
  mapping(:species)
draw(pp; axis)

# TidierPlots: bar geometry with species on x.
@ggplot(penguins, aes(x = species)) +
  @geom_bar();

# AlgebraOfGraphics: `density()` applied to body mass.
pp = data(penguins) *
  density() *
  mapping(:body_mass_g)
draw(pp; axis)

2.4 Visualizing relationships

2.4.1 A numerical and a categorical variable

# AlgebraOfGraphics: body mass per species, drawn with the BoxPlot visual.
pp = data(penguins) *
  mapping(:species, :body_mass_g) *
  visual(BoxPlot)
draw(pp; axis)

# TidierPlots: boxplot geometry with species on x and body mass on y.
@ggplot(penguins, aes(x = species, y = body_mass_g)) +
  @geom_boxplot();

# AlgebraOfGraphics: `density()` of body mass with species mapped to color.
pp = data(penguins) *
  mapping(:body_mass_g; color = :species) *
  density()
draw(pp; axis)

# TidierPlots equivalent of the figure above: not available yet.
# AlgebraOfGraphics: same density by species, with `alpha = 0.5` passed to
# the visual.
pp = data(penguins) *
  mapping(:body_mass_g; color = :species) *
  density() *
  visual(; alpha = 0.5)
draw(pp; axis)

# TidierPlots equivalent of the figure above: not available yet.

2.4.2 Two categorical variables

# AlgebraOfGraphics: `frequency()` per island, with species mapped to both
# color and stack.
pp = data(penguins) *
  mapping(:island; color = :species, stack = :species) *
  frequency()
draw(pp; axis)

# TidierPlots: bar geometry per island, filled by species.
@ggplot(penguins, aes(x = island, fill = species)) +
  @geom_bar();

2.4.3 Two numerical variables

# AlgebraOfGraphics: flipper length against body mass, default plot type.
pp = data(penguins) *
  mapping(:flipper_length_mm, :body_mass_g)
draw(pp; axis)

# TidierPlots: the same two variables as a point geometry.
@ggplot(penguins, aes(x = flipper_length_mm, y = body_mass_g)) +
  @geom_point();

2.4.4 Three or more variables

# AlgebraOfGraphics: four variables in one panel. Species is mapped to color
# and island to marker.
pp = data(penguins) *
  mapping(
    :flipper_length_mm,
    :body_mass_g;
    color = :species,
    marker = :island,
  )
draw(pp; axis)

# TidierPlots: species as color and island as shape on the point geometry.
@ggplot(penguins, aes(x = flipper_length_mm, y = body_mass_g)) +
  @geom_point(aes(color = species, shape = island));

# AlgebraOfGraphics: species is mapped to color and marker, and island is
# mapped to `col` to split the figure by island.
pp = data(penguins) *
  mapping(
    :flipper_length_mm,
    :body_mass_g;
    color = :species,
    marker = :species,
    col = :island,
  )
draw(pp; axis)

# TidierPlots equivalent of the figure above: not available yet.

2.5 Saving your plots

2.6 Common problems

2.7 Summary