For our analysis of the German election data, we prepared a script in the statistical programming environment R. The script was divided in two main parts: A preprocessing part in which we cleaned and structured the raw election data as we got it directly from the electoral management body, and the analysis part that transformed the election data into tables, models and maps.

All steps in the preprocessing were made to fit the data from the 2013 elections and were adjusted for the 2017 data when the first extrapolations were published. In this Notebook we show how we structured the original dataset in order to work with it and how we created the small multiples published in our project.

Preprocessing the data from the electoral management body

# install.packages("needs");library(needs)
needs(tidyverse, magrittr, rgdal, grid, gridExtra, mapproj, rgeos) # loading needed packages
# tidyverse, magrittr, stringr: to manipulate and structure data
# rgdal, mapproj, rgeos: to load geospatial data, apply map projections and calculate shape centroids
# grid, gridExtra: to arrange the maps in a grid for this Notebook

Load the election data

d <- read.csv("https://www.bundeswahlleiter.de/dam/jcr/72f186bb-aa56-47d3-b24c-6a46f5de22d0/btw17_kerg.csv", header=F, sep=";", skip=1, stringsAsFactors = F, na.strings="")

head(d[1:9])
##                                                                                         V1
## 1                                                                                       # 
## 2                                  # Wahl zum 19. Deutschen Bundestag (24. September 2017)
## 3 # Endgültige Ergebnisse der Erst- und Zweitstimmen sowie der Vorperiode nach Wahlkreisen
## 4                                                                                       # 
## 5                                                                                       Nr
## 6                                                                                     <NA>
##       V2        V3              V4   V5           V6   V7          V8   V9
## 1   <NA>      <NA>            <NA> <NA>         <NA> <NA>        <NA> <NA>
## 2   <NA>      <NA>            <NA> <NA>         <NA> <NA>        <NA> <NA>
## 3   <NA>      <NA>            <NA> <NA>         <NA> <NA>        <NA> <NA>
## 4   <NA>      <NA>            <NA> <NA>         <NA> <NA>        <NA> <NA>
## 5 Gebiet gehört zu Wahlberechtigte <NA>         <NA> <NA>      Wähler <NA>
## 6   <NA>      <NA>     Erststimmen <NA> Zweitstimmen <NA> Erststimmen <NA>

Renaming and restructuring

# paste party names to second vote ("Zweitstimme")
for(i in 1:ncol(d)){
  if(d[6,i] %in% "Zweitstimmen"){
    d[6,i] <- paste(d[5,i-2], d[6,i])
  }
}

colnames(d) <- d[6,] # set new column names
d <- d[-(1:7),] # remove extra rows
d <- d[d[3] != "99" & d[2] != "Bundesgebiet",] # remove totals of states and country
d <- cbind(d[1:2], d[grepl("Zweitstimmen$",colnames(d))]) # only keep second vote
colnames(d)[1:2] <- c("wkrnr","wahlkreisname") # rename first and second column
d <- d[!(is.na(d$wkrnr)),] # remove rows with missing values
d[-2] <- apply(d[-2],2,as.numeric) # save votes as numeric data

# add CDU and CSU votes to UNION
d %<>% group_by(wkrnr) %>% mutate(`UNION Zweitstimmen` = sum(`Christlich Demokratische Union Deutschlands Zweitstimmen`,`Christlich-Soziale Union in Bayern e.V. Zweitstimmen`, na.rm=T)) %>% select(-c(`Christlich Demokratische Union Deutschlands Zweitstimmen`, `Christlich-Soziale Union in Bayern e.V. Zweitstimmen`))

# Convert votes to percentages
d[grepl("Zweitstimmen$",colnames(d))][-(1:4)] <- round(d[grepl("Zweitstimmen$",colnames(d))][-(1:4)]/d$`Gültige Zweitstimmen`, 3) 
d %<>% mutate(`Nichtwähler Zweitstimmen`=1-(`Wähler Zweitstimmen`/`Wahlberechtigte Zweitstimmen`), id=wkrnr-1, id=as.character(id)) %>% # add id to fit shape ids
  gather(key="party", value="value", 7:48) %>% mutate(fill="#129B93") # convert to tidy data

head(d[1:6]) # take a look at the now tidy data
## # A tibble: 6 x 6
## # Groups:   wkrnr [6]
##   wkrnr                     wahlkreisname `Wahlberechtigte Zweitstimmen`
##   <dbl>                             <chr>                          <dbl>
## 1     1             Flensburg – Schleswig                         228471
## 2     2 Nordfriesland – Dithmarschen Nord                         186568
## 3     3      Steinburg – Dithmarschen Süd                         176636
## 4     4             Rendsburg-Eckernförde                         200831
## 5     5                              Kiel                         204650
## 6     6                 Plön – Neumünster                         174937
## # ... with 3 more variables: `Wähler Zweitstimmen` <dbl>, `Ungültige
## #   Zweitstimmen` <dbl>, `Gültige Zweitstimmen` <dbl>

Small multiples

Now that the main preprocessing is done, we can start constructing the maps.

a) Map of the winners

# load state borders and shapes of the constituencies
# You can download them here: https://interaktiv.morgenpost.de/analyse-bundestagswahl-2017/data/btw17-shapes.zip
bund_shp <- readOGR("btw17-shapes/bundeslaender_small.shp", "bundeslaender_small", stringsAsFactors=FALSE, encoding="latin1") %>% broom::tidy()
## OGR data source with driver: ESRI Shapefile 
## Source: "btw17-shapes/bundeslaender_small.shp", layer: "bundeslaender_small"
## with 16 features
## It has 4 fields
## Integer64 fields read as strings:  WKR_NR
wahlkreise <- readOGR("btw17-shapes/wahlkreise_small.shp", "wahlkreise_small", stringsAsFactors=FALSE, encoding="latin1") 
## OGR data source with driver: ESRI Shapefile 
## Source: "btw17-shapes/wahlkreise_small.shp", layer: "wahlkreise_small"
## with 299 features
## It has 4 fields
## Integer64 fields read as strings:  WKR_NR
krs_shp <- wahlkreise %>% broom::tidy() # broom to tidy shape data to make it work smoothly with the graphic package ggplot2
                                                        
btw_plot <- merge(krs_shp, d, by="id", all.y=T) # merge shapes with vote data by id
head(btw_plot[1:6])
##   id     long      lat order  hole piece
## 1  0 9.676908 54.83785     1 FALSE     1
## 2  0 9.676908 54.83785     1 FALSE     1
## 3  0 9.676908 54.83785     1 FALSE     1
## 4  0 9.676908 54.83785     1 FALSE     1
## 5  0 9.676908 54.83785     1 FALSE     1
## 6  0 9.676908 54.83785     1 FALSE     1

Add color to the parties

# generate color gradient for each of the established parties and add it to the data depending on how high the vote was
gewinner_plot <- btw_plot

gewinner_plot[gewinner_plot$party %in% "Sozialdemokratische Partei Deutschlands Zweitstimmen",]$fill <-
  colorRampPalette(c('#F0BAA5','#f40502'))(10)[as.numeric(cut(gewinner_plot[gewinner_plot$party %in% "Sozialdemokratische Partei Deutschlands Zweitstimmen",]$value, breaks = 10))]

gewinner_plot[gewinner_plot$party %in% "UNION Zweitstimmen",]$fill <-
  colorRampPalette(c('#959595','#000000'))(10)[as.numeric(cut(gewinner_plot[gewinner_plot$party %in% "UNION Zweitstimmen",]$value, breaks=10))]

gewinner_plot[gewinner_plot$party %in% "Alternative für Deutschland Zweitstimmen",]$fill <-
  colorRampPalette(c('#CEE9FF','#009de0'))(10)[as.numeric(cut(gewinner_plot[gewinner_plot$party %in% "Alternative für Deutschland Zweitstimmen",]$value, breaks=10))]

gewinner_plot[gewinner_plot$party %in% "DIE LINKE Zweitstimmen",]$fill <-
  colorRampPalette(c('#E0A7C3','#8b1b62'))(10)[as.numeric(cut(gewinner_plot[gewinner_plot$party %in% "DIE LINKE Zweitstimmen",]$value, breaks=10))]

gewinner_plot[gewinner_plot$party %in% "Freie Demokratische Partei Zweitstimmen",]$fill <-
  colorRampPalette(c('#FFFBD1','#feed01'))(10)[as.numeric(cut(gewinner_plot[gewinner_plot$party %in% "Freie Demokratische Partei Zweitstimmen",]$value, breaks=10))]

gewinner_plot[gewinner_plot$party %in% "BÃœNDNIS 90/DIE GRÃœNEN Zweitstimmen",]$fill <-
  colorRampPalette(c('#A4D78F','#42a62a'))(10)[as.numeric(cut(gewinner_plot[gewinner_plot$party %in% "BÃœNDNIS 90/DIE GRÃœNEN Zweitstimmen",]$value, breaks=10))]

# filter constituencies for winning party
gewinner_plot %<>% group_by(id) %>% filter(party %in% party[value %in% max(value, na.rm=T)]) %>% arrange(id,order)

Plot the map with ggplot2

win <- ggplot(data=gewinner_plot, aes(x=long, y=lat, group=group)) +
  geom_polygon(aes(fill=fill), show.legend = T) + # fill shapes with generated color
  # add state borders
  geom_polygon(data=bund_shp, aes(x=long, y=lat, group=group), fill=NA, color="white", size=0.4) + 
  scale_fill_identity() +
  theme_void() + # remove axes
  coord_map() # apply projection
win

b) Small multiple choropleths

Add colors to the parties

# add colors, this time a fixed color per party
btw_plot$cols <- "#129B93"
btw_plot$cols[btw_plot$party %in% "Sozialdemokratische Partei Deutschlands Zweitstimmen"] <- "#DB4240"
btw_plot$cols[btw_plot$party %in% "BÃœNDNIS 90/DIE GRÃœNEN Zweitstimmen"] <- "#4BA345"
btw_plot$cols[btw_plot$party %in% "DIE LINKE Zweitstimmen"] <- "#96276E"
btw_plot$cols[btw_plot$party %in% "Freie Demokratische Partei Zweitstimmen"] <- "#F6BB00"
btw_plot$cols[btw_plot$party %in% "Alternative für Deutschland Zweitstimmen"] <- "#34A3D2"
btw_plot$cols[btw_plot$party %in% "UNION Zweitstimmen"] <- "#373737"

Small multiples choropleth plot

# loop through the parties and save the plot as an element of a list
plotlist <- list() # create empty list to fill with plots
for(i in 1:length(unique(btw_plot$party)))
  local({
    i <- i
    # filter data to only contain current party
    plotdata <- btw_plot %>% filter(party %in% unique(btw_plot$party)[i]) %>% arrange(id,order)
    
    # save plot as list element  
    plotlist[[i]] <<- ggplot(data=plotdata, aes(x=long, y=lat, group=group)) +
      geom_polygon(aes(fill=value), show.legend = F) + 
      # add state borders
      geom_polygon(data=bund_shp, aes(x=long, y=lat, group=group), fill=NA, color="white", size=0.2) +
      theme_void() +
      # gradient from #F4F4F4 to party color depending on how high vote was, scaled individually
      scale_fill_gradient(space = "Lab", na.value = "#F4F4F4", name=" ", low="#F4F4F4", high=unique(plotdata$cols), guide=FALSE) +
      theme(line = element_blank(),
            text = element_blank(),
            title = element_blank())+
      coord_map()
  })
do.call("grid.arrange", c(plotlist, ncol=7)) # arrange the maps for the Notebook output

c) Gain and loss

Preprocessing

# to calculate gain or loss compared to vote from 2013
btw13 <- read.csv("https://www.bundeswahlleiter.de/dam/jcr/36efa904-5d4a-4159-a11e-2c8ecc1b0f77/btwkr17_umrechnung_btw13.csv", sep=";", header=F, stringsAsFactors = F, na.strings="", fileEncoding = "latin1") 

# again, we have to tidy up the data sheet
colnames(btw13) <- paste(btw13[5,], btw13[6,])
btw13 <- btw13[-(1:6),] # remove extra rows
btw13 <- btw13[!(btw13[[3]] %in% c("Insgesamt", "Land")),] # remove totals of states and country
btw13 <- cbind(btw13[1:2], btw13[grepl("Zweitstimmen$",colnames(btw13))]) # only keep second vote
# change column names to match with the party names of the 2017 dataset
colnames(btw13)[c(1:2,6:9,26)] <- c("wkrnr","wahlkreisname","Sozialdemokratische Partei Deutschlands Zweitstimmen","Freie Demokratische Partei Zweitstimmen","DIE LINKE Zweitstimmen","BÜNDNIS 90/DIE GRÜNEN Zweitstimmen","Alternative für Deutschland Zweitstimmen")
btw13[-2] <- apply(btw13[-2],2,as.numeric) # save votes as numeric data
# add CDU and CSU votes to UNION
btw13 %<>% group_by(wkrnr) %>% mutate(`UNION Zweitstimmen` = sum(`CDU Zweitstimmen`,`CSU Zweitstimmen`, na.rm=T)) %>% select(-c(`CDU Zweitstimmen`, `CSU Zweitstimmen`))
# Convert votes to percentages
btw13[grepl("Zweitstimmen$",colnames(btw13))][-(1:2)] <- round(btw13[grepl("Zweitstimmen$",colnames(btw13))][-(1:2)]/btw13$`Gültige Zweitstimmen`, 3) 
# calculate and add centroids of the constuencies
btw13 <- cbind(as.data.frame(btw13),as.data.frame(gCentroid(wahlkreise, byid=TRUE))) %>% gather(key=party, value=value, 5:33)

head(btw13[1:6]) # take a look at the now tidy data
##   wkrnr wahlkreisname Ungültige Zweitstimmen Gültige Zweitstimmen
## 1     1            SH                   2113               160636
## 2     2            SH                   1483               130044
## 3     3            SH                   1451               124958
## 4     4            SH                   1616               147967
## 5     5            SH                   1483               144969
## 6     6            SH                   1520               125573
##           x        y
## 1  9.509453 54.63156
## 2  8.944818 54.52165
## 3  9.405561 53.96564
## 4  9.778875 54.29168
## 5 10.132485 54.33390
## 6 10.315654 54.20776
# we merge the 2017 data with the 2013 data, filter for our six parties of interest and calculate the gain/loss. Finally we merge the result with the shapefile
btw_verluste <- d %>% merge(.,btw13[c(1,5:8)], by=c("wkrnr","party")) %>% 
  filter(party %in% c("Sozialdemokratische Partei Deutschlands Zweitstimmen","BÜNDNIS 90/DIE GRÜNEN Zweitstimmen","DIE LINKE Zweitstimmen","Freie Demokratische Partei Zweitstimmen","Alternative für Deutschland Zweitstimmen","UNION Zweitstimmen")) %>%
  group_by(id, party) %>% mutate(verlust=value.x-value.y) %>% select(c("id","party","verlust","x","y")) %>%
  merge(krs_shp, ., by="id", all.y=T) %>% arrange(id, order)

# add colors for gain...
btw_verluste$cols[btw_verluste$party %in% "Sozialdemokratische Partei Deutschlands Zweitstimmen"] <- "#DB4240"
btw_verluste$cols[btw_verluste$party %in% "BÃœNDNIS 90/DIE GRÃœNEN Zweitstimmen"] <- "#4BA345"
btw_verluste$cols[btw_verluste$party %in% "DIE LINKE Zweitstimmen"] <- "#96276E"
btw_verluste$cols[btw_verluste$party %in% "Freie Demokratische Partei Zweitstimmen"] <- "#F6BB00"
btw_verluste$cols[btw_verluste$party %in% "Alternative für Deutschland Zweitstimmen"] <- "#34A3D2"
btw_verluste$cols[btw_verluste$party %in% "UNION Zweitstimmen"] <- "#373737"
# and for loss
btw_verluste$cols_d[btw_verluste$party %in% "Sozialdemokratische Partei Deutschlands Zweitstimmen"] <- "#AB8F8F"
btw_verluste$cols_d[btw_verluste$party %in% "BÃœNDNIS 90/DIE GRÃœNEN Zweitstimmen"] <- "#A1B7A0"
btw_verluste$cols_d[btw_verluste$party %in% "DIE LINKE Zweitstimmen"] <- "#A997A3"
btw_verluste$cols_d[btw_verluste$party %in% "Freie Demokratische Partei Zweitstimmen"] <- "#C3B8A0"
btw_verluste$cols_d[btw_verluste$party %in% "Alternative für Deutschland Zweitstimmen"] <- "#93ABB5"
btw_verluste$cols_d[btw_verluste$party %in% "UNION Zweitstimmen"] <- "#999999"

Small multiple triangle plots

# loop through the parties and save each plot as an element of a list
plotlist <- list()
for(i in 1:length(unique(btw_verluste$party)))
  local({
    i <- i
    plotdata <- btw_verluste %>% filter(party %in% unique(btw_verluste$party)[i]) %>% 
      arrange(id,order)
    
    # calculate the three coordinates of each triangle
    verlust1 <- btw_verluste %>% filter(party %in% unique(btw_verluste$party)[i]) %>% 
      mutate(y=y+(verlust*10), t_id=1) # top of the triangle, y1
    verlust2 <- btw_verluste %>% filter(party %in% unique(btw_verluste$party)[i]) %>% 
      mutate(x=x-0.1, t_id=2) # left base, x2
    verlust3 <- btw_verluste %>% filter(party %in% unique(btw_verluste$party)[i]) %>% 
      mutate(x=x+0.1, t_id=3) # right base, x3
    # factorize gain and loss compared to 2013
    verlust <- rbind(verlust1,verlust2,verlust3) %>% arrange(id,t_id) %>% 
      mutate(td=ifelse(verlust>=0, "gain", "loss")) %>% 
      mutate(td=as.factor(td))
    
    # save plot as list element
    plotlist[[i]] <<- ggplot(data=plotdata, aes(x=long, y=lat, group=group)) +
      geom_polygon(fill=NA, color=NA) +
      geom_polygon(data=bund_shp, aes(x=long, y=lat, group=group), fill="#F0F0F0", color="#CCCCCC", size=0.5) +
      geom_polygon(data=verlust, mapping=aes(x=x, y=y, group=id, color = NA, fill = factor(td))) +
      # take different color for gain and loss
      scale_fill_manual(values = c("loss"=alpha(unique(plotdata$cols_d), 0.7),
                                   "gain"=alpha(unique(plotdata$cols),0.8))) +
      scale_color_manual(values = c("loss"=alpha(unique(plotdata$cols_d), 0.7),
                                    "gain"=alpha(unique(plotdata$cols),0.8))) +
      guides(size=F, shape=F, fill=F, color=F, alpha=F) +
      theme_void() +
      theme(line = element_blank(),
            text = element_blank(),
            title = element_blank())+
      coord_map()
  })
do.call("grid.arrange", c(plotlist, ncol=3)) # plot the list in a grid