# Supplemental material to:
# Swimming with the Tide? Positional Claim Detection across Political Text Types.
# In: Proceedings of the NLP+CSS workshop. Online, 2020. Accepted for publication
# Nico Blokker, Erenay Dayanik, Gabriella Lapesa and Sebastian Padó.
# Please cite the MARPOR project if you re-use the texts (see below):
# Volkens, Andrea / Burst, Tobias / Krause, Werner / Lehmann, Pola /
# Matthieß, Theres / Merz, Nicolas / Regel, Sven / Weßels, Bernhard /
# Zehnter, Lisa (2020):
# The Manifesto Data Collection. Manifesto Project (MRG/CMP/MARPOR). Version 2020a.
# Berlin: Wissenschaftszentrum Berlin für Sozialforschung (WZB).
# https://doi.org/10.25522/manifesto.mpds.2020a
# In order to download the texts you need access to the API of the MARPOR-Project
# (https://manifestoproject.wzb.eu/information/documents/api) and an API-Key
# (register here: https://manifestoproject.wzb.eu/login).
# In the following script we use the R package 'manifestoR'
# (https://manifestoproject.wzb.eu/information/documents/manifestoR)
# to download the data.
# For further information regarding the coding scheme refer to Lapesa et al.
# 2020: hdl.handle.net/11022/1007-0000-0007-DB07-B

# 0 preparation -----------------------------------------------------------

library(manifestoR) # version 1.4.0
library(dplyr)      # version 1.0.0

# 1 load annotations ------------------------------------------------------

results <- readRDS("annotations.rds")      # annotations
sections <- readRDS("chapter_indices.rds") # corresponding chapters

# 2 download raw text from MARPOR project ---------------------------------

API_key <- "0123456789" # ENTER API KEY HERE
mp_setapikey(key = API_key)

# select data: German manifestos with an election date after 2012-12-31 and
# before 2017-12-31, restricted to the five listed party ids
programs <- mp_corpus(
  countryname == "Germany" &
    edate > as.Date("2012-12-31") &
    edate < as.Date("2017-12-31") &
    party %in% c("41113", "41223", "41320", "41521", "41953")
)

# Fail early with a readable message instead of an obscure error further down
# (an empty corpus would otherwise be indexed with 1:0).
if (length(programs) == 0) {
  stop("mp_corpus() returned no documents; check the API key and the query.",
       call. = FALSE)
}

# re-structure into data.frame

# Convert the x-th document of the global corpus `programs` into a data.frame.
#
# x: position of the document within `programs`.
#
# Returns a data.frame with the character columns
#   party: short party name looked up from the leading id in the document name
#   year:  the four digits following the underscore in the document name
#   quote: the elements of content(programs[[x]])
#          (presumably one text snippet per row -- TODO confirm)
# The document name is assumed to look like "<party id>_<date>", as implied by
# the regular expressions below.
lookup <- function(x) {
  dictionary <- c(
    "41113" = "green",
    "41223" = "left",
    "41320" = "spd",
    "41420" = "fdp",
    "41521" = "cdu",
    "41952" = "pirates",
    "41953" = "afd"
  )
  title <- names(programs[x])
  # strip the "_<digits>" suffix to obtain the party id used as dictionary key
  party <- dictionary[gsub("_\\d+", "", title)]
  # keep only the first four digits after the underscore (the election year)
  year <- gsub("\\d+_(\\d{4}).*", "\\1", title)
  data.frame(
    party = party,
    year = year,
    quote = content(programs[[x]]),
    stringsAsFactors = FALSE,
    row.names = NULL
  )
}

l <- lapply(seq_along(programs), lookup)
# NOTE(review): the merge below is purely positional, so it relies on
# arrange(quote) producing the same ordering that was used when `order_quote`
# was created; string collation in arrange() may depend on the dplyr version
# and locale -- confirm when running with a dplyr version other than 1.0.0.
df <- do.call(rbind, l) %>%
  arrange(quote)

# 3 merge with annotations --------------------------------------------------

# The texts are attached by position; a differing row count means the
# downloaded corpus does not match the annotation file.
if (nrow(df) != nrow(results)) {
  stop("Downloaded corpus has ", nrow(df), " rows but the annotations have ",
       nrow(results), "; the texts cannot be aligned.", call. = FALSE)
}

results <- results %>%
  arrange(order_quote) %>%
  mutate(quote2 = df$quote)
results <- results %>%
  arrange(fixed)
# re-index the rows by `sections` and drop the two helper columns
results <- results[sections, ] %>%
  select(-fixed, -order_quote)

results$claimvalues[563] <- "-401|402|999" # spd not opposing refugee protection, mislabeled

# View() needs an interactive session; skip it when run via Rscript
if (interactive()) {
  View(results)
}

# the resulting data.frame contains 5 variables:
# party: party name
# year: election year
# claimvalues: annotated claim-category or claim-categories (separated by "|").
#              Negative polarity indicated by "-".
# detected: logical, whether our models identified the text snippet as claim
# quote2: the text snippet