<?xml version='1.0' encoding='UTF-8'?><?xml-stylesheet href='static/style.xsl' type='text/xsl'?><OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><responseDate>2026-04-04T06:11:01Z</responseDate><request verb="GetRecord" identifier="oai:www.clarin.si:11356/2006" metadataPrefix="oai_dc">http://www.clarin.si/repository/oai/request</request><GetRecord><record><header><identifier>oai:www.clarin.si:11356/2006</identifier><datestamp>2025-07-08T06:42:01Z</datestamp><setSpec>hdl_11356_1023</setSpec><setSpec>hdl_11356_1024</setSpec></header><metadata><oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:doc="http://www.lyncode.com/xoai" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dc="http://purl.org/dc/elements/1.1/" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
<dc:title>Linguistically annotated multilingual comparable corpora of parliamentary debates in English ParlaMint-en.ana 5.0</dc:title>
<dc:creator>Kuzman Pungeršek, Taja</dc:creator>
<dc:creator>Ljubešić, Nikola</dc:creator>
<dc:creator>Erjavec, Tomaž</dc:creator>
<dc:creator>Kopp, Matyáš</dc:creator>
<dc:creator>Ogrodniczuk, Maciej</dc:creator>
<dc:creator>Osenova, Petya</dc:creator>
<dc:creator>Rayson, Paul</dc:creator>
<dc:creator>Vidler, John</dc:creator>
<dc:creator>Agerri, Rodrigo</dc:creator>
<dc:creator>Agirrezabal, Manex</dc:creator>
<dc:creator>Agnoloni, Tommaso</dc:creator>
<dc:creator>Aires, José</dc:creator>
<dc:creator>Albini, Monica</dc:creator>
<dc:creator>Alkorta, Jon</dc:creator>
<dc:creator>Antiba-Cartazo, Iván</dc:creator>
<dc:creator>Arrieta, Ekain</dc:creator>
<dc:creator>Barcala, Mario</dc:creator>
<dc:creator>Bardanca, Daniel</dc:creator>
<dc:creator>Barkarson, Starkaður</dc:creator>
<dc:creator>Bartolini, Roberto</dc:creator>
<dc:creator>Battistoni, Roberto</dc:creator>
<dc:creator>Bel, Nuria</dc:creator>
<dc:creator>Bonet Ramos, Maria del Mar</dc:creator>
<dc:creator>Calzada Pérez, María</dc:creator>
<dc:creator>Cardoso, Aida</dc:creator>
<dc:creator>Çöltekin, Çağrı</dc:creator>
<dc:creator>Coole, Matthew</dc:creator>
<dc:creator>Darģis, Roberts</dc:creator>
<dc:creator>de Does, Jesse</dc:creator>
<dc:creator>de Libano, Ruben</dc:creator>
<dc:creator>Depoorter, Griet</dc:creator>
<dc:creator>Depuydt, Katrien</dc:creator>
<dc:creator>Diwersy, Sascha</dc:creator>
<dc:creator>Dodé, Réka</dc:creator>
<dc:creator>Fernandez, Kike</dc:creator>
<dc:creator>Fernández Rei, Elisa</dc:creator>
<dc:creator>Frontini, Francesca</dc:creator>
<dc:creator>Garcia, Marcos</dc:creator>
<dc:creator>García Díaz, Noelia</dc:creator>
<dc:creator>García Louzao, Pedro</dc:creator>
<dc:creator>Gavriilidou, Maria</dc:creator>
<dc:creator>Gkoumas, Dimitris</dc:creator>
<dc:creator>Grigorov, Ilko</dc:creator>
<dc:creator>Grigorova, Vladislava</dc:creator>
<dc:creator>Haltrup Hansen, Dorte</dc:creator>
<dc:creator>Iruskieta, Mikel</dc:creator>
<dc:creator>Jarlbrink, Johan</dc:creator>
<dc:creator>Jelencsik-Mátyus, Kinga</dc:creator>
<dc:creator>Jongejan, Bart</dc:creator>
<dc:creator>Kahusk, Neeme</dc:creator>
<dc:creator>Kirnbauer, Martin</dc:creator>
<dc:creator>Kryvenko, Anna</dc:creator>
<dc:creator>Ligeti-Nagy, Noémi</dc:creator>
<dc:creator>Luxardo, Giancarlo</dc:creator>
<dc:creator>Magariños, Carmen</dc:creator>
<dc:creator>Magnusson, Måns</dc:creator>
<dc:creator>Marchetti, Carlo</dc:creator>
<dc:creator>Marx, Maarten</dc:creator>
<dc:creator>Meden, Katja</dc:creator>
<dc:creator>Mendes, Amália</dc:creator>
<dc:creator>Mochtak, Michal</dc:creator>
<dc:creator>Mölder, Martin</dc:creator>
<dc:creator>Montemagni, Simonetta</dc:creator>
<dc:creator>Navarretta, Costanza</dc:creator>
<dc:creator>Nitoń, Bartłomiej</dc:creator>
<dc:creator>Norén, Fredrik Mohammadi</dc:creator>
<dc:creator>Nwadukwe, Amanda</dc:creator>
<dc:creator>Ojsteršek, Mihael</dc:creator>
<dc:creator>Pančur, Andrej</dc:creator>
<dc:creator>Papavassiliou, Vassilis</dc:creator>
<dc:creator>Pereira, Rui</dc:creator>
<dc:creator>Pérez Lago, María</dc:creator>
<dc:creator>Piperidis, Stelios</dc:creator>
<dc:creator>Pirker, Hannes</dc:creator>
<dc:creator>Pisani, Marilina</dc:creator>
<dc:creator>Pol, Henk van der</dc:creator>
<dc:creator>Prokopidis, Prokopis</dc:creator>
<dc:creator>Quochi, Valeria</dc:creator>
<dc:creator>Regueira, Xosé Luís</dc:creator>
<dc:creator>Rii, Andriana</dc:creator>
<dc:creator>Rudolf, Michał</dc:creator>
<dc:creator>Ruisi, Manuela</dc:creator>
<dc:creator>Rupnik, Peter</dc:creator>
<dc:creator>Schopper, Daniel</dc:creator>
<dc:creator>Simov, Kiril</dc:creator>
<dc:creator>Sinikallio, Laura</dc:creator>
<dc:creator>Skubic, Jure</dc:creator>
<dc:creator>Tamper, Minna</dc:creator>
<dc:creator>Tungland, Lars Magne</dc:creator>
<dc:creator>Tuominen, Jouni</dc:creator>
<dc:creator>van Heusden, Ruben</dc:creator>
<dc:creator>Varga, Zsófia</dc:creator>
<dc:creator>Vázquez Abuín, Marta</dc:creator>
<dc:creator>Venturi, Giulia</dc:creator>
<dc:creator>Vidal Miguéns, Adrián</dc:creator>
<dc:creator>Vider, Kadri</dc:creator>
<dc:creator>Vivel Couso, Ainhoa</dc:creator>
<dc:creator>Vladu, Adina Ioana</dc:creator>
<dc:creator>Wissik, Tanja</dc:creator>
<dc:creator>Yrjänäinen, Väinö</dc:creator>
<dc:creator>Zevallos, Rodolfo</dc:creator>
<dc:creator>Fišer, Darja</dc:creator>
<dc:subject>Parla-CLARIN</dc:subject>
<dc:subject>parliamentary debates</dc:subject>
<dc:subject>COVID-19</dc:subject>
<dc:subject>TEI</dc:subject>
<dc:subject>Bulgarian Parliament</dc:subject>
<dc:subject>Croatian Parliament</dc:subject>
<dc:subject>Polish Parliament</dc:subject>
<dc:subject>Slovenian Parliament</dc:subject>
<dc:subject>Czech Parliament</dc:subject>
<dc:subject>Icelandic Parliament</dc:subject>
<dc:subject>Belgian Parliament</dc:subject>
<dc:subject>Danish Parliament</dc:subject>
<dc:subject>Dutch Parliament</dc:subject>
<dc:subject>Turkish Parliament</dc:subject>
<dc:subject>Italian Parliament</dc:subject>
<dc:subject>Hungarian Parliament</dc:subject>
<dc:subject>Latvian Parliament</dc:subject>
<dc:subject>French Parliament</dc:subject>
<dc:subject>Bosnian Parliament</dc:subject>
<dc:subject>Catalonian Parliament</dc:subject>
<dc:subject>Galician Parliament</dc:subject>
<dc:subject>Greek Parliament</dc:subject>
<dc:subject>Norwegian Parliament</dc:subject>
<dc:subject>Portuguese Parliament</dc:subject>
<dc:subject>Serbian Parliament</dc:subject>
<dc:subject>Swedish Parliament</dc:subject>
<dc:subject>Ukrainian Parliament</dc:subject>
<dc:subject>Austrian Parliament</dc:subject>
<dc:subject>Estonian Parliament</dc:subject>
<dc:subject>Spanish Parliament</dc:subject>
<dc:subject>Finnish Parliament</dc:subject>
<dc:subject>Basque Parliament</dc:subject>
<dc:subject>British Parliament</dc:subject>
<dc:description>ParlaMint-en.ana 5.0 is the English machine translation of the ParlaMint.ana 5.0 (http://hdl.handle.net/11356/2005) set of corpora of parliamentary debates across Europe. The translation keeps the structure and metadata of the original corpora and is linguistically annotated similarly to the original language corpora (but without UD syntax), and with the addition of USAS semantic tags (https://ucrel.lancs.ac.uk/usas/). Because of the addition of semantic tags, the UK corpus (ParlaMint-GB) is also included, even though it has, of course, not been machine translated.&#xd;
&#xd;
The translation to English was done with EasyNMT (https://github.com/UKPLab/EasyNMT) using OPUS-MT models (https://github.com/Helsinki-NLP/Opus-MT). Machine translation was done on the sentence level over both speeches and transcriber notes, including headings. Note that corpus metadata is mostly available both in the source language and in English. The linguistic annotation of the speeches, i.e. tokenisation, tagging with UD PoS and morphological features, lemmatisation, and NER annotation was done with Stanza (https://stanfordnlp.github.io/stanza/) using the conll03 model (4 classes). The annotation of MWEs (phrases) and tokens with USAS tags was done with pyMusas (https://github.com/ucrel/pymusas).&#xd;
&#xd;
Note that the English in the corpora contains typical NMT errors, including factual errors even when high fluency is achieved, and any use of this corpus should take the machine translation limitations into account. &#xd;
&#xd;
The files associated with this entry include the machine translated and linguistically annotated corpora in several formats: the corpora in the canonical ParlaMint TEI XML encoding; the corpora in the derived vertical format (for use with CQP-based concordancers, such as CWB, noSketch Engine or KonText); and the corpora in the CoNLL-U format with TSV speech metadata. The CoNLL-U files include pyMusas USAS tags. Also included is the 5.0 release of the sample data and scripts available at the GitHub repository of the ParlaMint project at https://github.com/clarin-eric/ParlaMint and the log files produced in the process of building the corpora for this release. The log files show e.g. known errors in the corpora, while more information about known problems is available in the (open) issues at the GitHub repository of the project.&#xd;
&#xd;
Compared with the previous version 4.1, this version adds information on the topic of each speech and the sentence-level sentiment for all corpora, changes the IDs of the categories in corpus-specific taxonomies to prevent ID clashes, and corrects some other minor errors.</dc:description>
<dc:date>2025-07-08</dc:date>
<dc:type>corpus</dc:type>
<dc:identifier>http://hdl.handle.net/11356/2006</dc:identifier>
<dc:language>eng</dc:language>
<dc:relation>https://doi.org/10.1007/s10579-024-09798-w</dc:relation>
<dc:relation>http://hdl.handle.net/11356/1910</dc:relation>
<dc:rights>Creative Commons - Attribution 4.0 International (CC BY 4.0)</dc:rights>
<dc:rights>https://creativecommons.org/licenses/by/4.0/</dc:rights>
<dc:rights>PUB</dc:rights>
<dc:format>text/plain; charset=utf-8</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>downloadable_files_count: 31</dc:format>
<dc:publisher>CLARIN ERIC</dc:publisher>
<dc:source>https://www.clarin.eu/content/parlamint</dc:source>
</oai_dc:dc>
</metadata></record></GetRecord></OAI-PMH>