<?xml version='1.0' encoding='UTF-8'?><?xml-stylesheet href='static/style.xsl' type='text/xsl'?><OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><responseDate>2026-04-04T11:31:21Z</responseDate><request verb="GetRecord" identifier="oai:www.clarin.si:11356/1810" metadataPrefix="oai_dc">http://www.clarin.si/repository/oai/request</request><GetRecord><record><header><identifier>oai:www.clarin.si:11356/1810</identifier><datestamp>2024-05-06T21:43:10Z</datestamp><setSpec>hdl_11356_1023</setSpec><setSpec>hdl_11356_1024</setSpec></header><metadata><oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:doc="http://www.lyncode.com/xoai" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dc="http://purl.org/dc/elements/1.1/" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
<dc:title>Linguistically annotated multilingual comparable corpora of parliamentary debates in English ParlaMint-en.ana 3.0</dc:title>
<dc:creator>Kuzman, Taja</dc:creator>
<dc:creator>Ljubešić, Nikola</dc:creator>
<dc:creator>Erjavec, Tomaž</dc:creator>
<dc:creator>Kopp, Matyáš</dc:creator>
<dc:creator>Ogrodniczuk, Maciej</dc:creator>
<dc:creator>Osenova, Petya</dc:creator>
<dc:creator>Fišer, Darja</dc:creator>
<dc:creator>Pirker, Hannes</dc:creator>
<dc:creator>Wissik, Tanja</dc:creator>
<dc:creator>Schopper, Daniel</dc:creator>
<dc:creator>Kirnbauer, Martin</dc:creator>
<dc:creator>Mochtak, Michal</dc:creator>
<dc:creator>Rupnik, Peter</dc:creator>
<dc:creator>Pol, Henk van der</dc:creator>
<dc:creator>Depoorter, Griet</dc:creator>
<dc:creator>de Does, Jesse</dc:creator>
<dc:creator>Simov, Kiril</dc:creator>
<dc:creator>Grigorova, Vladislava</dc:creator>
<dc:creator>Grigorov, Ilko</dc:creator>
<dc:creator>Jongejan, Bart</dc:creator>
<dc:creator>Haltrup Hansen, Dorte</dc:creator>
<dc:creator>Navarretta, Costanza</dc:creator>
<dc:creator>Mölder, Martin</dc:creator>
<dc:creator>Kahusk, Neeme</dc:creator>
<dc:creator>Vider, Kadri</dc:creator>
<dc:creator>Bel, Nuria</dc:creator>
<dc:creator>Antiba-Cartazo, Iván</dc:creator>
<dc:creator>Pisani, Marilina</dc:creator>
<dc:creator>Zevallos, Rodolfo</dc:creator>
<dc:creator>Regueira, Xosé Luís</dc:creator>
<dc:creator>Vladu, Adina Ioana</dc:creator>
<dc:creator>Magariños, Carmen</dc:creator>
<dc:creator>Bardanca, Daniel</dc:creator>
<dc:creator>Barcala, Mario</dc:creator>
<dc:creator>Garcia, Marcos</dc:creator>
<dc:creator>Pérez Lago, María</dc:creator>
<dc:creator>García Louzao, Pedro</dc:creator>
<dc:creator>Vivel Couso, Ainhoa</dc:creator>
<dc:creator>Vázquez Abuín, Marta</dc:creator>
<dc:creator>García Díaz, Noelia</dc:creator>
<dc:creator>Vidal Miguéns, Adrián</dc:creator>
<dc:creator>Fernández Rei, Elisa</dc:creator>
<dc:creator>Diwersy, Sascha</dc:creator>
<dc:creator>Luxardo, Giancarlo</dc:creator>
<dc:creator>Coole, Matthew</dc:creator>
<dc:creator>Rayson, Paul</dc:creator>
<dc:creator>Nwadukwe, Amanda</dc:creator>
<dc:creator>Gkoumas, Dimitris</dc:creator>
<dc:creator>Papavassiliou, Vassilis</dc:creator>
<dc:creator>Prokopidis, Prokopis</dc:creator>
<dc:creator>Gavriilidou, Maria</dc:creator>
<dc:creator>Piperidis, Stelios</dc:creator>
<dc:creator>Ligeti-Nagy, Noémi</dc:creator>
<dc:creator>Jelencsik-Mátyus, Kinga</dc:creator>
<dc:creator>Varga, Zsófia</dc:creator>
<dc:creator>Dodé, Réka</dc:creator>
<dc:creator>Barkarson, Starkaður</dc:creator>
<dc:creator>Agnoloni, Tommaso</dc:creator>
<dc:creator>Bartolini, Roberto</dc:creator>
<dc:creator>Frontini, Francesca</dc:creator>
<dc:creator>Montemagni, Simonetta</dc:creator>
<dc:creator>Quochi, Valeria</dc:creator>
<dc:creator>Venturi, Giulia</dc:creator>
<dc:creator>Ruisi, Manuela</dc:creator>
<dc:creator>Marchetti, Carlo</dc:creator>
<dc:creator>Battistoni, Roberto</dc:creator>
<dc:creator>Darģis, Roberts</dc:creator>
<dc:creator>van Heusden, Ruben</dc:creator>
<dc:creator>Marx, Maarten</dc:creator>
<dc:creator>Depuydt, Katrien</dc:creator>
<dc:creator>Tungland, Lars Magne</dc:creator>
<dc:creator>Rudolf, Michał</dc:creator>
<dc:creator>Nitoń, Bartłomiej</dc:creator>
<dc:creator>Aires, José</dc:creator>
<dc:creator>Mendes, Amália</dc:creator>
<dc:creator>Cardoso, Aida</dc:creator>
<dc:creator>Pereira, Rui</dc:creator>
<dc:creator>Yrjänäinen, Väinö</dc:creator>
<dc:creator>Norén, Fredrik Mohammadi</dc:creator>
<dc:creator>Magnusson, Måns</dc:creator>
<dc:creator>Jarlbrink, Johan</dc:creator>
<dc:creator>Meden, Katja</dc:creator>
<dc:creator>Pančur, Andrej</dc:creator>
<dc:creator>Ojsteršek, Mihael</dc:creator>
<dc:creator>Çöltekin, Çağrı</dc:creator>
<dc:creator>Kryvenko, Anna</dc:creator>
<dc:subject>Parla-CLARIN</dc:subject>
<dc:subject>parliamentary debates</dc:subject>
<dc:subject>COVID-19</dc:subject>
<dc:subject>TEI</dc:subject>
<dc:subject>Bulgarian Parliament</dc:subject>
<dc:subject>Croatian Parliament</dc:subject>
<dc:subject>Polish Parliament</dc:subject>
<dc:subject>Slovenian Parliament</dc:subject>
<dc:subject>Czech Parliament</dc:subject>
<dc:subject>Icelandic Parliament</dc:subject>
<dc:subject>Belgian Parliament</dc:subject>
<dc:subject>Danish Parliament</dc:subject>
<dc:subject>Dutch Parliament</dc:subject>
<dc:subject>Turkish Parliament</dc:subject>
<dc:subject>Italian Parliament</dc:subject>
<dc:subject>Hungarian Parliament</dc:subject>
<dc:subject>Latvian Parliament</dc:subject>
<dc:subject>French Parliament</dc:subject>
<dc:subject>Bosnian Parliament</dc:subject>
<dc:subject>Catalonian Parliament</dc:subject>
<dc:subject>Galician Parliament</dc:subject>
<dc:subject>Greek Parliament</dc:subject>
<dc:subject>Norwegian Parliament</dc:subject>
<dc:subject>Portuguese Parliament</dc:subject>
<dc:subject>Serbian Parliament</dc:subject>
<dc:subject>Swedish Parliament</dc:subject>
<dc:subject>Ukrainian Parliament</dc:subject>
<dc:subject>Austrian Parliament</dc:subject>
<dc:subject>Estonian Parliament</dc:subject>
<dc:description>ParlaMint-en 3.0 comprises linguistically annotated multilingual comparable corpora of parliamentary debates ParlaMint.ana 3.0 (http://hdl.handle.net/11356/1488) which were machine translated to English and the translation linguistically annotated.&#xd;
&#xd;
Except for the translation to English, small changes in the metadata and the absence of the British parliament corpus, the corpora included in this entry are in all respects identical to the source language corpora, i.e. the entry comprises the same 26 European parliamentary corpora, together with over 1.1 billion words.&#xd;
&#xd;
The translation to English was done with EasyNMT (https://github.com/UKPLab/EasyNMT) with OPUS-MT models (https://github.com/Helsinki-NLP/Opus-MT). Machine translation was done on the sentence level, and includes both speeches and transcriber notes, including headings. The linguistic annotation of the speeches, i.e. tokenisation, tagging with UD PoS and morphological features, lemmatisation, and NER annotation was done with Stanza (https://stanfordnlp.github.io/stanza/), using the English language model. For NER the conll03 model with 4 NE classes was used.&#xd;
&#xd;
Note that the automatically produced translation to English contains errors typical of neural machine translation, which also includes factual errors even when a high level of fluency is achieved, and any manual or automatic usage of this corpus should take the machine translation limitations into account. Note also that some metadata errors were noticed after the source 3.0 corpora were released, and were corrected for the MTed corpus, so there are slight differences in the metadata between the two.&#xd;
&#xd;
The files associated with this entry include the linguistically annotated corpora in several formats: the corpora in the canonical ParlaMint TEI XML encoding; the corpora in the derived vertical format (for use with CQP-based concordancers, such as CWB, noSketch Engine or KonText); and the corpora in the CoNLL-U format with TSV speech metadata. In contrast to the source language corpora, the CoNLL-U files are not derived from the TEI encoded corpus but are the ones output by the machine translation and linguistic annotation pipeline as these also contain word-alignment information, which is not present in the TEI version. Also included is the ParlaMint-en-3.0 release of the scripts and samples available at the GitHub repository of the ParlaMint project.</dc:description>
<dc:date>2023-08-10</dc:date>
<dc:type>corpus</dc:type>
<dc:identifier>http://hdl.handle.net/11356/1810</dc:identifier>
<dc:language>eng</dc:language>
<dc:relation>http://hdl.handle.net/11356/1864</dc:relation>
<dc:rights>Creative Commons - Attribution 4.0 International (CC BY 4.0)</dc:rights>
<dc:rights>https://creativecommons.org/licenses/by/4.0/</dc:rights>
<dc:rights>PUB</dc:rights>
<dc:format>text/plain; charset=utf-8</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/gzip</dc:format>
<dc:format>downloadable_files_count: 26</dc:format>
<dc:publisher>CLARIN ERIC</dc:publisher>
<dc:source>https://www.clarin.eu/parlamint</dc:source>
</oai_dc:dc>
</metadata></record></GetRecord></OAI-PMH>