<?xml version='1.0' encoding='UTF-8'?><?xml-stylesheet href='static/style.xsl' type='text/xsl'?><OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><responseDate>2026-04-04T07:40:05Z</responseDate><request verb="GetRecord" identifier="oai:www.clarin.si:11356/2004" metadataPrefix="oai_dc">http://www.clarin.si/repository/oai/request</request><GetRecord><record><header><identifier>oai:www.clarin.si:11356/2004</identifier><datestamp>2026-02-01T16:56:35Z</datestamp><setSpec>hdl_11356_1023</setSpec><setSpec>hdl_11356_1024</setSpec></header><metadata><oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:doc="http://www.lyncode.com/xoai" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dc="http://purl.org/dc/elements/1.1/" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
<dc:title>Multilingual comparable corpora of parliamentary debates ParlaMint 5.0</dc:title>
<dc:creator>Erjavec, Tomaž</dc:creator>
<dc:creator>Kopp, Matyáš</dc:creator>
<dc:creator>Kuzman Pungeršek, Taja</dc:creator>
<dc:creator>Ljubešić, Nikola</dc:creator>
<dc:creator>Ogrodniczuk, Maciej</dc:creator>
<dc:creator>Osenova, Petya</dc:creator>
<dc:creator>Agirrezabal, Manex</dc:creator>
<dc:creator>Agnoloni, Tommaso</dc:creator>
<dc:creator>Aires, José</dc:creator>
<dc:creator>Albini, Monica</dc:creator>
<dc:creator>Alkorta, Jon</dc:creator>
<dc:creator>Antiba-Cartazo, Iván</dc:creator>
<dc:creator>Arrieta, Ekain</dc:creator>
<dc:creator>Barcala, Mario</dc:creator>
<dc:creator>Bardanca, Daniel</dc:creator>
<dc:creator>Barkarson, Starkaður</dc:creator>
<dc:creator>Bartolini, Roberto</dc:creator>
<dc:creator>Battistoni, Roberto</dc:creator>
<dc:creator>Bel, Nuria</dc:creator>
<dc:creator>Bonet Ramos, Maria del Mar</dc:creator>
<dc:creator>Calzada Pérez, María</dc:creator>
<dc:creator>Cardoso, Aida</dc:creator>
<dc:creator>Çöltekin, Çağrı</dc:creator>
<dc:creator>Coole, Matthew</dc:creator>
<dc:creator>Darģis, Roberts</dc:creator>
<dc:creator>de Libano, Ruben</dc:creator>
<dc:creator>Depoorter, Griet</dc:creator>
<dc:creator>Diwersy, Sascha</dc:creator>
<dc:creator>Dodé, Réka</dc:creator>
<dc:creator>Fernandez, Kike</dc:creator>
<dc:creator>Fernández Rei, Elisa</dc:creator>
<dc:creator>Frontini, Francesca</dc:creator>
<dc:creator>Garcia, Marcos</dc:creator>
<dc:creator>García Díaz, Noelia</dc:creator>
<dc:creator>García Louzao, Pedro</dc:creator>
<dc:creator>Gavriilidou, Maria</dc:creator>
<dc:creator>Gkoumas, Dimitris</dc:creator>
<dc:creator>Grigorov, Ilko</dc:creator>
<dc:creator>Grigorova, Vladislava</dc:creator>
<dc:creator>Haltrup Hansen, Dorte</dc:creator>
<dc:creator>Iruskieta, Mikel</dc:creator>
<dc:creator>Jarlbrink, Johan</dc:creator>
<dc:creator>Jelencsik-Mátyus, Kinga</dc:creator>
<dc:creator>Jongejan, Bart</dc:creator>
<dc:creator>Kahusk, Neeme</dc:creator>
<dc:creator>Kirnbauer, Martin</dc:creator>
<dc:creator>Kryvenko, Anna</dc:creator>
<dc:creator>Ligeti-Nagy, Noémi</dc:creator>
<dc:creator>Luxardo, Giancarlo</dc:creator>
<dc:creator>Magariños, Carmen</dc:creator>
<dc:creator>Magnusson, Måns</dc:creator>
<dc:creator>Marchetti, Carlo</dc:creator>
<dc:creator>Marx, Maarten</dc:creator>
<dc:creator>Meden, Katja</dc:creator>
<dc:creator>Mendes, Amália</dc:creator>
<dc:creator>Mochtak, Michal</dc:creator>
<dc:creator>Mölder, Martin</dc:creator>
<dc:creator>Montemagni, Simonetta</dc:creator>
<dc:creator>Navarretta, Costanza</dc:creator>
<dc:creator>Nitoń, Bartłomiej</dc:creator>
<dc:creator>Norén, Fredrik Mohammadi</dc:creator>
<dc:creator>Nwadukwe, Amanda</dc:creator>
<dc:creator>Ojsteršek, Mihael</dc:creator>
<dc:creator>Pančur, Andrej</dc:creator>
<dc:creator>Papavassiliou, Vassilis</dc:creator>
<dc:creator>Pereira, Rui</dc:creator>
<dc:creator>Pérez Lago, María</dc:creator>
<dc:creator>Piperidis, Stelios</dc:creator>
<dc:creator>Pirker, Hannes</dc:creator>
<dc:creator>Pisani, Marilina</dc:creator>
<dc:creator>Pol, Henk van der</dc:creator>
<dc:creator>Prokopidis, Prokopis</dc:creator>
<dc:creator>Quochi, Valeria</dc:creator>
<dc:creator>Rayson, Paul</dc:creator>
<dc:creator>Regueira, Xosé Luís</dc:creator>
<dc:creator>Rii, Andriana</dc:creator>
<dc:creator>Rudolf, Michał</dc:creator>
<dc:creator>Ruisi, Manuela</dc:creator>
<dc:creator>Rupnik, Peter</dc:creator>
<dc:creator>Schopper, Daniel</dc:creator>
<dc:creator>Simov, Kiril</dc:creator>
<dc:creator>Sinikallio, Laura</dc:creator>
<dc:creator>Skubic, Jure</dc:creator>
<dc:creator>Tungland, Lars Magne</dc:creator>
<dc:creator>Tuominen, Jouni</dc:creator>
<dc:creator>van Heusden, Ruben</dc:creator>
<dc:creator>Varga, Zsófia</dc:creator>
<dc:creator>Vázquez Abuín, Marta</dc:creator>
<dc:creator>Venturi, Giulia</dc:creator>
<dc:creator>Vidal Miguéns, Adrián</dc:creator>
<dc:creator>Vider, Kadri</dc:creator>
<dc:creator>Vivel Couso, Ainhoa</dc:creator>
<dc:creator>Vladu, Adina Ioana</dc:creator>
<dc:creator>Wissik, Tanja</dc:creator>
<dc:creator>Yrjänäinen, Väinö</dc:creator>
<dc:creator>Zevallos, Rodolfo</dc:creator>
<dc:creator>Fišer, Darja</dc:creator>
<dc:subject>parliamentary debates</dc:subject>
<dc:subject>COVID-19</dc:subject>
<dc:subject>TEI</dc:subject>
<dc:subject>Parla-CLARIN</dc:subject>
<dc:subject>Czech Parliament</dc:subject>
<dc:subject>Icelandic Parliament</dc:subject>
<dc:subject>Belgian Parliament</dc:subject>
<dc:subject>Danish Parliament</dc:subject>
<dc:subject>Dutch Parliament</dc:subject>
<dc:subject>Turkish Parliament</dc:subject>
<dc:subject>Italian Parliament</dc:subject>
<dc:subject>Hungarian Parliament</dc:subject>
<dc:subject>Latvian Parliament</dc:subject>
<dc:subject>Bulgarian Parliament</dc:subject>
<dc:subject>Croatian Parliament</dc:subject>
<dc:subject>Polish Parliament</dc:subject>
<dc:subject>Slovenian Parliament</dc:subject>
<dc:subject>French Parliament</dc:subject>
<dc:subject>Austrian Parliament</dc:subject>
<dc:subject>Bosnian Parliament</dc:subject>
<dc:subject>Catalonian Parliament</dc:subject>
<dc:subject>Galician Parliament</dc:subject>
<dc:subject>Greek Parliament</dc:subject>
<dc:subject>Norwegian Parliament</dc:subject>
<dc:subject>Serbian Parliament</dc:subject>
<dc:subject>Swedish Parliament</dc:subject>
<dc:subject>Ukrainian Parliament</dc:subject>
<dc:subject>Finnish Parliament</dc:subject>
<dc:subject>Spanish Parliament</dc:subject>
<dc:subject>Estonian Parliament</dc:subject>
<dc:subject>Basque Parliament</dc:subject>
<dc:subject>Portuguese Parliament</dc:subject>
<dc:subject>UK Parliament</dc:subject>
<dc:description>ParlaMint 5.0 is a set of comparable corpora containing transcriptions of parliamentary debates of 29 European countries and autonomous regions, mostly starting in 2015 and extending to mid-2022. The individual corpora comprise between 9 and 126 million words and the complete set contains over 1.2 billion words.&#xd;
&#xd;
The transcriptions are divided by days with information on the term, session and meeting, and contain speeches marked with their speaker and the speaker's role (e.g. chair, regular speaker) as well as with their automatically assigned CAP (Comparative Agendas Project) top-level topic. The speeches also contain marked-up transcriber comments, such as gaps in the transcription, interruptions, applause, etc. &#xd;
&#xd;
The corpora have extensive metadata, most importantly on speakers (name, gender, MP and minister status, party affiliation), and on their political parties and parliamentary groups (name, coalition/opposition status, Wikipedia-sourced left-to-right political orientation, and CHES variables, https://www.chesdata.eu/). Note that some corpora have further metadata, e.g. the year of birth of the speakers, links to their Wikipedia articles, their membership in various committees, etc. The transcriptions are also marked with the subcorpora they belong to ("reference", until 2020-01-30; "covid", from 2020-01-31; and "war", from 2022-02-24). &#xd;
&#xd;
An overview of the statistics of the corpora is available on GitHub in the folder Build/Metadata, in particular for the release 5.0 at https://github.com/clarin-eric/ParlaMint/tree/v5.0/Build/Metadata.&#xd;
&#xd;
The corpora are encoded according to the ParlaMint encoding guidelines (https://clarin-eric.github.io/ParlaMint/) and schemas (included in the distribution).&#xd;
&#xd;
This entry contains the ParlaMint TEI-encoded corpora and their derived plain text versions along with TSV metadata of the speeches. Also included is the 5.0 release of the sample data and scripts available at the GitHub repository of the ParlaMint project at https://github.com/clarin-eric/ParlaMint.&#xd;
&#xd;
Note that there also exists a linguistically marked-up version of the 5.0 ParlaMint corpus (http://hdl.handle.net/11356/2005) as well as a version machine-translated to English (http://hdl.handle.net/11356/2006). Both are linked with CLARIN.SI concordancers for on-line analysis. Furthermore, a version of the ParlaMint 5.0 metadata, topic and sentiment annotations formatted as TSV is available from the CROSSDA archive at https://doi.org/10.23669/1ZTELP.&#xd;
&#xd;
Compared with the previous version 4.1, this version adds information on the topic of each speech for all corpora, changes the IDs of the categories in corpus-specific taxonomies to prevent ID clashes, and corrects some other minor errors.</dc:description>
<dc:date>2025-07-08</dc:date>
<dc:type>corpus</dc:type>
<dc:identifier>http://hdl.handle.net/11356/2004</dc:identifier>
<dc:language>bul</dc:language>
<dc:language>hrv</dc:language>
<dc:language>pol</dc:language>
<dc:language>slv</dc:language>
<dc:language>ces</dc:language>
<dc:language>isl</dc:language>
<dc:language>fra</dc:language>
<dc:language>nld</dc:language>
<dc:language>dan</dc:language>
<dc:language>spa</dc:language>
<dc:language>tur</dc:language>
<dc:language>eng</dc:language>
<dc:language>ita</dc:language>
<dc:language>hun</dc:language>
<dc:language>lav</dc:language>
<dc:language>bos</dc:language>
<dc:language>cat</dc:language>
<dc:language>deu</dc:language>
<dc:language>ell</dc:language>
<dc:language>est</dc:language>
<dc:language>por</dc:language>
<dc:language>srp</dc:language>
<dc:language>swe</dc:language>
<dc:language>ukr</dc:language>
<dc:language>nor</dc:language>
<dc:language>glg</dc:language>
<dc:language>rus</dc:language>
<dc:language>fin</dc:language>
<dc:language>eus</dc:language>
<dc:relation>https://doi.org/10.1007/s10579-024-09798-w</dc:relation>
<dc:relation>http://hdl.handle.net/11356/1912</dc:relation>
<dc:rights>Creative Commons - Attribution 4.0 International (CC BY 4.0)</dc:rights>
<dc:rights>PUB</dc:rights>
<dc:rights>https://creativecommons.org/licenses/by/4.0/</dc:rights>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>text/plain; charset=utf-8</dc:format>
<dc:format>downloadable_files_count: 31</dc:format>
<dc:publisher>CLARIN ERIC</dc:publisher>
<dc:source>https://www.clarin.eu/content/parlamint</dc:source>
</oai_dc:dc>
</metadata></record></GetRecord></OAI-PMH>