<?xml version='1.0' encoding='UTF-8'?><?xml-stylesheet href='static/style.xsl' type='text/xsl'?><OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><responseDate>2026-05-22T00:49:04Z</responseDate><request verb="GetRecord" identifier="oai:www.clarin.si:11356/1845" metadataPrefix="oai_dc">http://www.clarin.si/repository/oai/request</request><GetRecord><record><header><identifier>oai:www.clarin.si:11356/1845</identifier><datestamp>2024-05-06T21:43:56Z</datestamp><setSpec>hdl_11356_1023</setSpec><setSpec>hdl_11356_1024</setSpec></header><metadata><oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:doc="http://www.lyncode.com/xoai" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dc="http://purl.org/dc/elements/1.1/" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
<dc:title>Parliamentary corpus of first Yugoslavia (1919-1939) yu1Parl 1.0</dc:title>
<dc:creator>Kavčič, Alenka</dc:creator>
<dc:creator>Mundjar, Aleksander</dc:creator>
<dc:creator>Marolt, Matija</dc:creator>
<dc:subject>National Representation of Kingdom of Yugoslavia</dc:subject>
<dc:subject>parliamentary debates</dc:subject>
<dc:subject>TEI</dc:subject>
<dc:subject>Parla-CLARIN</dc:subject>
<dc:description>The corpus contains meeting proceedings of the National Representation of the Kingdom of Yugoslavia from 1919 to 1939 (Zbirka stenografskih beležk, zapisnikov sej predstavništev, senata in skupščine Kraljevine Jugoslavije 1919-1939), in particular:&#xd;
- Temporary National Representation of the Kingdom of Serbs, Croats, and Slovenes (1919-1920)&#xd;
- Legislative Committee of the National Assembly of the Kingdom of Serbs, Croats, and Slovenes (1921-1922)&#xd;
- National Representation (National Assembly and Senate) of the Kingdom of Yugoslavia (1931-1939)&#xd;
&#xd;
The meeting proceedings of the National Assembly of the Kingdom of Serbs, Croats, and Slovenes for the years 1923 to 1928 are not available and are therefore not included in the corpus.&#xd;
&#xd;
The corpus comprises 714 sessions (15403 pages, approximately 13 million words).&#xd;
&#xd;
The source data (scanned images of printed Stenographic Minutes) come from the History of Slovenia - SIstory (https://www.sistory.si) portal. The images were OCR processed and the results saved as pdf, docx and txt. The documents are multilingual, in Serbo-Croatian and Slovenian, depending on the speaker. Serbo-Croatian is typeset in the Cyrillic (Serbian) or in the Latin (Croatian) alphabet.&#xd;
&#xd;
The documents were automatically processed and the following data were extracted: titles, agenda, attending, start and end of the session, speakers, and comments. Lingua (https://github.com/pemistahl/lingua-py) was used for language detection at the sentence level. Roughly 59% of sentences are in Serbian (Cyrillic script), 38% in Croatian (Latin script) and 3% in Slovenian. Some sentences in German and French were also detected. Linguistic annotation (tokenisation, MSD tagging and lemmatisation) was added using CLASSLA (https://github.com/clarinsi/classla) for Serbian, Croatian and Slovenian. Words in Serbian (Cyrillic script) have lemmas in Latin script.&#xd;
&#xd;
The documents are in the Parla-CLARIN (https://github.com/clarin-eric/parla-clarin) compliant TEI XML format. Each session is stored in one file.</dc:description>
<dc:date>2023-07-15</dc:date>
<dc:type>corpus</dc:type>
<dc:identifier>http://hdl.handle.net/11356/1845</dc:identifier>
<dc:language>srp</dc:language>
<dc:language>hrv</dc:language>
<dc:language>slv</dc:language>
<dc:rights>Creative Commons - Attribution 4.0 International (CC BY 4.0)</dc:rights>
<dc:rights>https://creativecommons.org/licenses/by/4.0/</dc:rights>
<dc:rights>PUB</dc:rights>
<dc:format>text/plain; charset=utf-8</dc:format>
<dc:format>application/zip</dc:format>
<dc:format>application/zip</dc:format>
<dc:format>application/zip</dc:format>
<dc:format>downloadable_files_count: 3</dc:format>
<dc:publisher>Faculty of Computer and Information Science, University of Ljubljana</dc:publisher>
<dc:source>https://www.inz.si/en/dihur/</dc:source>
</oai_dc:dc>
</metadata></record></GetRecord></OAI-PMH>