<?xml version='1.0' encoding='UTF-8'?><?xml-stylesheet href='static/style.xsl' type='text/xsl'?><OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><responseDate>2026-06-25T05:29:11Z</responseDate><request verb="GetRecord" identifier="oai:www.clarin.si:11356/1840" metadataPrefix="oai_dc">http://www.clarin.si/repository/oai/request</request><GetRecord><record><header><identifier>oai:www.clarin.si:11356/1840</identifier><datestamp>2023-05-19T13:18:13Z</datestamp><setSpec>hdl_11356_1023</setSpec><setSpec>hdl_11356_1024</setSpec></header><metadata><oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:doc="http://www.lyncode.com/xoai" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dc="http://purl.org/dc/elements/1.1/" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
<dc:title>Slovenian Definition Extraction training dataset DF_NDF_wiki_slo 1.0</dc:title>
<dc:creator>Podpečan, Vid</dc:creator>
<dc:creator>Pollak, Senja</dc:creator>
<dc:creator>Fišer, Darja</dc:creator>
<dc:creator>Vintar, Špela</dc:creator>
<dc:creator>Tran, Thi Hong Hanh</dc:creator>
<dc:subject>definitions</dc:subject>
<dc:subject>Wikipedia</dc:subject>
<dc:subject>definition extraction</dc:subject>
<dc:description>The Slovenian definition extraction training dataset DF_NDF_wiki_slo contains 38613 sentences extracted from the Slovenian Wikipedia. The first sentence of a term's description on Wikipedia is considered a definition, and all other sentences are considered non-definitions.&#xd;
&#xd;
The corpus consists of the following files, each containing one definition / non-definition sentence per line:&#xd;
&#xd;
1. Definitions: df_ndf_wiki_slo_Y.txt with 3251 definition sentences.&#xd;
2. Non-definitions: df_ndf_wiki_slo_N.txt with 14678 non-definition sentences which do not contain the term at the beginning of the sentence.&#xd;
3. Non-definitions: df_ndf_wiki_slo_N1.txt with 20684 non-definition sentences which may also contain the term at the beginning of the sentence.&#xd;
&#xd;
The dataset is described in more detail in Fišer et al. 2010. If you use this resource, please cite: &#xd;
&#xd;
Fišer, D., Pollak, S., Vintar, Š. (2010). Learning to Mine Definitions from Slovene Structured and Unstructured Knowledge-Rich Resources. Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC'10). https://aclanthology.org/L10-1089/&#xd;
&#xd;
Reference to training Transformer-based definition extraction models using this dataset:&#xd;
Tran, T.H.H., Podpečan, V., Jemec Tomazin, M., Pollak, S. (2023). Definition Extraction for Slovene: Patterns, Transformer Classifiers and ChatGPT. Proceedings of the ELEX 2023: Electronic lexicography in the 21st century. Invisible lexicography: everywhere lexical data is used without users realizing they make use of a “dictionary”.&#xd;
&#xd;
Related resources:&#xd;
Jemec Tomazin, M. et al. (2023). Slovenian Definition Extraction evaluation datasets RSDO-def 1.0, Slovenian language resource repository CLARIN.SI, http://hdl.handle.net/11356/1841</dc:description>
<dc:date>2023-05-19</dc:date>
<dc:type>corpus</dc:type>
<dc:identifier>http://hdl.handle.net/11356/1840</dc:identifier>
<dc:language>slv</dc:language>
<dc:relation>https://aclanthology.org/L10-1089/</dc:relation>
<dc:rights>Creative Commons - Attribution-ShareAlike 4.0 International (CC BY-SA 4.0)</dc:rights>
<dc:rights>https://creativecommons.org/licenses/by-sa/4.0/</dc:rights>
<dc:rights>PUB</dc:rights>
<dc:format>text/plain; charset=utf-8</dc:format>
<dc:format>text/plain</dc:format>
<dc:format>text/plain</dc:format>
<dc:format>text/plain</dc:format>
<dc:format>downloadable_files_count: 3</dc:format>
<dc:publisher>Jožef Stefan Institute</dc:publisher>
<dc:publisher>Faculty of Arts, University of Ljubljana</dc:publisher>
</oai_dc:dc>
</metadata></record></GetRecord></OAI-PMH>