<?xml version='1.0' encoding='UTF-8'?><?xml-stylesheet href='static/style.xsl' type='text/xsl'?><OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><responseDate>2026-05-21T21:50:34Z</responseDate><request verb="GetRecord" identifier="oai:www.clarin.si:11356/1983" metadataPrefix="oai_dc">http://www.clarin.si/repository/oai/request</request><GetRecord><record><header><identifier>oai:www.clarin.si:11356/1983</identifier><datestamp>2024-11-06T16:16:45Z</datestamp><setSpec>hdl_11356_1023</setSpec><setSpec>hdl_11356_1024</setSpec></header><metadata><oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:doc="http://www.lyncode.com/xoai" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dc="http://purl.org/dc/elements/1.1/" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
<dc:title>Dataset of Slovene medical texts PoVeJMo-VeMo-Med 1.0</dc:title>
<dc:creator>Malenšek, Miha</dc:creator>
<dc:creator>Bajec, Marko</dc:creator>
<dc:subject>specialised corpus</dc:subject>
<dc:subject>medical texts</dc:subject>
<dc:subject>large language models</dc:subject>
<dc:description>PoVeJMo-VeMo-Med is a dataset containing Slovene medical texts. The bulk of it consists of instructions for use for various prescription drugs. The texts were extracted from the Slovene Central Drug Database (Centralna baza zdravil; http://www.cbz.si/), with a minority of documents from the National Institute of Public Health (Nacionalni inštitut za javno zdravje; https://nijz.si/). The documents were converted from PDF files to text format. The dataset can be used to fine-tune large language models for the medical domain.&#xd;
&#xd;
Version 1.0 contains two subversions of the corpus: the original (with 17,701 texts) and the deduplicated version (with 5,841 texts), in which duplicate texts have been removed.&#xd;
&#xd;
Please note that this dataset was also the basis for the automatic generation of the Slovene instruction-following dataset for large language models GaMS-Instruct-MED 1.0 (http://hdl.handle.net/11356/1982). For more information on how the two are related, please consult the entry for GaMS-Instruct-MED 1.0.</dc:description>
<dc:date>2024-09-25</dc:date>
<dc:type>corpus</dc:type>
<dc:identifier>http://hdl.handle.net/11356/1983</dc:identifier>
<dc:language>slv</dc:language>
<dc:rights>Creative Commons - Attribution 4.0 International (CC BY 4.0)</dc:rights>
<dc:rights>https://creativecommons.org/licenses/by/4.0/</dc:rights>
<dc:rights>PUB</dc:rights>
<dc:format>text/plain; charset=utf-8</dc:format>
<dc:format>application/zip</dc:format>
<dc:format>downloadable_files_count: 1</dc:format>
<dc:publisher>Faculty of Computer and Information Science, University of Ljubljana</dc:publisher>
<dc:publisher>VITASIS, d.o.o.</dc:publisher>
<dc:source>https://www.cjvt.si/povejmo/en/project/</dc:source>
</oai_dc:dc>
</metadata></record></GetRecord></OAI-PMH>