<?xml version='1.0' encoding='UTF-8'?><?xml-stylesheet href='static/style.xsl' type='text/xsl'?><OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><responseDate>2026-06-25T12:16:40Z</responseDate><request verb="GetRecord" identifier="oai:www.clarin.si:11356/1387" metadataPrefix="oai_dc">http://www.clarin.si/repository/oai/request</request><GetRecord><record><header><identifier>oai:www.clarin.si:11356/1387</identifier><datestamp>2022-12-06T15:24:02Z</datestamp><setSpec>hdl_11356_1023</setSpec><setSpec>hdl_11356_1024</setSpec></header><metadata><oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:doc="http://www.lyncode.com/xoai" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dc="http://purl.org/dc/elements/1.1/" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
<dc:title>Slovenian RoBERTa contextual embeddings model: SloBERTa 1.0</dc:title>
<dc:creator>Ulčar, Matej</dc:creator>
<dc:creator>Robnik-Šikonja, Marko</dc:creator>
<dc:subject>BERT</dc:subject>
<dc:subject>RoBERTa</dc:subject>
<dc:subject>word embeddings</dc:subject>
<dc:subject>language model</dc:subject>
<dc:subject>contextual embeddings</dc:subject>
<dc:description>The monolingual Slovene RoBERTa (A Robustly Optimized Bidirectional Encoder Representations from Transformers) model is a state-of-the-art model representing words/tokens as contextually dependent word embeddings, used for various NLP tasks. Word embeddings can be extracted for every word occurrence and then used in training a model for an end task, but typically the whole RoBERTa model is fine-tuned end-to-end.&#xd;
&#xd;
The SloBERTa model is closely related to the French CamemBERT model https://camembert-model.fr/. The corpora used for training the model have 3.47 billion tokens in total. The subword vocabulary contains 32,000 tokens. The scripts and programs used for data preparation and training the model are available at https://github.com/clarinsi/Slovene-BERT-Tool&#xd;
&#xd;
The model released here is a PyTorch neural network model, intended for use with the transformers library https://github.com/huggingface/transformers.</dc:description>
<dc:date>2020-12-29</dc:date>
<dc:type>toolService</dc:type>
<dc:identifier>http://hdl.handle.net/11356/1387</dc:identifier>
<dc:language>slv</dc:language>
<dc:relation>info:eu-repo/grantAgreement/EC/H2020/825153</dc:relation>
<dc:relation>http://hdl.handle.net/11356/1397</dc:relation>
<dc:rights>The MIT License (MIT)</dc:rights>
<dc:rights>https://opensource.org/licenses/mit-license.php</dc:rights>
<dc:rights>PUB</dc:rights>
<dc:format>text/plain; charset=utf-8</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>text/plain</dc:format>
<dc:format>downloadable_files_count: 4</dc:format>
<dc:publisher>Faculty of Computer and Information Science, University of Ljubljana</dc:publisher>
<dc:source>https://rsdo.slovenscina.eu/en/semantic-resources-and-technologies</dc:source>
</oai_dc:dc>
</metadata></record></GetRecord></OAI-PMH>