<?xml version='1.0' encoding='UTF-8'?><?xml-stylesheet href='static/style.xsl' type='text/xsl'?><OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><responseDate>2026-05-21T22:42:42Z</responseDate><request verb="GetRecord" identifier="oai:www.clarin.si:11356/1987" metadataPrefix="oai_dc">http://www.clarin.si/repository/oai/request</request><GetRecord><record><header><identifier>oai:www.clarin.si:11356/1987</identifier><datestamp>2024-11-13T11:22:31Z</datestamp><setSpec>hdl_11356_1023</setSpec><setSpec>hdl_11356_1024</setSpec></header><metadata><oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:doc="http://www.lyncode.com/xoai" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dc="http://purl.org/dc/elements/1.1/" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
<dc:title>News sentiment analysis datasets for Serbian, Bosnian, Macedonian, Albanian and Estonian SADEmma 1.0</dc:title>
<dc:creator>Ivačič, Nikola</dc:creator>
<dc:creator>Pelicon, Andraž</dc:creator>
<dc:creator>Koloski, Boshko</dc:creator>
<dc:creator>Pollak, Senja</dc:creator>
<dc:creator>Purver, Matthew</dc:creator>
<dc:subject>closely related languages</dc:subject>
<dc:subject>sentiment analysis</dc:subject>
<dc:subject>sentiment classification</dc:subject>
<dc:description>We provide datasets annotated on a three-point sentiment scale (positive, neutral, and negative) for Serbian, Bosnian, Macedonian, Albanian, and Estonian. For all languages except Estonian, each entry is a pair consisting of a source URL (where the corresponding text can be found) and a sentiment label.&#xd;
&#xd;
For Estonian, we randomly sampled 100 articles from "Ekspress news article archive (in Estonian and Russian) 1.0" (http://hdl.handle.net/11356/1408).&#xd;
&#xd;
The data is organized in Tab-Separated Values (TSV) format. For Serbian, Bosnian, Macedonian, and Albanian, the dataset contains two columns: sourceURL and sentiment. For Estonian, the dataset consists of three columns: text ID (from the CLARIN.SI reference above), body text, and sentiment label.</dc:description>
<dc:date>2024-11-13</dc:date>
<dc:type>corpus</dc:type>
<dc:identifier>http://hdl.handle.net/11356/1987</dc:identifier>
<dc:language>bos</dc:language>
<dc:language>srp</dc:language>
<dc:language>mkd</dc:language>
<dc:language>sqi</dc:language>
<dc:language>est</dc:language>
<dc:rights>Creative Commons - Attribution-ShareAlike 4.0 International (CC BY-SA 4.0)</dc:rights>
<dc:rights>https://creativecommons.org/licenses/by-sa/4.0/</dc:rights>
<dc:rights>PUB</dc:rights>
<dc:format>text/plain; charset=utf-8</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>application/octet-stream</dc:format>
<dc:format>downloadable_files_count: 5</dc:format>
<dc:publisher>Jožef Stefan Institute</dc:publisher>
<dc:source>https://emma.ijs.si/</dc:source>
</oai_dc:dc>
</metadata></record></GetRecord></OAI-PMH>