<?xml version='1.0' encoding='UTF-8'?><?xml-stylesheet href='static/style.xsl' type='text/xsl'?><OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><responseDate>2026-05-22T00:02:43Z</responseDate><request verb="GetRecord" identifier="oai:www.clarin.si:11356/1719" metadataPrefix="oai_dc">http://www.clarin.si/repository/oai/request</request><GetRecord><record><header><identifier>oai:www.clarin.si:11356/1719</identifier><datestamp>2023-03-27T17:01:16Z</datestamp><setSpec>hdl_11356_1023</setSpec><setSpec>hdl_11356_1024</setSpec></header><metadata><oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:doc="http://www.lyncode.com/xoai" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dc="http://purl.org/dc/elements/1.1/" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
<dc:title>Frequency list of textbook vocabulary by level of education in elementary and secondary schools</dc:title>
<dc:creator>Kosem, Iztok</dc:creator>
<dc:creator>Pori, Eva</dc:creator>
<dc:creator>Arhar Holdt, Špela</dc:creator>
<dc:subject>textbook corpus</dc:subject>
<dc:subject>vocabulary</dc:subject>
<dc:subject>diachronic</dc:subject>
<dc:subject>school</dc:subject>
<dc:subject>language didactics</dc:subject>
<dc:description>The dataset contains a list of 11906 words (lemmas with part of speech information) and their frequency of occurrence in a corpus of Slovenian textbooks, covering elementary school (Grade 1 to 9) and secondary school (Year 1 to 4). The corpus contains 4,302,857 words (5,373,268 tokens), and consists of 127 textbooks from 16 different subjects. The distribution per school level is as follows:&#xd;
- Grade 1: 17949 tokens&#xd;
- Grade 2: 46317 tokens&#xd;
- Grade 3: 84222 tokens&#xd;
- Grade 4: 305454 tokens&#xd;
- Grade 5: 357400 tokens&#xd;
- Grade 6: 351463 tokens&#xd;
- Grade 7: 537359 tokens&#xd;
- Grade 8: 592068 tokens&#xd;
- Grade 9: 765574 tokens&#xd;
- Year 1: 665093 tokens&#xd;
- Year 2: 200267 tokens&#xd;
- Year 3: 149442 tokens&#xd;
- Year 4: 23406 tokens&#xd;
- Year 1-4: 206843 tokens (these are textbooks that are used in all the years of secondary school and were not divided according to different years)&#xd;
&#xd;
The purpose of the dataset is to facilitate research into vocabulary use at different levels of education, and to enable comparative studies of student language reception and production in Slovene.</dc:description>
<dc:date>2023-02-28</dc:date>
<dc:type>lexicalConceptualResource</dc:type>
<dc:identifier>http://hdl.handle.net/11356/1719</dc:identifier>
<dc:language>slv</dc:language>
<dc:rights>Creative Commons - Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0)</dc:rights>
<dc:rights>https://creativecommons.org/licenses/by-nc-sa/4.0/</dc:rights>
<dc:rights>PUB</dc:rights>
<dc:format>text/plain; charset=utf-8</dc:format>
<dc:format>text/plain</dc:format>
<dc:format>text/plain</dc:format>
<dc:format>downloadable_files_count: 2</dc:format>
<dc:publisher>Centre for Language Resources and Technologies, University of Ljubljana</dc:publisher>
<dc:source>https://www.cjvt.si/prop/en/</dc:source>
</oai_dc:dc>
</metadata></record></GetRecord></OAI-PMH>