{"value":"\n<cmd:CMD xmlns:cmd=\"http://www.clarin.eu/cmd/\" xmlns:lindat=\"http://lindat.mff.cuni.cz/ns/experimental/cmdi\" xmlns:olac=\"http://www.clarin.eu/cmd/\" xmlns:ms=\"http://www.clarin.eu/cmd/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" CMDVersion=\"1.1\" xsi:schemaLocation=\"http://www.clarin.eu/cmd/ http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1403526079380/xsd\">  \n  <cmd:Header> \n    <cmd:MdCreationDate>2023-04-14</cmd:MdCreationDate>  \n    <cmd:MdSelfLink>https://hdl.handle.net/11356/1796@format=cmdi</cmd:MdSelfLink>  \n    <cmd:MdProfile>clarin.eu:cr1:p_1403526079380</cmd:MdProfile>  \n    <cmd:MdCollectionDisplayName>CLARIN.SI data &amp; tools</cmd:MdCollectionDisplayName> \n  </cmd:Header>  \n  <cmd:Resources> \n    <cmd:ResourceProxyList> \n      <cmd:ResourceProxy id=\"lp_2330\"> \n        <cmd:ResourceType>LandingPage</cmd:ResourceType>  \n        <cmd:ResourceRef>https://hdl.handle.net/11356/1796</cmd:ResourceRef> \n      </cmd:ResourceProxy>  \n      <cmd:ResourceProxy id=\"_5079\"> \n        <cmd:ResourceType mimetype=\"application/zip\">Resource</cmd:ResourceType>  \n        <cmd:ResourceRef lindat:md5_checksum=\"1f4b18ae186532369ba43e40a80eb6bf\">https://www.clarin.si/repository/xmlui/bitstream/handle/11356/1796/all-token-bg-prelim.ft.sg.vec.zip?sequence=1</cmd:ResourceRef> \n      </cmd:ResourceProxy>  \n      <cmd:ResourceProxy id=\"_5083\"> \n        <cmd:ResourceType mimetype=\"application/zip\">Resource</cmd:ResourceType>  \n        <cmd:ResourceRef lindat:md5_checksum=\"5a93e54d5276e9141e8467f7c36d2cfc\">https://www.clarin.si/repository/xmlui/bitstream/handle/11356/1796/all-token-bg-prelim.ft.sg.bin.zip?sequence=2</cmd:ResourceRef> \n      </cmd:ResourceProxy> \n    </cmd:ResourceProxyList>  \n    <cmd:JournalFileProxyList/>  \n    <cmd:ResourceRelationList/> \n  </cmd:Resources>  \n  <cmd:Components> \n    <cmd:LINDAT_CLARIN> \n      <cmd:bibliographicInfo> \n        <cmd:titles> \n          <cmd:title xml:lang=\"en\">Word embeddings CLARIN.SI-embed.bg 1.0</cmd:title> \n        </cmd:titles>  \n        <cmd:authors> \n          <author xmlns=\"http://www.clarin.eu/cmd/\">  \n            <lastName>Terčon</lastName>  \n            <firstName>Luka</firstName> \n          </author>  \n          <author xmlns=\"http://www.clarin.eu/cmd/\">  \n            <lastName>Ljubešić</lastName>  \n            <firstName>Nikola</firstName> \n          </author> \n        </cmd:authors>  \n        <cmd:dates> \n          <cmd:dateIssued>2023-04-11</cmd:dateIssued> \n        </cmd:dates>  \n        <cmd:identifiers> \n          <cmd:identifier type=\"Handle\">https://hdl.handle.net/11356/1796</cmd:identifier> \n        </cmd:identifiers>  \n        <cmd:funds> \n          <funding xmlns=\"http://www.clarin.eu/cmd/\">  \n            <organization>Jožef Stefan Institute</organization>  \n            <code>CLARIN</code>  \n            <projectName>CLARIN.SI</projectName>  \n            <fundsType>nationalFunds</fundsType> \n          </funding>  \n          <funding xmlns=\"http://www.clarin.eu/cmd/\">  \n            <organization>ARRS (Slovenian Research Agency)</organization>  \n            <code>J7-4642</code>  \n            <projectName>MEZZANINE</projectName>  \n            <fundsType>nationalFunds</fundsType> \n          </funding>  \n          <funding xmlns=\"http://www.clarin.eu/cmd/\">  \n            <organization>Connecting Europe Facility (CEF) Telecom</organization>  \n            <code>INEA/CEF/ICT/A2020/2278341</code>  \n            <projectName>MaCoCu - Massive collection and curation of monolingual and bilingual data: focus on under-resourced languages</projectName>  \n            <fundsType>Other</fundsType> \n          </funding>  \n          <funding xmlns=\"http://www.clarin.eu/cmd/\">  \n            <organization>ARRS (Slovenian Research Agency)</organization>  \n            <code>P6-0411</code>  \n            <projectName>Language Resources and Technologies for Slovene</projectName>  \n            <fundsType>nationalFunds</fundsType> \n          </funding> \n        </cmd:funds>  \n        <contactPerson xmlns=\"http://www.clarin.eu/cmd/\">  \n          <firstName>Luka</firstName>  \n          <lastName>Terčon</lastName>  \n          <email>luka.tercon@gmail.com</email>  \n          <affiliation>Faculty of Computer and Information Science, University of Ljubljana</affiliation> \n        </contactPerson>  \n        <cmd:publishers> \n          <cmd:publisher>Jožef Stefan Institute</cmd:publisher> \n        </cmd:publishers> \n      </cmd:bibliographicInfo>  \n      <cmd:dataInfo> \n        <cmd:type>lexicalConceptualResource</cmd:type>  \n        <cmd:detailedType>computationalLexicon</cmd:detailedType>  \n        <cmd:description>CLARIN.SI-embed.bg contains word embeddings for Bulgarian induced from the MaCoCu-bg web crawl corpus (http://hdl.handle.net/11356/1515). The embeddings are based on the skip-gram model of fastText trained on 4,120,343,820 tokens of running text for 2,746,640 lowercased surface forms.</cmd:description>  \n        <cmd:languages> \n          <cmd:language> \n            <cmd:code>bul</cmd:code>  \n            <cmd:name>Bulgarian</cmd:name> \n          </cmd:language> \n        </cmd:languages>  \n        <cmd:keywords> \n          <cmd:keyword>word embeddings</cmd:keyword> \n        </cmd:keywords>  \n        <cmd:sizeInfo> \n          <size xmlns=\"http://www.clarin.eu/cmd/\">  \n            <size>2746640</size>  \n            <unit>entries</unit> \n          </size> \n        </cmd:sizeInfo> \n      </cmd:dataInfo>  \n      <cmd:licenseInfo> \n        <cmd:license> \n          <cmd:uri>https://creativecommons.org/licenses/by-sa/4.0/</cmd:uri> \n        </cmd:license> \n      </cmd:licenseInfo> \n    </cmd:LINDAT_CLARIN> \n  </cmd:Components> \n</cmd:CMD>"}