dc.contributor.author | Borovič, Mladen |
dc.contributor.author | Žagar, Kristjan |
dc.contributor.author | Ferme, Marko |
dc.contributor.author | Majninger, Sandi |
dc.contributor.author | Ojsteršek, Milan |
dc.date.accessioned | 2023-03-01T12:41:28Z |
dc.date.available | 2023-03-01T12:41:28Z |
dc.date.issued | 2022-09-22 |
dc.identifier.uri | http://hdl.handle.net/11356/1778 |
dc.description | 6 different fine-tuned Transformer-based models that solve the downstream task of extractive question answering in the Slovenian language. The fine-tuned models included are: bert-base-cased-squad2-SLO, bert-base-multilingual-cased-squad2-SLO, electra-base-squad2-SLO, roberta-base-squad2-SLO, sloberta-squad2-SLO and xlm-roberta-base-squad2-SLO. The models were trained and evaluated using the Slovene translation of the SQuAD2.0 dataset (https://www.clarin.si/repository/xmlui/handle/11356/1756). The models achieve the following metric values: sloberta-squad2-SLO: EM=67.1, F1=73.56; xlm-roberta-base-squad2-SLO: EM=62.52, F1=69.51; bert-base-multilingual-cased-squad2-SLO: EM=61.37, F1=68.1; roberta-base-squad2-SLO: EM=58.23, F1=64.62; bert-base-cased-squad2-SLO: EM=55.12, F1=60.52; electra-base-squad2-SLO: EM=53.69, F1=60.85. |
dc.language.iso | slv |
dc.publisher | Faculty of Electrical Engineering and Computer Science, University of Maribor |
dc.rights | Creative Commons - Attribution 4.0 International (CC BY 4.0) |
dc.rights.uri | https://creativecommons.org/licenses/by/4.0/ |
dc.rights.label | PUB |
dc.source.uri | https://rsdo.slovenscina.eu/en/semantic-resources-and-technologies |
dc.subject | question answering |
dc.subject | slovenian language |
dc.subject | SQuAD |
dc.subject | extractive question answering |
dc.subject | language model |
dc.title | Fine-tuned models for extractive question answering in the Slovenian language |
dc.type | toolService |
metashare.ResourceInfo#ContentInfo.detailedType | tool |
metashare.ResourceInfo#ResourceComponentType#ToolServiceInfo.languageDependent | true |
has.files | yes |
branding | CLARIN.SI data & tools |
demo.uri | https://slovenscina.eu/odgovarjanje-na-vprasanja |
contact.person | Mladen Borovič; mladen.borovic@um.si; UM FERI |
sponsor | Ministry of Culture; C3340-20-278001; Development of Slovene in a Digital Environment; Other |
files.count | 6 |
files.size | 3199258696 |
Datoteke v tem vnosu
To je vnos
Creative Commons - Attribution 4.0 International (CC BY 4.0)
Publicly Available
z licenco: Creative Commons - Attribution 4.0 International (CC BY 4.0)



- Ime
- bert-base-cased-squad2-SLO.zip
- Velikost
- 388.99 MB
- Format
- application/zip
- Opis
- bert-base-cased-squad2-SLO model weights and tokenizer settings
- MD5
- 71899094d6de396594ae2b58aa0caf94
- bert-base-cased-squad2-SLO
- README.md (1 kB)
- tokenizer_config.json (553 B)
- all_results.json (698 B)
- pytorch_model.bin (410 MB)
- train_results.json (197 B)
- config.json (722 B)
- training_args.bin (3 kB)
- eval_nbest_predictions.json (48 MB)
- tokenizer.json (653 kB)
- vocab.txt (208 kB)
- special_tokens_map.json (125 B)
- eval_null_odds.json (591 kB)
- trainer_state.json (7 kB)
- eval_results.json (503 B)
- eval_predictions.json (506 kB)

- Ime
- sloberta-squad2-SLO.zip
- Velikost
- 396.31 MB
- Format
- application/zip
- Opis
- sloberta-squad2-SLO model weights and tokenizer settings
- MD5
- 094683353dc635a22f189ff7995b5750
- sloberta-squad2-SLO
- README.md (1 kB)
- tokenizer_config.json (588 B)
- all_results.json (692 B)
- pytorch_model.bin (419 MB)
- train_results.json (191 B)
- config.json (742 B)
- training_args.bin (3 kB)
- eval_nbest_predictions.json (52 MB)
- tokenizer.json (2 MB)
- special_tokens_map.json (353 B)
- eval_null_odds.json (591 kB)
- trainer_state.json (586 B)
- eval_results.json (503 B)
- eval_predictions.json (529 kB)

- Ime
- roberta-base-squad2-SLO.zip
- Velikost
- 445.73 MB
- Format
- application/zip
- Opis
- roberta-base-squad2-SLO model weights and tokenizer settings
- MD5
- c3988fd4ba2d3c85e3f2a94a16d12c14
- roberta-base-squad2-SLO
- README.md (1 kB)
- tokenizer_config.json (1 kB)
- all_results.json (697 B)
- pytorch_model.bin (473 MB)
- train_results.json (197 B)
- merges.txt (445 kB)
- vocab.json (779 kB)
- config.json (782 B)
- training_args.bin (3 kB)
- eval_nbest_predictions.json (48 MB)
- tokenizer.json (2 MB)
- special_tokens_map.json (957 B)
- eval_null_odds.json (591 kB)
- trainer_state.json (6 kB)
- eval_results.json (502 B)
- eval_predictions.json (512 kB)

- Ime
- electra-base-squad2-SLO.zip
- Velikost
- 393.07 MB
- Format
- application/zip
- Opis
- electra-base-squad2-SLO model weights and tokenizer settings
- MD5
- 283c80c273f7bfb2f5540315eaf387c2
- electra-base-squad2-SLO
- README.md (1 kB)
- tokenizer_config.json (534 B)
- all_results.json (696 B)
- pytorch_model.bin (415 MB)
- train_results.json (196 B)
- config.json (846 B)
- training_args.bin (3 kB)
- eval_nbest_predictions.json (49 MB)
- tokenizer.json (694 kB)
- vocab.txt (226 kB)
- special_tokens_map.json (125 B)
- eval_null_odds.json (593 kB)
- trainer_state.json (6 kB)
- eval_results.json (502 B)
- eval_predictions.json (549 kB)

- Ime
- xlm-roberta-base-squad2-SLO.zip
- Velikost
- 790.68 MB
- Format
- application/zip
- Opis
- xlm-roberta-base-squad2-SLO model weights and tokenizer settings
- MD5
- 5768a0d2aee33029dabf05b81a54db4a
- xlm-roberta-base-squad2-SLO
- README.md (1 kB)
- tokenizer_config.json (451 B)
- all_results.json (696 B)
- pytorch_model.bin (1 GB)
- train_results.json (196 B)
- config.json (721 B)
- training_args.bin (3 kB)
- eval_nbest_predictions.json (50 MB)
- tokenizer.json (16 MB)
- special_tokens_map.json (280 B)
- eval_null_odds.json (592 kB)
- trainer_state.json (8 kB)
- eval_results.json (502 B)
- eval_predictions.json (538 kB)

- Ime
- bert-base-multilingual-cased-squad2-SLO.zip
- Velikost
- 636.27 MB
- Format
- application/zip
- Opis
- bert-base-multilingual-cased-squad2-SLO model weights and tokenizer settings
- MD5
- 0d9eea42f6cda732c6a87195fb7a4782
- bert-base-multilingual-cased-squad2-SLO
- README.md (1 kB)
- tokenizer_config.json (360 B)
- all_results.json (695 B)
- pytorch_model.bin (676 MB)
- train_results.json (195 B)
- config.json (846 B)
- training_args.bin (3 kB)
- eval_nbest_predictions.json (49 MB)
- tokenizer.json (2 MB)
- vocab.txt (972 kB)
- special_tokens_map.json (125 B)
- eval_null_odds.json (591 kB)
- trainer_state.json (4 kB)
- eval_results.json (502 B)
- eval_predictions.json (526 kB)