Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Shared config for OpenSubtitles multilingual translation tasks.
# Included by pairs/*.yaml files.
output_type: generate_until
custom_dataset: !function utils.load_opensubtitles_parallel
doc_to_text: !function utils.doc_to_text
doc_to_target: !function utils.doc_to_target
test_split: devtest
target_delimiter: ''
generation_kwargs:
  # Stop strings: the first newline, or any of the listed end-of-sequence
  # markers.
  until:
    - "\n"
    - "<|im_end|>"
    - "</s>"
    - "<|endoftext|>"
    - "<|eot_id|>"
    - "<|end_of_text|>"
  # Sampling is disabled; generation is capped at 128 tokens.
  do_sample: false
  temperature: 0.0
  max_gen_toks: 128
# Each entry names a metric, its aggregation, and its direction.
metric_list:
  - metric: bleu
    aggregation: bleu
    higher_is_better: true
  - metric: chrf
    aggregation: chrf
    higher_is_better: true
# NOTE(review): every pairs/*.yaml file repeats dataset_dir and split in its
# own metadata block - presumably because the include does not deep-merge
# nested mappings; confirm before removing either copy.
metadata:
  version: 1
  dataset_dir: Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies
  split: devtest
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the bg -> en pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_bg_to_en
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  src_lang: "bg"
  tgt_lang: "en"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the cs -> en pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_cs_to_en
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  src_lang: "cs"
  tgt_lang: "en"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the da -> en pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_da_to_en
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  src_lang: "da"
  tgt_lang: "en"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the de -> en pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_de_to_en
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  src_lang: "de"
  tgt_lang: "en"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the el -> en pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_el_to_en
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  src_lang: "el"
  tgt_lang: "en"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the en -> bg pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_en_to_bg
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  src_lang: "en"
  tgt_lang: "bg"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the en -> cs pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_en_to_cs
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  src_lang: "en"
  tgt_lang: "cs"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the en -> da pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_en_to_da
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  src_lang: "en"
  tgt_lang: "da"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the en -> de pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_en_to_de
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  src_lang: "en"
  tgt_lang: "de"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the en -> el pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_en_to_el
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  src_lang: "en"
  tgt_lang: "el"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the en -> es pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_en_to_es
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  src_lang: "en"
  tgt_lang: "es"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the en -> et pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_en_to_et
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  src_lang: "en"
  tgt_lang: "et"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the en -> fi pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_en_to_fi
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  src_lang: "en"
  tgt_lang: "fi"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the en -> fr pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_en_to_fr
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  src_lang: "en"
  tgt_lang: "fr"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the en -> hr pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_en_to_hr
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  src_lang: "en"
  tgt_lang: "hr"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the en -> hu pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_en_to_hu
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  src_lang: "en"
  tgt_lang: "hu"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the en -> it pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_en_to_it
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  src_lang: "en"
  tgt_lang: "it"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the en -> lt pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_en_to_lt
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  src_lang: "en"
  tgt_lang: "lt"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the en -> lv pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_en_to_lv
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  src_lang: "en"
  tgt_lang: "lv"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the en -> nl pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_en_to_nl
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  src_lang: "en"
  tgt_lang: "nl"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the en -> no pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_en_to_no
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  src_lang: "en"
  # Keep the quotes: a bare `no` is read as boolean false by YAML 1.1 parsers.
  tgt_lang: "no"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the en -> pl pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_en_to_pl
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  src_lang: "en"
  tgt_lang: "pl"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the en -> pt pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_en_to_pt
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  src_lang: "en"
  tgt_lang: "pt"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the en -> ro pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_en_to_ro
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  src_lang: "en"
  tgt_lang: "ro"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the en -> sk pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_en_to_sk
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  src_lang: "en"
  tgt_lang: "sk"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the en -> sl pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_en_to_sl
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  src_lang: "en"
  tgt_lang: "sl"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the en -> sr pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_en_to_sr
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  src_lang: "en"
  tgt_lang: "sr"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the en -> sv pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_en_to_sv
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  src_lang: "en"
  tgt_lang: "sv"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the en -> tr pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_en_to_tr
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  src_lang: "en"
  tgt_lang: "tr"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the en -> uk pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_en_to_uk
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  src_lang: "en"
  tgt_lang: "uk"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the es -> en pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_es_to_en
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  src_lang: "es"
  tgt_lang: "en"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the et -> en pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_et_to_en
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  src_lang: "et"
  tgt_lang: "en"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the fi -> en pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_fi_to_en
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  src_lang: "fi"
  tgt_lang: "en"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the fr -> en pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_fr_to_en
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  src_lang: "fr"
  tgt_lang: "en"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the hr -> en pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_hr_to_en
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  src_lang: "hr"
  tgt_lang: "en"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the hu -> en pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_hu_to_en
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  src_lang: "hu"
  tgt_lang: "en"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the it -> en pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_it_to_en
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  src_lang: "it"
  tgt_lang: "en"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the lt -> en pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_lt_to_en
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  src_lang: "lt"
  tgt_lang: "en"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the lv -> en pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_lv_to_en
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  src_lang: "lv"
  tgt_lang: "en"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the nl -> en pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_nl_to_en
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  src_lang: "nl"
  tgt_lang: "en"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Translation task for the no -> en pair; shared settings come from the
# included common config.
include: ../_opensubtitles_multi40_common.yaml
task: opensubtitles_multi40_no_to_en
metadata:
  version: 1
  dataset_dir: "Helsinki-NLP/OpenSubtitles2024-40-langs-15-movies"
  split: "devtest"
  # Keep the quotes: a bare `no` is read as boolean false by YAML 1.1 parsers.
  src_lang: "no"
  tgt_lang: "en"
Loading
Loading