Skip to content

Commit f349357

Browse files
committed
Add flops information (round 4)
1 parent 0169202 commit f349357

19 files changed

Lines changed: 46 additions & 22 deletions

33x-coder.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
# - Contributor type: Non-academic (Research community)
1515

1616
# Training compute:
17-
# - Base model training compute: ~4.08e+23 FLOP +- 0.5 OoM (Epoch AI)
17+
# - Base model training compute: ~4.08e+23 FLOP +- 0.5 OoM (param count & dataset size) [Epoch AI]
1818
# - End model training compute: unknown (likely negligible)
1919

2020
system:

Arabic StableLM.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
# - Contributor type: Non-academic (Company)
1515

1616
# Training compute:
17-
# - Base model training compute: ~1.92e+22 FLOP +- 0.5 OoM (Epoch AI)
17+
# - Base model training compute: ~1.92e+22 FLOP +- 0.5 OoM (param count & dataset size) [Epoch AI]
1818
# - End model training compute: unknown (likely negligible)
1919

2020
system:

BELLE.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
# - Contributor type: Non-academic (Company)
1515

1616
# Training compute:
17-
# - Base model training compute: ~1.6e+23 FLOP +- 0.5 OoM (Epoch AI)
17+
# - Base model training compute: ~1.6e+23 FLOP +- 0.5 OoM (param count & dataset size) [Epoch AI]
1818
# - End model training compute: unknown (likely negligible)
1919

2020
system:

CT-LLM.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@
1313
# - National origin: China
1414
# - Contributor type: Academic (Research community)
1515

16+
# Training compute:
17+
# - Base model training compute: >= 1.81e+22 FLOP +- 0.5 OoM (param count & dataset size)
18+
# - End model training compute: unknown (likely negligible)
19+
1620
system:
1721
name: CT-LLM
1822
link: https://huggingface.co/m-a-p/CT-LLM-SFT-DPO

ChatMusician.yaml

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,15 @@
1414
# - Contributor type: Academic (Research community)
1515

1616
# Training compute:
17-
# - Base model training compute: ~8.4e+22 FLOP +- 0.5 OoM (Epoch AI)
17+
# - Base model training compute: ~8.4e+22 FLOP +- 0.5 OoM (param count & dataset size) [Epoch AI]
1818
# - End model training compute: unknown (likely negligible)
1919

2020
system:
2121
name: ChatMusician
2222
link: https://huggingface.co/m-a-p/ChatMusician
2323
type: audio
2424
performanceclass: limited
25-
basemodelname: ChatMusician-Base
25+
basemodelname: Llama-2-7B
2626
endmodelname: ChatMusician
2727
endmodellicense: MIT License
2828
releasedate: 2024-04
@@ -35,19 +35,19 @@ org:
3535

3636
# availability:
3737
datasources_basemodel:
38-
class: partial
39-
link: https://huggingface.co/datasets/m-a-p/MusicPile
40-
notes:
38+
class: closed
39+
link:
40+
notes: No dataset information found.
4141

4242
datasources_endmodel:
4343
class: partial
4444
link: https://huggingface.co/datasets/m-a-p/MusicPile
4545
notes:
4646

4747
weights_basemodel:
48-
class: open
49-
link: https://huggingface.co/m-a-p/ChatMusician
50-
notes:
48+
class: partial
49+
link: https://huggingface.co/meta-llama/Llama-2-7b
50+
notes: Gated model on HuggingFace.
5151

5252
weights_endmodel:
5353
class: open

Claire.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
# - Contributor type: Academic (Research community)
1515

1616
# Training compute:
17-
# - Base model training compute: ~6.3e+22 FLOP +- 0.5 OoM (Epoch AI)
17+
# - Base model training compute: ~6.3e+22 FLOP +- 0.5 OoM (both param count & dataset size, and hardware figures) [Epoch AI]
1818
# - End model training compute: unknown (likely negligible)
1919

2020
system:

CodeGeeX.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
# - Contributor type: Non-academic (Chinese AI Tiger)
1515

1616
# Training compute:
17-
# - Base model training compute: ~6.63e+22 FLOP +- 0.5 OoM (Epoch AI)
17+
# - Base model training compute: ~8.1e+23 FLOP +- 1.0 OoM (param count & dataset size) [Epoch AI]
1818
# - End model training compute: unknown (likely negligible)
1919

2020
system:

CodeGemma.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
# - Contributor type: Non-academic (American Big Tech)
1515

1616
# Training compute:
17-
# - Base model training compute: ~3.07e+23 FLOP +- 0.5 OoM (Epoch AI)
17+
# - Base model training compute: ~3.07e+23 FLOP +- 0.5 OoM (param count & dataset size) [Epoch AI]
1818
# - End model training compute: unknown (likely negligible)
1919

2020
system:

CodeLlama.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414
# - Contributor type: Non-academic (American Big Tech)
1515

1616
# Training compute:
17-
# - Base model training compute: ~8.1e+23 FLOP +- 0.5 OoM (Epoch AI)
18-
# - End model training compute: ~4.2e+23 FLOP +- 0.3 OoM (param count & dataset size)
17+
# - Base model training compute: ~8.1e+23 FLOP +- 0.5 OoM (geometric mean of param count & dataset size, and hardware figures) [Epoch AI]
18+
# - End model training compute: ~4.2e+23 FLOP +- 0.5 OoM (param count & dataset size)
1919

2020
system:
2121
name: CodeLlama

CodeUp.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
# - Contributor type: Academic (University)
1515

1616
# Training compute:
17-
# - Base model training compute: ~7.2e+23 FLOP +- 0.5 OoM (Epoch AI)
17+
# - Base model training compute: ~7.2e+23 FLOP +- 0.5 OoM (hardware figures) [Epoch AI]
1818
# - End model training compute: unknown (likely negligible)
1919

2020
system:

0 commit comments

Comments
 (0)