crumb_clean_instruct_440k.jsonl
databricks_dolly15k.jsonl
huggingface_ultrachat200k.jsonl
openorca_4m.jsonl
squad-train-v2.jsonl
wizardlm_evol_instruct_70k.jsonl