#!/usr/bin/env python3

# creating a private dataset using
# the standard format

from datasets import Dataset, DatasetDict, load_from_disk

# Raw records for every split: each record pairs a 'text' string
# with an integer 'label'.
train_data = [
    {'text': text, 'label': label}
    for text, label in (
        ('First training example', 0),
        ('Second training example', 1),
    )
]

test_data = [
    {'text': text, 'label': label}
    for text, label in (
        ('First test example', 1),
        ('Second test example', 0),
    )
]

# Convert each list of records into its own Dataset object
# (train first, then test).
# :: each split could also be stored separately
train_dataset, test_dataset = map(Dataset.from_list, (train_data, test_data))

# Bundle both splits into one DatasetDict, keyed by split name.
dataset_dict = DatasetDict({'train': train_dataset, 'test': test_dataset})

# Persist the whole DatasetDict (all splits at once) under ./my_dataset ...
dataset_dict.save_to_disk('./my_dataset')

# ... then read it back from the same directory and print what was loaded.
loaded_dataset = load_from_disk('./my_dataset')
print(loaded_dataset)
