Examples

Copy a sub-production

This example demonstrates how to copy one sub-production to another.

Source code in examples/copy.py
def main() -> None:
    """
    This example demonstrates how to copy one sub-production to another.
    """

    with (FSDataProvider("./prod_1") as src,  # open the source
          FSDataProvider("./prod_2") as dst):  # open the destination

        # manually copy the jobs and their data
        for job in src:  # iterate over the jobs in the source
            out = dst.create(job.job)  # create a new job in the destination
            out.update_info(job.info)  # copy the job metadata

            for data in job:  # iterate over the data in the job

                with (out.create(data.name).writer() as writer,  # create the destination data
                      data.reader() as reader):  # read the data from the source
                    writer.write(reader.read())  # write the data to the destination

        # automatically copying
        src.transfer(dst)  # transfer the source to the destination (also transfers zstd-dict for new format)

        # automatically limited copying
        src.transfer(dst, limit=500)  # transfer 500 jobs from the source to the destination
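
As a variation on the manual loop above, the same calls can be used for a selective copy. The sketch below copies only jobs with an even ID; it is an illustrative assumption built from the calls shown in examples/copy.py, not part of that file.

def copy_even_jobs() -> None:
    with (FSDataProvider("./prod_1") as src,   # open the source
          FSDataProvider("./prod_2") as dst):  # open the destination

        for job in src:  # iterate over the jobs in the source
            if job.job % 2 != 0:  # arbitrary filter for this sketch: keep only even job IDs
                continue

            out = dst.create(job.job)  # create the job in the destination
            out.update_info(job.info)  # copy the job metadata

            for data in job:  # iterate over the data in the job
                with (out.create(data.name).writer() as writer,
                      data.reader() as reader):
                    writer.write(reader.read())  # copy the data unchanged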

Dump a sub-production

This example demonstrates how to dump / extract jobs from any sub-production to a local directory.

Source code in examples/dump.py
def main() -> None:
    """
    This example demonstrates how to dump / extract jobs from any sub-production to a local directory.
    """

    with RootDataProvider(url) as src:  # open the remote source (old ZIP format)

        with FSDataProvider("./prod_1") as dst:

            src.transfer(dst)  # dump & extract old ZIP jobs to the local directory

    with SQLDataProvider(SQLDriver.create(path)) as src:  # open the local SQLite source (new format)

        with FSDataProvider("./prod_2",
                            FSDictProvider("./zstd-dict")) as dst:

            src.transfer(dst)  # dump & extract new Zstandard+SQLite jobs to the local directory
            # transferred data is stored in ./prod_2 and is still zstd-compressed
            # the zstd-dict is also transferred automatically to ./zstd-dict

        with FSDataProvider("./prod_3") as dst:
            # dump & extract new Zstandard+SQLite jobs to the local directory
            # transferred data is stored in ./prod_3 and is decompressed
            pool = DecompressorPool(src.dict_provider)  # pool that decompresses data using the source's zstd-dict

            for job in src:
                out = dst.create(job.job)  # create a new job
                out.update_info(job.info)  # update the job metadata

                for data in job:  # iterate over the data
                    pool.add(data, out.create(data.name))  # add the data to the pool

            pool.flush()  # flush the remaining training queues
            pool.stop()  # ask the pool to stop
            pool.join()  # join the pool

            print("jobs rejected:", pool.rejected)  # print rejected jobs and the error (if any)
            print("src size:", src.data_size, src.size)  # print the source size
            print("dst size:", dst.data_size, dst.size)  # print the destination size

Migrate a sub-production

This example demonstrates how to do a full migration of a sub-production from the old ZIP format to the new Zstandard+SQLite format.

Source code in examples/migrate.py
def main() -> None:
    """
    This example demonstrates how to do a full migration of a sub-production from the old ZIP format to the new Zstandard+SQLite format.
    """

    url = "root://xxxx.xxxx.xxxx//xxxxxxxx"  # the URL targetting a distant input sub-production
    path = "~/xxxxxxxx/xxxxxxxx_xxxx.db"  # the path to the local output SQLite database file

    with RootDataProvider(url) as src:  # open the remote ZIP-format source

        with SQLDataProvider(SQLDriver.create(path)) as dst:  # create / open the local SQLite destination

            pool = CompressorPool(dst.dict_provider)  # pool that compresses data and feeds the zstd-dict training

            for job in src:
                out = dst.create(job.job)  # create a new job
                out.update_info(job.info)  # update the job metadata

                for data in job:  # iterate over the data
                    pool.add(data, out.create(data.name))  # add the data to the pool

            pool.flush()  # flush the remaining training queues
            pool.stop()  # ask the pool to stop
            pool.join()  # join the pool

            print("jobs rejected:", pool.rejected)  # print rejected jobs and the error (if any)
            print("src size:", src.data_size, src.size)  # print the source size
            print("dst size:", dst.data_size, dst.size)  # print the destination size

Manage a sub-production

This example demonstrates how to create a job and a data entry, write data to it, read it back, and delete it.

Source code in examples/data.py
def main() -> None:
    """
    This example demonstrates how to create a job and a data entry, write data to it, read it back, and delete it.
    """

    with MemoryDataProvider("example") as pr:
        job = pr.create(42)  # create a new job with the ID 42

        data = job.create("my-data.txt")  # create a new data with the name "my-data.txt"

        with data.writer() as writer:
            writer.write(b"Hello, World!")  # write the data

        with data.reader() as reader:
            print(reader.read())  # prints b"Hello, World!"

        data.delete()  # delete the data
        job.delete("my-data.txt")  # same as data.delete()

        pr.delete(42)  # delete the job
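
The providers and jobs used above are iterable, as the copy example shows. The sketch below lists every job ID and data name held by a provider; it only reuses calls already shown on this page and is not part of examples/data.py.

def list_contents() -> None:
    with MemoryDataProvider("example") as pr:
        new_job = pr.create(1)  # create one job so there is something to list
        with new_job.create("a.txt").writer() as writer:
            writer.write(b"payload")  # store a small piece of data

        for job in pr:  # iterate over the jobs in the provider
            print("job:", job.job)
            for data in job:  # iterate over the data in the job
                print("  data:", data.name)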