Pipestat API Quickstart Guide

This example shows how to quickly report results to a file backend (a results.yaml file).

from pipestat import PipestatManager

# The file backend requires a results.yaml file
result_file = "../tests/data/results_docs_example.yaml"

# Every PipestatManager requires an output schema so that it knows the format of the results
schema_file = "../tests/data/sample_output_schema.yaml"

# With these two files, we can initialize a PipestatManager object and begin reporting results

psm = PipestatManager(results_file_path=result_file, schema_path=schema_file)

Initialize FileBackend

# Let's look at our output schema. Notice that the schema is only for reporting sample-level results
print(psm.schema)

ParsedSchema (default_pipeline_name)
 Project-level properties:
 - None
 Sample-level properties:
 - number_of_things : {'type': 'integer', 'description': 'Number of things'}
 - percentage_of_things : {'type': 'number', 'description': 'Percentage of things'}
 - name_of_something : {'type': 'string', 'description': 'Name of something'}
 - switch_value : {'type': 'boolean', 'description': 'Is the switch on or off'}
 - output_file : {'description': 'This a path to the output file', 'type': 'object', 'object_type': 'file', 'properties': {'path': {'type': 'string'}, 'title': {'type': 'string'}}, 'required': ['path', 'title']}
 - output_image : {'description': 'This a path to the output image', 'type': 'object', 'object_type': 'image', 'properties': {'path': {'type': 'string'}, 'thumbnail_path': {'type': 'string'}, 'title': {'type': 'string'}}, 'required': ['path', 'thumbnail_path', 'title']}
 - md5sum : {'type': 'string', 'description': 'MD5SUM of an object', 'highlight': True}
 Status properties:
 - None

# Let's report a result. The result_identifier (e.g. percentage_of_things) must be in the output schema.
# When reporting a result, a record_identifier must be provided either at the time of reporting 
# or upon PipestatManager creation.

psm.report(record_identifier="my_sample_name_1", values={"percentage_of_things": 100})

["Reported records for 'my_sample_name_1' in 'default_pipeline_name' :\n - percentage_of_things: 100"]

# Pipestat records the result along with a created time and a modified time.
# Because force_overwrite defaults to True, reporting the same result again overwrites
# the stored value and updates the modified time.
psm.report(record_identifier="my_sample_name_1", values={"percentage_of_things": 50})

These results exist for 'my_sample_name_1': percentage_of_things
Overwriting existing results: percentage_of_things





["Reported records for 'my_sample_name_1' in 'default_pipeline_name' :\n - percentage_of_things: 50"]

# If you set force_overwrite to False and attempt to report a result that already exists,
# the existing value is kept and report() returns False:
psm.report(record_identifier="my_sample_name_1", values={"percentage_of_things": 50}, force_overwrite=False)

These results exist for 'my_sample_name_1': percentage_of_things





False

# Let's look at the reported data
# Note that history recording is turned on by default; previous values are stored under the meta -> history keys
psm.data

default_pipeline_name:
  project: {}
  sample:
    my_sample_name_1:
      meta:
        pipestat_modified_time: '2024-04-18 14:17:08'
        pipestat_created_time: '2024-04-18 14:17:07'
        history:
          percentage_of_things:
            '2024-04-18 14:17:08': 100
      percentage_of_things: 50

# You can also retrieve a result:
result = psm.retrieve_one(record_identifier="my_sample_name_1")
print(result)

{'percentage_of_things': 50, 'record_identifier': 'my_sample_name_1'}

# Similarly, you can retrieve the historical values of a result:
result = psm.retrieve_history(record_identifier="my_sample_name_1")
print(result)

{'percentage_of_things': {'2024-04-18 14:17:08': 100}}