Upload text predictions

How to upload predictions on text data to a model run, with sample upload formats.

You can use the Python SDK to upload predictions on text data.

This page shows how to declare the predictions and demonstrates the upload process.

A Python notebook demonstrates these steps and can be run directly in Google Colab.

Supported prediction types

To upload predictions in Labelbox, you need to create a predictions payload. This section shows how to declare payloads for each supported prediction type. You can declare payloads using Python annotation types (preferred) or as NDJSON objects.

Confidence scores are optional. If you do not include confidence scores in your prediction payloads, the prediction is treated as if it had a confidence value of one (1).
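For example, a minimal sketch of a radio prediction without a confidence score (reusing the names from the examples in this section) is scored the same as one declared with confidence=1:

radio_prediction_no_confidence = lb_types.ClassificationAnnotation(
    name="radio_question",
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(name="first_radio_answer") # No confidence provided; treated as 1
    )
)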

Entity

named_entity = lb_types.TextEntity(start=10, end=20)
entities_prediction = lb_types.ObjectAnnotation(value=named_entity, name = "named_entity", confidence=0.5)
entities_prediction_ndjson = { 
    "name": "named_entity",
    "confidence": 0.5, 
    "location": { 
        "start": 10, 
        "end": 20 
    }
}

Classification: radio (single choice)

radio_prediction = lb_types.ClassificationAnnotation(
    name="radio_question",
    value=lb_types.Radio(answer =
        lb_types.ClassificationAnswer(name = "first_radio_answer", confidence=0.5)
    )
)
radio_prediction_ndjson = {
  "name": "radio_question",
  "answer": {"name": "first_radio_answer", "confidence": 0.5}
} 

Classification: radio nested

nested_radio_prediction = lb_types.ClassificationAnnotation(
  name="nested_radio_question",
  value=lb_types.Radio(
    answer=lb_types.ClassificationAnswer(
      name="first_radio_answer",
      confidence=0.5, # Confidence scores should be added to the answer 
      classifications=[
        lb_types.ClassificationAnnotation(
          name="sub_radio_question",
          value=lb_types.Radio(
            answer=lb_types.ClassificationAnswer(
              name="first_sub_radio_answer",
              confidence=0.5 
            )
          )
        )
      ]
    )
  )
)
nested_radio_prediction_ndjson= {
  "name": "nested_radio_question",
  "answer": {
      "name": "first_radio_answer",
      "confidence": 0.5, # Confidence scores should be added to the answer 
      "classifications": [{
          "name":"sub_radio_question",
          "answer": { "name" : "first_sub_radio_answer", 
                     "confidence": 0.5}
        }]
    }
}

Classification: checklist nested

nested_checklist_prediction = lb_types.ClassificationAnnotation(
  name="nested_checklist_question",
  value=lb_types.Checklist(
    answer=[lb_types.ClassificationAnswer(
      name="first_checklist_answer",
      confidence=0.5, # Confidence scores should be added to the answer
      classifications=[
        lb_types.ClassificationAnnotation(
          name="sub_checklist_question",
          value=lb_types.Checklist(
            answer=[lb_types.ClassificationAnswer(
              name="first_sub_checklist_answer",
              confidence=0.5
            )]
          )
        )
      ]
    )]
  )
)
nested_checklist_prediction_ndjson = {
  "name": "nested_checklist_question",
  "answer": [{
      "name": "first_checklist_answer", 
      "confidence": 0.5, # Confidence scores should be added to the answer 
      "classifications" : [
        {
          "name": "sub_checklist_question", 
          "answer": {"name": "first_sub_checklist_answer", 
                     "confidence": 0.5}
        }          
      ]         
  }]
}

Classification: checklist (multiple choice)

checklist_prediction = lb_types.ClassificationAnnotation(
    name="checklist_question",
    value=lb_types.Checklist(
        answer = [
            lb_types.ClassificationAnswer(
                name = "first_checklist_answer",
                confidence=0.5
            ),
            lb_types.ClassificationAnswer(
                name = "second_checklist_answer", 
                confidence=0.5
            ),
            lb_types.ClassificationAnswer(
                name = "third_checklist_answer", 
                confidence=0.5
            )
    ])
  )
checklist_prediction_ndjson = {
  "name": "checklist_question",
  "answer": [
    {"name": "first_checklist_answer", "confidence": 0.5},
    {"name": "second_checklist_answer", "confidence": 0.5},
    {"name": "third_checklist_answer", "confidence": 0.5}
  ]
}

Classification: free-form text

text_prediction = lb_types.ClassificationAnnotation(
    name = "free_text", 
    value = lb_types.Text(answer="sample text", confidence=0.5)
)
text_prediction_ndjson = {
  "name": "free_text",
  "answer": "sample text",
  "confidence": 0.5
}

Example: Upload predictions to model run

To upload predictions to a model run:

Before you start

These examples require the following libraries:

import labelbox as lb
import labelbox.data.annotation_types as lb_types
import uuid

Replace API key

Paste your API key as the value of the API_KEY variable.

API_KEY = ""
client = lb.Client(API_KEY)
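If you prefer not to paste the key into the script, a minimal sketch that reads it from an environment variable (assuming you have exported LABELBOX_API_KEY in your shell) looks like this:

import os

# Assumes the LABELBOX_API_KEY environment variable is set
API_KEY = os.environ.get("LABELBOX_API_KEY")
client = lb.Client(api_key=API_KEY)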

Step 1: Import data rows into Catalog

global_key = "lorem-ipsum.txt"
test_text_asset = {
    "row_data": "https://storage.googleapis.com/labelbox-sample-datasets/nlp/lorem-ipsum.txt",
    "global_key": global_key
}
dataset = client.create_dataset(
    name="text prediction demo dataset",
    iam_integration=None # Removing this argument defaults to your organization's default IAM integration
)
task = dataset.create_data_rows([test_text_asset])
task.wait_till_done()
print("Errors:", task.errors)
print("Failed data rows:", task.failed_data_rows)

Step 2: Set up ontology for predictions

Your model run ontology should support all tools and classifications required by your predictions.

This example shows how to create an ontology containing all supported prediction types.

# Set up the ontology; the classification and tool names must match the names used in your predictions

ontology_builder = lb.OntologyBuilder(
  classifications=[ # List of Classification objects
    lb.Classification(
      class_type=lb.Classification.Type.RADIO,
      name="radio_question", 
      options=[lb.Option(value="first_radio_answer")]
    ),
    lb.Classification(
      class_type=lb.Classification.Type.RADIO,
      name="nested_radio_question", 
      options=[
        lb.Option(value="first_radio_answer",
          options=[
              lb.Classification(
                class_type=lb.Classification.Type.RADIO,
                name="sub_radio_question",
                options=[
                  lb.Option(value="first_sub_radio_answer")
                ]
            ),
          ]
        )
      ],
    ),
    lb.Classification(
      class_type=lb.Classification.Type.CHECKLIST,
      name="checklist_question", 
      options=[
        lb.Option(value="first_checklist_answer"),
        lb.Option(value="second_checklist_answer"),
        lb.Option(value="third_checklist_answer")
      ]
    ), 
     lb.Classification(
      class_type=lb.Classification.Type.TEXT,
      name="free_text"
    ),
    lb.Classification(
      class_type=lb.Classification.Type.CHECKLIST,
      name="nested_checklist_question",
      options=[
          lb.Option("first_checklist_answer",
            options=[
              lb.Classification(
                  class_type=lb.Classification.Type.CHECKLIST,
                  name="sub_checklist_question", 
                  options=[lb.Option("first_sub_checklist_answer")]
              )
          ]
        )
      ]
    )
  ],
  tools=[ # List of Tool objects
         lb.Tool(tool=lb.Tool.Type.NER,
              name="named_entity")
    ]
)

ontology = client.create_ontology("Ontology Text Predictions", ontology_builder.asdict(), media_type=lb.MediaType.Text)
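If a matching ontology already exists in your organization, you could fetch it by ID instead of creating a new one; the ID below is a placeholder:

# Alternative sketch: reuse an existing ontology (replace the placeholder ID with your own)
ontology = client.get_ontology("<ONTOLOGY_ID>")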

Step 3: Create model and model run

model = client.create_model(name="text_model_run_"+ str(uuid.uuid4()), 
                            ontology_id=ontology.uid)
model_run = model.create_model_run("iteration 1")

Step 4: Send data rows to model run

model_run.upsert_data_rows(global_keys=[global_key])
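If you track data rows by ID rather than by global key, upsert_data_rows can also take data row IDs; the ID below is a placeholder:

# Alternative sketch: attach data rows to the model run by data row ID (placeholder value)
model_run.upsert_data_rows(data_row_ids=["<DATA_ROW_ID>"])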

Step 5: Create prediction payloads

See Supported prediction types above for help creating prediction payloads. You can declare predictions as Python annotation types (preferred) or as NDJSON objects. The examples below show both formats and compose the predictions declared earlier into labels attached to the data row.

The resulting label_ndjson_predictions and label_predictions payloads should have exactly the same prediction content (except for the uuid string values).

label_predictions = []
label_predictions.append(
  lb_types.Label(
    data=lb_types.TextData(global_key=global_key),
    annotations = [
      entities_prediction, 
      nested_radio_prediction,
      radio_prediction, 
      checklist_prediction,
      nested_checklist_prediction,
      text_prediction,
    ]
  )
)
label_ndjson_predictions= []
for annot in [
    entities_prediction_ndjson, 
    radio_prediction_ndjson, 
    checklist_prediction_ndjson,
    text_prediction_ndjson, 
    nested_radio_prediction_ndjson,
    nested_checklist_prediction_ndjson
  ]:
  annot.update({
      "dataRow": {"globalKey": global_key}
  })
  label_ndjson_predictions.append(annot)

Step 6: Upload payload to model run

# Upload the prediction labels to the model run
upload_job_prediction = model_run.add_predictions(
    name="prediction_upload_job" + str(uuid.uuid4()),
    predictions=label_predictions)

# Errors will appear for prediction uploads that failed.
upload_job_prediction.wait_until_done()
print("Errors:", upload_job_prediction.errors)
print("Status of uploads: ", upload_job_prediction.statuses)

Step 7: Send annotations to model run

This step is optional.

# 7.1. Create a Labelbox project
project = client.create_project(name="Text Prediction Import Demo",
                                media_type=lb.MediaType.Text)
project.setup_editor(ontology)

# 7.2. Create a batch to send to the project
project.create_batch(
  "batch_text_prediction_demo", # Each batch in a project must have a unique name
  global_keys=[global_key], # A list of global keys for the data rows to include in the batch
  priority=5 # Priority between 1 (highest) and 5 (lowest)
)

# 7.3 Create your annotation payload as explained in:
# https://docs.labelbox.com/reference/import-text-annotations#supported-annotations
entities_annotation ... 
nested_radio_annotation ...
radio_annotation ...
checklist_annotation ...
nested_checklist_annotation ...
text_annotation ...

# 7.4. Create the label object 
label = []
label.append(
  lb_types.Label(
    data=lb_types.TextData(global_key=global_key),
    annotations = [
      entities_annotation, 
      nested_radio_annotation,
      radio_annotation, 
      checklist_annotation,
      nested_checklist_annotation,
      text_annotation,
    ]
  )
)

# 7.5 Upload annotations to the project using Label Import 
upload_job_annotation = lb.LabelImport.create_from_objects(
    client = client,
    project_id = project.uid,
    name="text_label_import_job"+ str(uuid.uuid4()),
    labels=label)

upload_job_annotation.wait_until_done()
# Errors will appear for annotation uploads that failed.
print("Errors:", upload_job_annotation.errors)
print("Status of uploads: ", upload_job_annotation.statuses)


# 7.6 Send the annotations to the model run

# Get the labels from the project and attach them to the model run
model_run.upsert_labels(project_id=project.uid)