Upload HTML predictions

How to upload predictions on HTML data to a model run, with sample upload formats.

You can use the Python SDK to upload predictions on HTML data.

This page shows how to declare different annotation types (as Python dictionaries and NDJSON objects) and demonstrates the upload process.

A Python notebook demonstrates these steps and can be run directly in Google Colab.

Supported annotations

To import annotations in Labelbox, you need to create the annotations payload. This section shows how to declare the payloads for each supported annotation type.

Classification: Radio (single-choice)

radio_prediction = lb_types.ClassificationAnnotation(
  name="radio_question",
  value=lb_types.Radio(answer=lb_types.ClassificationAnswer(name="first_radio_answer", confidence=0.5))
)

radio_prediction_ndjson = {
  'name': 'radio_question',
  'answer': {'name': 'first_radio_answer', 'confidence': 0.5}
}

Classification: Nested radio

nested_radio_prediction = lb_types.ClassificationAnnotation(
  name="nested_radio_question",
  value=lb_types.Radio(
    answer=lb_types.ClassificationAnswer(
      name="first_radio_answer",
      confidence=0.5, # Confidence scores should be added to the answer
      classifications=[
        lb_types.ClassificationAnnotation(
          name="sub_radio_question",
          value=lb_types.Radio(
            answer=lb_types.ClassificationAnswer(
              name="first_sub_radio_answer", confidence=0.5)
          )
        )
      ]
    )
  )
)
nested_radio_prediction_ndjson = {
  "name": "nested_radio_question",
  "answer": {
      "name": "first_radio_answer",
      "confidence": 0.5, # Confidence scores should be added to the answer 
      "classifications": [{
          "name":"sub_radio_question",
          "answer": { "name" : "first_sub_radio_answer", "confidence": 0.5 }
        }]
    }
}

Classification: Nested checklist

nested_checklist_prediction = lb_types.ClassificationAnnotation(
  name="nested_checklist_question",
  value=lb_types.Checklist(
    answer=[lb_types.ClassificationAnswer(
      name="first_checklist_answer",
      confidence=0.5, # Confidence scores should be added to the answer 
      classifications=[
        lb_types.ClassificationAnnotation(
          name="sub_checklist_question",
          value=lb_types.Checklist(
            answer=[lb_types.ClassificationAnswer(
            name="first_sub_checklist_answer",
            confidence=0.5
          )]
        ))
      ]
    )]
  )
)
nested_checklist_prediction_ndjson = {
  "name": "nested_checklist_question",
  "answer": [{
      "name": "first_checklist_answer", 
      "confidence": 0.5, # Confidence scores should be added to the answer 
      "classifications" : [
        {
          "name": "sub_checklist_question", 
          "answer": {"name": "first_sub_checklist_answer", "confidence": 0.5 }
        }          
      ]         
  }]
}

Classification: Checklist (multi-choice)

checklist_prediction = lb_types.ClassificationAnnotation(
    name="checklist_question",
    value=lb_types.Checklist(
        answer=[
            lb_types.ClassificationAnswer(
                name="first_checklist_answer",
                confidence=0.5
            ),
            lb_types.ClassificationAnswer(
                name="second_checklist_answer",
                confidence=0.5
            ),
            lb_types.ClassificationAnswer(
                name="third_checklist_answer",
                confidence=0.5
            )
        ]
    )
)
checklist_prediction_ndjson = {
  'name': 'checklist_question',
  'answer': [
    {'name': 'first_checklist_answer', 'confidence': 0.5},
    {'name': 'second_checklist_answer', 'confidence': 0.5},
    {'name': 'third_checklist_answer', 'confidence': 0.5}
  ]
}

Classification: Free-form text

text_prediction = lb_types.ClassificationAnnotation(
    name="free_text",
    value=lb_types.Text(answer="sample text", confidence=0.5)
)
text_prediction_ndjson = {
  'name': 'free_text',
  'answer': 'sample text',
  'confidence': 0.5
}

Example: Upload predictions to model run

To upload predictions to a model run:

Before you start

This example requires the following libraries:

import labelbox as lb
import labelbox.types as lb_types
import uuid

Replace API Key

Paste your API key into the variable shown here.

API_KEY = ""
client = lb.Client(api_key=API_KEY)

Step 1: Import data rows into Catalog

global_key = "sample_html_2.html"

test_html_data = {
    "row_data": "https://storage.googleapis.com/labelbox-datasets/html_sample_data/sample_html_2.html",
    "global_key": global_key
}
dataset = client.create_dataset(
    name="html prediction demo dataset",
    iam_integration=None # Removing this argument will default to the organization's default IAM integration
)
task = dataset.create_data_rows([test_html_data])
task.wait_till_done()
print("Errors:", task.errors)
print("Failed data rows:", task.failed_data_rows)

Step 2: Set up ontology

Your project should include an ontology that supports your annotations. To ensure the feature schemas match, the tool and classification names in the ontology must match the name field in your annotations.

ontology_builder = lb.OntologyBuilder(
  classifications=[ # List of Classification objects
    lb.Classification( 
      class_type=lb.Classification.Type.RADIO, 
      name="radio_question",  # name matching the tool used in the annotation
      options=[lb.Option(value="first_radio_answer")]
    ),
    lb.Classification( 
      class_type=lb.Classification.Type.RADIO, 
      name="nested_radio_question", 
      options=[
        lb.Option(value="first_radio_answer",
          options=[
              lb.Classification(
                class_type=lb.Classification.Type.RADIO,
                name="sub_radio_question",
                options=[
                  lb.Option(value="first_sub_radio_answer")
                ]
            ),
          ]
        )
      ],
    ),
    lb.Classification( 
      class_type=lb.Classification.Type.CHECKLIST, 
      name="checklist_question", 
      options=[
        lb.Option(value="first_checklist_answer"),
        lb.Option(value="second_checklist_answer"), 
        lb.Option(value="third_checklist_answer")            
      ]
    ), 
    lb.Classification(
      class_type=lb.Classification.Type.TEXT,
      name="free_text"
    ),
    lb.Classification(
      class_type=lb.Classification.Type.CHECKLIST, 
      name="nested_checklist_question",
      options=[
          lb.Option("first_checklist_answer",
            options=[
              lb.Classification(
                  class_type=lb.Classification.Type.CHECKLIST, 
                  name="sub_checklist_question", 
                  options=[lb.Option("first_sub_checklist_answer")]
              )
          ]
        )
      ]
    )
  ]
)

ontology = client.create_ontology("Ontology HTML Predictions",
                                  ontology_builder.asdict(),
                                  media_type=lb.MediaType.Html)

Step 3: Create model and model run

# Create a model
model = client.create_model(name="HTML_model_run_" + str(uuid.uuid4()),
                            ontology_id=ontology.uid)
# Create a model run
model_run = model.create_model_run("iteration 1")

Step 4: Send data rows to model run

model_run.upsert_data_rows(global_keys=[global_key])

Step 5: Create prediction payload

Use the examples in Supported annotations to create your annotation payloads; you can declare them as Python dictionaries or as NDJSON objects. The examples below show both approaches and demonstrate how to compose the annotations into labels attached to data rows.

The resulting label_prediction and label_prediction_ndjson payloads each contain every supported annotation type.


label_prediction = []
label_prediction.append(
  lb_types.Label(
    data=lb_types.HTMLData(global_key=global_key),
    annotations = [
      radio_prediction, 
      checklist_prediction,
      text_prediction,
      nested_checklist_prediction,
      nested_radio_prediction
    ]
  )
)
label_prediction_ndjson = []
for annot in [
    radio_prediction_ndjson, 
    nested_radio_prediction_ndjson,
    checklist_prediction_ndjson,
    text_prediction_ndjson,
    nested_checklist_prediction_ndjson
]:
  annot.update({
      "dataRow": {"globalKey": global_key},
  })
  label_prediction_ndjson.append(annot)

Step 6: Upload predictions payload to model run

# Upload the prediction label to the Model Run
upload_job_prediction = model_run.add_predictions(
    name="prediction_upload_job" + str(uuid.uuid4()),
    predictions=label_prediction)

upload_job_prediction.wait_until_done()
# Errors will appear for annotation uploads that failed.
print("Errors:", upload_job_prediction.errors)
print("Status of uploads: ", upload_job_prediction.statuses)

Step 7: Send annotations to model run

(Optional) To send annotations to a model run:

  1. Import them into a project
  2. Create a label payload
  3. Send them to the model run

# 7.1 Create a Labelbox project
project = client.create_project(name="HTML prediction import demo",
                                queue_mode=lb.QueueMode.Batch,
                                media_type=lb.MediaType.Html)
project.setup_editor(ontology)

# 7.2 Create a batch to send to the project
project.create_batch(
  "batch_prediction_html", # Each batch in a project must have a unique name
  global_keys=[global_key], # A paginated collection of data row objects, a list of data row IDs, or a list of global keys
  priority=5 # Priority between 1 (highest) and 5 (lowest)
)

# 7.3 Create the annotation payloads
# These mirror the prediction payloads shown in Supported annotations, but
# without confidence scores (confidence applies only to predictions).
radio_annotation...
nested_radio_annotation...
nested_checklist_annotation...
checklist_annotation...
text_annotation...
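
# For example, a minimal sketch of the first payload (the remaining
# annotations follow the same pattern as their prediction counterparts):
radio_annotation = lb_types.ClassificationAnnotation(
  name="radio_question",
  value=lb_types.Radio(answer=lb_types.ClassificationAnswer(name="first_radio_answer"))
)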

# 7.4 Create the label object
label = []
label.append(
  lb_types.Label(
    data=lb_types.HTMLData(
      global_key=global_key
    ),
    annotations=[
      text_annotation,
      checklist_annotation,
      radio_annotation,
      nested_checklist_annotation,
      nested_radio_annotation
    ]
  )
)

# 7.5 Upload annotations to the project using Label Import
upload_job_annotation = lb.LabelImport.create_from_objects(
    client=client,
    project_id=project.uid,
    name="html_annotation_import" + str(uuid.uuid4()),
    labels=label)

upload_job_annotation.wait_until_done()
# Errors will appear for annotation uploads that failed.
print("Errors:", upload_job_annotation.errors)
print("Status of uploads: ", upload_job_annotation.statuses)

# 7.6 Send the annotations to the Model Run
model_run.upsert_labels(project_id=project.uid)
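
# Optional cleanup: delete the demo project and dataset created above
# (uncomment to run)
# project.delete()
# dataset.delete()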