How to upload predictions on HTML data in a model run and sample upload formats.
You can use the Python SDK to upload predictions on HTML data.
This page shows how to declare different annotation types (as Python dictionaries and NDJSON objects) and demonstrates the upload process.
A Python notebook demonstrates these steps and can be run directly in Google Colab.
Supported annotations
To import annotations in Labelbox, you need to create the annotations payload. This section shows how to declare the payloads for each supported annotation type.
Classification: Radio (single-choice)
# Radio (single-choice) prediction. The answer name must be one of the options
# declared for "radio_question" in the ontology set up in Step 2, and the
# confidence score is attached to the answer itself.
radio_prediction = lb_types.ClassificationAnnotation(
    name="radio_question",
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(
            name="first_radio_answer",
            confidence=0.5,
        )
    ),
)
# NDJSON form of the radio prediction. As with the other prediction payloads,
# the confidence score is carried on the answer.
radio_prediction_ndjson = {
    "name": "radio_question",
    "answer": {"name": "first_radio_answer", "confidence": 0.5},
}
Classification: Nested radio
# Radio prediction whose selected answer carries a nested radio
# sub-classification. Confidence scores are set on each answer.
nested_radio_prediction = lb_types.ClassificationAnnotation(
    name="nested_radio_question",
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(
            name="first_radio_answer",
            confidence=0.5,
            classifications=[
                lb_types.ClassificationAnnotation(
                    name="sub_radio_question",
                    value=lb_types.Radio(
                        answer=lb_types.ClassificationAnswer(
                            name="first_sub_radio_answer",
                            confidence=0.5,
                        )
                    ),
                )
            ],
        )
    ),
)
# NDJSON form of the nested radio prediction. Confidence scores are set on
# each answer, and the sub-question lives under the answer's "classifications".
nested_radio_prediction_ndjson = {
    "name": "nested_radio_question",
    "answer": {
        "name": "first_radio_answer",
        "confidence": 0.5,
        "classifications": [
            {
                "name": "sub_radio_question",
                "answer": {
                    "name": "first_sub_radio_answer",
                    "confidence": 0.5,
                },
            }
        ],
    },
}
Classification: Nested checklist
# Checklist prediction whose selected answer carries a nested checklist
# sub-classification. Confidence scores are set on each answer.
nested_checklist_prediction = lb_types.ClassificationAnnotation(
    name="nested_checklist_question",
    value=lb_types.Checklist(
        answer=[
            lb_types.ClassificationAnswer(
                name="first_checklist_answer",
                confidence=0.5,
                classifications=[
                    lb_types.ClassificationAnnotation(
                        name="sub_checklist_question",
                        value=lb_types.Checklist(
                            answer=[
                                lb_types.ClassificationAnswer(
                                    name="first_sub_checklist_answer",
                                    confidence=0.5,
                                )
                            ]
                        ),
                    )
                ],
            )
        ]
    ),
)
# NDJSON form of the nested checklist prediction. Confidence scores are set on
# each answer.
# NOTE(review): the sub-checklist "answer" is a single object here, whereas the
# top-level checklist "answer" is a list -- confirm the importer accepts both.
nested_checklist_prediction_ndjson = {
    "name": "nested_checklist_question",
    "answer": [
        {
            "name": "first_checklist_answer",
            "confidence": 0.5,
            "classifications": [
                {
                    "name": "sub_checklist_question",
                    "answer": {
                        "name": "first_sub_checklist_answer",
                        "confidence": 0.5,
                    },
                }
            ],
        }
    ],
}
Classification: Checklist (multi-choice)
# Checklist (multi-choice) prediction: one ClassificationAnswer per selected
# option, each carrying its own confidence score.
checklist_prediction = lb_types.ClassificationAnnotation(
    name="checklist_question",
    value=lb_types.Checklist(
        answer=[
            lb_types.ClassificationAnswer(name=answer_name, confidence=0.5)
            for answer_name in (
                "first_checklist_answer",
                "second_checklist_answer",
                "third_checklist_answer",
            )
        ]
    ),
)
# NDJSON form of the checklist prediction. It lists the same three answers as
# the Python `checklist_prediction` object, each with its own confidence score.
checklist_prediction_ndjson = {
    "name": "checklist_question",
    "answer": [
        {"name": "first_checklist_answer", "confidence": 0.5},
        {"name": "second_checklist_answer", "confidence": 0.5},
        {"name": "third_checklist_answer", "confidence": 0.5},
    ],
}
Classification: Free-form text
# Free-form text prediction; here the confidence score is passed to the Text
# value alongside the answer string.
text_prediction = lb_types.ClassificationAnnotation(
    name="free_text",
    value=lb_types.Text(
        answer="sample text",
        confidence=0.5,
    ),
)
# NDJSON form of the free-form text prediction; the confidence score sits next
# to the answer string at the top level.
text_prediction_ndjson = {
    "name": "free_text",
    "answer": "sample text",
    "confidence": 0.5,
}
Example: Upload predictions to model run
To upload predictions to a model run:
Before you start
This example requires the following libraries:
import labelbox as lb
import labelbox.types as lb_types
import uuid
import numpy as np
Replace API Key
Paste your API key into the variable shown here.
# Paste your Labelbox API key between the quotes before running.
API_KEY = ""
# Client used for every call in the rest of this example.
client = lb.Client(api_key=API_KEY)
Step 1: Import data rows into Catalog
# The global key identifies the data row in every later step.
global_key = "sample_html_2.html"
html_data_row = {
    "row_data": "https://storage.googleapis.com/labelbox-datasets/html_sample_data/sample_html_2.html",
    "global_key": global_key,
}
dataset = client.create_dataset(
    name="html prediction demo dataset",
    # Omit this argument to use the organization's default IAM integration.
    iam_integration=None,
)
# Create the data row and block until the import task has finished.
task = dataset.create_data_rows([html_data_row])
task.wait_till_done()
print("Errors:", task.errors)
print("Failed data rows:", task.failed_data_rows)
Step 2: Set up ontology
Your project should include an ontology that supports your annotations. To ensure the feature schemas match, the tool names and classification names in the ontology should match the name field in your annotations.
# Ontology covering every classification used by the prediction payloads.
# Each classification name (and each option value) must match the `name`
# used in the corresponding annotation.
ontology_builder = lb.OntologyBuilder(
    classifications=[
        # Single-choice radio question.
        lb.Classification(
            class_type=lb.Classification.Type.RADIO,
            name="radio_question",
            options=[lb.Option(value="first_radio_answer")],
        ),
        # Radio question whose answer has a nested radio sub-question.
        lb.Classification(
            class_type=lb.Classification.Type.RADIO,
            name="nested_radio_question",
            options=[
                lb.Option(
                    value="first_radio_answer",
                    options=[
                        lb.Classification(
                            class_type=lb.Classification.Type.RADIO,
                            name="sub_radio_question",
                            options=[lb.Option(value="first_sub_radio_answer")],
                        ),
                    ],
                )
            ],
        ),
        # Multi-choice checklist question.
        lb.Classification(
            class_type=lb.Classification.Type.CHECKLIST,
            name="checklist_question",
            options=[
                lb.Option(value="first_checklist_answer"),
                lb.Option(value="second_checklist_answer"),
                lb.Option(value="third_checklist_answer"),
            ],
        ),
        # Free-form text question.
        lb.Classification(
            class_type=lb.Classification.Type.TEXT,
            name="free_text",
        ),
        # Checklist question whose answer has a nested checklist sub-question.
        lb.Classification(
            class_type=lb.Classification.Type.CHECKLIST,
            name="nested_checklist_question",
            options=[
                lb.Option(
                    value="first_checklist_answer",
                    options=[
                        lb.Classification(
                            class_type=lb.Classification.Type.CHECKLIST,
                            name="sub_checklist_question",
                            options=[lb.Option(value="first_sub_checklist_answer")],
                        )
                    ],
                )
            ],
        ),
    ]
)

# Register the ontology for HTML media.
ontology = client.create_ontology(
    "Ontology HTML Predictions",
    ontology_builder.asdict(),
    media_type=lb.MediaType.Html,
)
Step 3: Create model and model run
# Create a model bound to the ontology; a random UUID suffix is appended to
# the model name.
model = client.create_model(
    name=f"HTML_model_run_{uuid.uuid4()}",
    ontology_id=ontology.uid,
)
# Create a model run under that model.
model_run = model.create_model_run("iteration 1")
Step 4: Send data rows to model run
# Send the data row imported in Step 1 to the model run, referenced by its global key.
model_run.upsert_data_rows(global_keys=[global_key])
Step 5: Create prediction payload
Use the examples in Supported annotations to create your annotation payloads; you can declare them as Python dictionaries or NDJSON objects. Examples of each type are shown here; they also show how to compose annotations into labels attached to the data rows.
The resulting label_prediction and label_prediction_ndjson from each approach demonstrate each supported annotation type.
# Python-object payload: a single Label that attaches every prediction to the
# HTML data row identified by `global_key`.
label_prediction = [
    lb_types.Label(
        data=lb_types.HTMLData(global_key=global_key),
        annotations=[
            radio_prediction,
            checklist_prediction,
            text_prediction,
            nested_checklist_prediction,
            nested_radio_prediction,
        ],
    )
]
# NDJSON payload: the prediction dictionaries themselves, in upload order.
label_prediction_ndjson = [
    radio_prediction_ndjson,
    nested_radio_prediction_ndjson,
    checklist_prediction_ndjson,
    text_prediction_ndjson,
    nested_checklist_prediction_ndjson,
]
# Tag each prediction (in place) with the data row it belongs to.
for ndjson_prediction in label_prediction_ndjson:
    ndjson_prediction["dataRow"] = {"globalKey": global_key}
Step 6: Upload predictions payload to model run
# Upload the prediction labels to the model run; a random UUID suffix is
# appended to the job name.
upload_job_prediction = model_run.add_predictions(
    name=f"prediction_upload_job{uuid.uuid4()}",
    predictions=label_prediction,
)

# Any prediction that failed to upload is reported under errors.
print("Errors:", upload_job_prediction.errors)
print("Status of uploads: ", upload_job_prediction.statuses)
Step 7: Send annotations to model run
(Optional) To send annotations to a model run:
- Import them into a project
- Create a label payload
- Send them to the model run
# 7.1 Create a Labelbox project for HTML data and attach the ontology to it.
project = client.create_project(
    name="HTML prediction import demo",
    queue_mode=lb.QueueMode.Batch,
    media_type=lb.MediaType.Html,
)
project.setup_editor(ontology)

# 7.2 Create a batch to send the data row to the project.
project.create_batch(
    name="batch_prediction_html",  # each batch in a project must have a unique name
    global_keys=[global_key],  # paginated collection of data row objects, list of data row ids or global keys
    priority=5,  # priority between 1 (highest) and 5 (lowest)
)
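# 7.3 Create the annotations payload
# The lines below are elided placeholders, not runnable code: define each
# annotation object following the declarations in Supported annotations.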
radio_annotation...
nested_radio_annotation...
nested_checklist_annotation...
checklist_annotation...
text_annotation...
# 7.4 Create the label object
# Start from an empty list so that `label` exists before it is appended to;
# it then holds one Label attaching all annotations to the HTML data row.
label = []
label.append(
    lb_types.Label(
        data=lb_types.HTMLData(global_key=global_key),
        annotations=[
            text_annotation,
            checklist_annotation,
            radio_annotation,
            nested_checklist_annotation,
            nested_radio_annotation,
        ],
    )
)
# 7.5 Upload the annotations to the project using Label Import; a random UUID
# suffix is appended to the import job name.
upload_job_annotation = lb.LabelImport.create_from_objects(
    client=client,
    project_id=project.uid,
    name=f"html_annotation_import{uuid.uuid4()}",
    labels=label,
)
# Block until the import job has finished before reading its results.
upload_job_annotation.wait_until_done()

# Any annotation that failed to upload is reported under errors.
print("Errors:", upload_job_annotation.errors)
print("Status of uploads: ", upload_job_annotation.statuses)

# 7.6 Send the project's annotations to the model run.
model_run.upsert_labels(project_id=project.uid)