Skip to main content
Extract structured data from the current page by providing a schema. The extraction uses the current page state, making it ideal for capturing data mid-workflow.

Usage

from smooth import SmoothClient

# Each extracted story is an object with a title, a link, and an upvote count.
story_schema = {
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "title": {"type": "string", "description": "The title of the story"},
            "url": {"type": "string", "description": "Link to the story"},
            "points": {"type": "integer", "description": "Number of upvotes"},
        },
    },
}

client = SmoothClient()

with client.session() as session:
    # Load the page first: extraction works on the session's current page state.
    session.goto("https://news.ycombinator.com")

    extraction = session.extract(
        schema=story_schema,
        prompt="Extract the top 5 stories from the front page",
    )

    # `output` follows the schema above, so it is iterated as a list of stories.
    for story in extraction.output:
        print(f"{story['title']} - {story['points']} points")

Request

schema
dict
required
A JSON schema describing the structure of the data to extract. Include description fields to help guide the extraction. Example:
{
  "type": "object",
  "properties": {
    "title": {"type": "string", "description": "The product title"},
    "price": {"type": "number", "description": "Price in USD"},
    "in_stock": {"type": "boolean", "description": "Whether the item is available"}
  }
}
prompt
string
Optional prompt to guide the extraction. Use this to filter results, specify quantity, or handle ambiguous cases. Example: Extract only the products that are currently in stock and under $50

Response

Returns an object with the following attributes.
output
any
The extracted data conforming to the provided schema.
credits_used
float
The number of credits used for this action. 1 credit corresponds to $0.01.
duration
float
The duration in seconds taken to perform the extraction.

Examples

Extract a single object:
# A single object (not an array): one product with its headline details.
product_schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string", "description": "Product name"},
        "price": {"type": "number", "description": "Price in USD"},
        "rating": {"type": "number", "description": "Average rating out of 5"},
        "reviews_count": {"type": "integer", "description": "Number of reviews"},
    },
}

with client.session() as session:
    session.goto("https://example.com/product/12345")

    extraction = session.extract(
        schema=product_schema,
        prompt="Extract the main product details from this page",
    )

    # With an object schema, `output` is used directly as one record.
    product = extraction.output
    print(f"{product['name']}: ${product['price']} ({product['rating']} stars)")
Extract a list of items:
# An array schema: every search result becomes one laptop object.
laptop_schema = {
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "name": {"type": "string", "description": "Product name"},
            "price": {"type": "number", "description": "Price in USD"},
            "specs": {"type": "string", "description": "Key specifications"},
        },
    },
}

with client.session() as session:
    session.goto("https://example.com/search?q=laptops")

    extraction = session.extract(
        schema=laptop_schema,
        # The prompt, not the schema, is what limits the number of items.
        prompt="Extract the first 10 laptops from the search results",
    )

    for laptop in extraction.output:
        print(f"{laptop['name']}: ${laptop['price']}")
Filtering with prompt: Use the prompt to filter, sort, or apply conditions that go beyond what the schema can express.
# The schema only describes the shape of a flight; filtering and ordering
# are requested through the prompt below.
flight_schema = {
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "airline": {"type": "string", "description": "Airline name"},
            "departure": {"type": "string", "description": "Departure time"},
            "arrival": {"type": "string", "description": "Arrival time"},
            "price": {"type": "number", "description": "Price in USD"},
        },
    },
}

with client.session() as session:
    # Run the search task first, then extract from the page it leaves the session on.
    session.run_task(
        task="Search for one-way flights from NYC to LA on March 15",
        url="https://www.google.com/travel/flights",
    )

    extraction = session.extract(
        schema=flight_schema,
        prompt="Extract only non-stop flights under $300, sorted by price from lowest to highest",
    )

    for flight in extraction.output:
        print(f"{flight['airline']}: ${flight['price']} ({flight['departure']} - {flight['arrival']})")
Extract and use data to guide next action:
# One object per product listed on the category page.
listing_schema = {
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "name": {"type": "string", "description": "Product name"},
            "price": {"type": "number", "description": "Price in USD"},
            "in_stock": {"type": "boolean", "description": "Whether the item is available"},
        },
    },
}

with client.session() as session:
    session.goto("https://shop.example.com/category/electronics")

    extraction = session.extract(
        schema=listing_schema,
        prompt="Extract all products that are in stock",
    )

    # Pick the lowest-priced item and feed it into the next action.
    # The emptiness check matters: min() raises on an empty sequence.
    in_stock = extraction.output
    if in_stock:
        cheapest = min(in_stock, key=lambda item: item['price'])
        session.run_task(f"Add '{cheapest['name']}' to cart")