Get started with LangSmith
LangSmith is a platform for building production-grade LLM applications. It allows you to closely monitor and evaluate your application, so you can ship quickly and with confidence. Use of LangChain is not necessary - LangSmith works on its own!
1. Install LangSmith
- Python
- TypeScript
pip install -U langsmith
yarn add langsmith
2. Create an API key
To create an API key, head to the Settings page, then click Create API Key.
3. Set up your environment
- Shell
export LANGCHAIN_TRACING_V2=true
export LANGCHAIN_API_KEY=<your-api-key>
# The examples below use the OpenAI API, though it's not required in general
export OPENAI_API_KEY=<your-openai-api-key>
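If you prefer to configure this from within your program instead of the shell, the same variables can be set in Python before any LangSmith calls run. A minimal sketch (replace the placeholder values with your own keys):
import os

# Same variables as above, set from the process environment instead of the shell
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = "<your-api-key>"
os.environ["OPENAI_API_KEY"] = "<your-openai-api-key>"  # only needed for the OpenAI examples below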
4. Log your first trace
We provide multiple ways to log traces to LangSmith. Below, we'll highlight how to use traceable; a sketch of tracing nested calls appears at the end of this step. See more on the Annotate code for tracing page.
- Python
- TypeScript
import openai
from langsmith.wrappers import wrap_openai
from langsmith import traceable
# Auto-trace LLM calls in-context
client = wrap_openai(openai.Client())
@traceable # Auto-trace this function
def pipeline(user_input: str):
    result = client.chat.completions.create(
        messages=[{"role": "user", "content": user_input}],
        model="gpt-3.5-turbo"
    )
    return result.choices[0].message.content
pipeline("Hello, world!")
# Out:  Hello there! How can I assist you today?
import { OpenAI } from "openai";
import { traceable } from "langsmith/traceable";
import { wrapOpenAI } from "langsmith/wrappers";
// Auto-trace LLM calls in-context
const client = wrapOpenAI(new OpenAI());
// Auto-trace this function
const pipeline = traceable(async (user_input: string) => {
    const result = await client.chat.completions.create({
        messages: [{ role: "user", content: user_input }],
        model: "gpt-3.5-turbo",
    });
    return result.choices[0].message.content;
});
await pipeline("Hello, world!")
// Out: Hello there! How can I assist you today?
- View a sample output trace.
- Learn more about tracing in the how-to guides.
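traceable also nests: when one decorated function calls another, the child call is recorded under the parent in the same trace. Here is a minimal sketch of that, reusing the wrapped OpenAI client from the Python example above (the format_prompt helper is illustrative, not part of the example):
from langsmith import traceable

@traceable  # Recorded as a child run
def format_prompt(user_input: str) -> str:
    return f"Answer concisely: {user_input}"

@traceable  # Recorded as the parent run
def pipeline(user_input: str):
    prompt = format_prompt(user_input)
    result = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="gpt-3.5-turbo",
    )
    return result.choices[0].message.content

pipeline("Hello, world!")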
5. Run your first evaluation
Evaluation requires a system to test, data to serve as test cases, and optionally evaluators to grade the results. Here we define a simple custom evaluator that checks for an exact match between the prediction and the reference output.
- Python
- TypeScript
from langsmith import Client
from langsmith.evaluation import evaluate
client = Client()
# Define dataset: these are your test cases
dataset_name = "Sample Dataset"
dataset = client.create_dataset(dataset_name, description="A sample dataset in LangSmith.")
client.create_examples(
    inputs=[
        {"postfix": "to LangSmith"},
        {"postfix": "to Evaluations in LangSmith"},
    ],
    outputs=[
        {"output": "Welcome to LangSmith"},
        {"output": "Welcome to Evaluations in LangSmith"},
    ],
    dataset_id=dataset.id,
)
# Define your evaluator
def exact_match(run, example):
    return {"score": run.outputs["output"] == example.outputs["output"]}
experiment_results = evaluate(
    lambda input: "Welcome " + input['postfix'], # Your AI system goes here
    data=dataset_name, # The data to predict and grade over
    evaluators=[exact_match], # The evaluators to score the results
    experiment_prefix="sample-experiment", # The name of the experiment
    metadata={
      "version": "1.0.0",
      "revision_id": "beta"
    },
)
import { Client, Run, Example } from "langsmith";
import { evaluate, type EvaluationResult } from "langsmith/evaluation";
const client = new Client();
// Define dataset: these are your test cases
const datasetName = "Sample Dataset";
const dataset = await client.createDataset(datasetName, {
  description: "A sample dataset in LangSmith.",
});
await client.createExamples({
  inputs: [
    { postfix: "to LangSmith" },
    { postfix: "to Evaluations in LangSmith" },
  ],
  outputs: [
    { output: "Welcome to LangSmith" },
    { output: "Welcome to Evaluations in LangSmith" },
  ],
  datasetId: dataset.id,
});
// Define your evaluator
const exactMatch = async (
  run: Run,
  example: Example
): Promise<EvaluationResult> => {
  return {
    key: "exact_match",
    score: run.outputs?.output === example?.outputs?.output,
  };
};
await evaluate(
  (input: { postfix: string }) => ({ output: `Welcome ${input.postfix}` }),
  {
    data: datasetName,
    evaluators: [exactMatch],
    metadata: {
      version: "1.0.0",
      revision_id: "beta",
    },
  }
);
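Because evaluators are plain functions that take a run and an example and return a score, you can pass several of them in a single call. A minimal sketch, reusing the names from the Python example above (the contains_welcome evaluator is illustrative, not a built-in):
# An additional custom evaluator with the same (run, example) signature
def contains_welcome(run, example):
    return {"key": "contains_welcome", "score": "Welcome" in run.outputs["output"]}

experiment_results = evaluate(
    lambda input: "Welcome " + input["postfix"],
    data=dataset_name,
    evaluators=[exact_match, contains_welcome],  # grade each prediction on both criteria
    experiment_prefix="sample-experiment-multi",
)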
- Learn more about evaluation in the how-to guides.