class Person(BaseModel):
    """Information about a person."""

    # ^ Doc-string for the entity Person.
    # This doc-string is sent to the LLM as the description of the schema Person,
    # and it can help to improve extraction results.

    # Note that:
    # 1. Each field is an `optional` -- this allows the model to decline to extract it!
    # 2. Each field has a `description` -- this description is used by the LLM.
    # Having a good description can help improve extraction results.
    name: Optional[str] = Field(default=None, description="The name of the person")
    hair_color: Optional[str] = Field(
        default=None, description="The color of the person's hair if known"
    )
    # NOTE(review): the height is typed as a string, not a float -- presumably
    # so the model can return the value as written; confirm before changing.
    height_in_meters: Optional[str] = Field(
        default=None, description="Height measured in meters"
    )
from typing import List, Optional

from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from pydantic import BaseModel, Field
# Define a custom prompt to provide instructions and any additional context.
# 1) You can add examples into the prompt template to improve extraction quality
# 2) Introduce additional parameters to take context into account (e.g., include metadata
#    about the document from which the text was extracted.)
prompt_template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are an expert extraction algorithm. "
            "Only extract relevant information from the text. "
            "If you do not know the value of an attribute asked to extract, "
            "return null for the attribute's value.",
        ),
        # Please see the how-to about improving performance with
        # reference examples.
        # MessagesPlaceholder('examples'),
        # The `{text}` placeholder is filled in when the template is invoked
        # with a {"text": ...} mapping.
        ("human", "{text}"),
    ]
)
# Use a large language model to extract information.
import getpass
import os

# Ask for the OpenAI API key interactively, but only when the environment
# does not already provide a non-empty OPENAI_API_KEY. getpass keeps the
# typed key from being echoed to the terminal.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")
class Person(BaseModel):
    """Information about a person."""

    # ^ Doc-string for the entity Person.
    # This doc-string is sent to the LLM as the description of the schema Person,
    # and it can help to improve extraction results.

    # Note that:
    # 1. Each field is an `optional` -- this allows the model to decline to extract it!
    # 2. Each field has a `description` -- this description is used by the LLM.
    # Having a good description can help improve extraction results.
    name: Optional[str] = Field(default=None, description="The name of the person")
    hair_color: Optional[str] = Field(
        default=None, description="The color of the person's hair if known"
    )
    # NOTE(review): the height is typed as a string, not a float -- presumably
    # so the model can return the value as written; confirm before changing.
    height_in_meters: Optional[str] = Field(
        default=None, description="Height measured in meters"
    )
class Data(BaseModel):
    """Extracted data about people."""

    # Container model so that a single extraction can return multiple
    # Person entities; the list may also be empty when nobody is mentioned.
    people: List[Person]
# NOTE(review): `llm` is not defined in the visible part of this file; it is
# expected to be a chat model instance created earlier -- confirm.
# Bind the Data schema to the model for structured output.
structured_llm = llm.with_structured_output(schema=Data)

text = (
    "My name is Jeff, my hair is black and i am 6 feet tall. "
    "Anna has the same color hair as me."
)

# Render the prompt template with the sample text, then run the extraction.
prompt = prompt_template.invoke({"text": text})
structured_llm.invoke(prompt)
from langchain_core.utils.function_calling import tool_example_to_messages
# Reference examples: each entry pairs an input text with the Data instance
# that an extraction over that text is expected to produce.
examples = [
    (
        # No person is mentioned, so the expected result has an empty list.
        "The ocean is vast and blue. It's more than 20,000 feet deep.",
        Data(people=[]),
    ),
    (
        # Only the name is stated; the other attributes are left as None.
        "Fiona traveled far from France to Spain.",
        Data(people=[Person(name="Fiona", hair_color=None, height_in_meters=None)]),
    ),
]
# Flatten every reference example into chat messages that can be fed to the
# model alongside the real input.
messages = []

for txt, tool_call in examples:
    # `tool_call` is the expected Data instance for `txt`; the closing AI
    # message depends on whether that instance contains any people.
    if tool_call.people:
        # This final message is optional for some providers
        ai_response = "Detected people."
    else:
        ai_response = "Detected no people."
    messages.extend(tool_example_to_messages(txt, [tool_call], ai_response=ai_response))