123456789101112131415161718192021222324252627 |
import logging
import os
import sys

from llama_index import SimpleDirectoryReader, GPTSimpleVectorIndex

# Send INFO-level (and above) log records to stdout.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
# NOTE(review): basicConfig normally installs a stdout handler on the root
# logger already, so this extra handler may make each record print twice
# when run as a plain script. Kept to preserve the existing output.
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

# 'sk-xx' is only a placeholder. Use setdefault so a real key that is already
# exported in the environment is not overwritten by the placeholder.
os.environ.setdefault('OPENAI_API_KEY', 'sk-xx')
def build_index(
    data_dir: str = 'data',
    index_path: str = 'index.json',
    chunk_size_limit: int = 500,
) -> None:
    """Build a vector index from the documents in a directory and save it.

    Args:
        data_dir: Directory handed to ``SimpleDirectoryReader``.
        index_path: File the index is written to with ``save_to_disk``.
        chunk_size_limit: Forwarded to ``GPTSimpleVectorIndex`` as
            ``chunk_size_limit`` (the maximum number of tokens per chunk).
    """
    documents = SimpleDirectoryReader(data_dir).load_data()
    # Split the source documents into small chunks of at most
    # ``chunk_size_limit`` tokens, turn each chunk into a vector, and build
    # the index.
    index = GPTSimpleVectorIndex(documents, chunk_size_limit=chunk_size_limit)
    # Persist the index.
    index.save_to_disk(index_path)
def query(
    question: str = "What did the author do in 9th grade?",
    index_path: str = 'index.json',
):
    """Load a saved index, run a question against it, and print the answer.

    Args:
        question: Text passed to the index's ``query`` method.
        index_path: File the index is loaded from with ``load_from_disk``.

    Returns:
        The response object produced by the index query (also printed).
    """
    # Load the index.
    new_index = GPTSimpleVectorIndex.load_from_disk(index_path)
    # Query the index.
    response = new_index.query(question)
    # Print the answer.
    print(response)
    return response
if __name__ == "__main__":
    # Script entry point: builds and saves the index. query() is not
    # invoked here.
    build_index()
|