packages = ["pydantic", "./wheels/pymarc-4.2.2-py3-none-any.whl", "./wheels/rdflib-7.0.0-py3-none-any.whl", "./wheels/paradag-1.2.0-py2.py3-none-any.whl"] [[fetch]] from = "src/catalog_chat" files = ["chat.py", "controls.py", "folio.py", "github.py", "sinopia.py", "workflows/core.py", "workflows/executors.py", "workflows/assign_lcsh.py", "workflows/marc21_to_folio.py", "workflows/new_folio_resource.py", "workflows/sinopia_to_folio.py"] __version__ = "1.0.0" import asyncio import json import sys from js import alert, console, document, sessionStorage from chat import ( add_history, delete_key, update_chat_modal, update_parameters ) from chat import login as chat_gpt_login, ChatGPT from controls import ( clear_chat_prompt, download, init_workflow, load_chat_session, load_folio_default, load_marc_record, new_example, run_prompt ) from folio import Okapi, get_instance, logout_folio from folio import login as okapi_login version_span = document.getElementById("version") version_span.innerHTML = __version__ chat_gpt_instance = None existing_token = sessionStorage.getItem("chat_gpt_token") if existing_token: chat_gpt_instance = ChatGPT(key=existing_token) update_chat_modal(chat_gpt_instance) chatgpt_button = document.getElementById("chatGPTButton") chatgpt_button.classList.remove("btn-outline-danger") chatgpt_button.classList.add("btn-outline-success") workflow = None okapi = None existing_okapi = sessionStorage.getItem("okapi") if existing_okapi: okapi = Okapi.parse_obj(json.loads(existing_okapi)) folio_btn = document.getElementById("folioButton") folio_btn.classList.remove("btn-outline-danger") folio_btn.classList.add("btn-outline-success") folio_iframe = document.getElementById("folio-system-frame") folio_iframe.src = okapi.folio default_folio = document.getElementById("folio-default") default_folio.classList.add("d-none") marc_file = Element("marc-upload-btn") def download_history(): if chat_gpt_instance is not None and workflow is not None: 
download(chat_gpt_instance, workflow) else: alert("Need chat gpt instance and workflow to download history") def load_workflow(workflow_slug): global workflow workflow = init_workflow(workflow_slug) async def login_chatgpt(): global chat_gpt_instance chat_gpt_instance = await chat_gpt_login() async def login_okapi(): folio_iframe = document.getElementById("folio-system-frame") folio_url_elem = document.getElementById("folioURI") folio_iframe.src = folio_url_elem.value folio_logged_in = await okapi_login(Okapi()) console.log(f"FOLIO Login? {folio_logged_in}") async def lcsh_conversation(): instance_uuid_elem = document.getElementById("instance-uuid") instance = await get_instance(okapi, instance_uuid_elem.value) raw_instance_elem = document.getElementById("raw-instance") raw_instance_elem.innerHTML = instance instance_subjects = document.getElementById("instance-subjects") h3 = document.createElement("h3") h3.innerHTML = "Instance Subjects" instance_subjects.appendChild(h3) subjects_ul = document.createElement("ul") for subject in instance["subjects"]: li = document.createElement("li") li.innerHTML = subject subjects_ul.appendChild(li) instance_subjects.appendChild(subjects_ul) results_div = document.getElementById("lcsh-result") conversation = await chat_gpt_instance("\n".join(instance["subjects"])) if "error" in conversation: results_div.innerHTML = f"""Error status {conversation["error"]}\n{conversation["message"]}""" else: for choice in conversation.get("choices"): pre_elem = document.createElement("pre") pre_elem.innerHTML = choice["text"] results_div.appendChild(pre_elem) async def load_mrc_file(): marc_str = await load_marc_record(marc_file) main_prompt = document.getElementById("mainChatPrompt") main_prompt.value = marc_str def load_sinopia(): sinopia_url_select = document.getElementById("sinopiaURI") sinopia_iframe = document.getElementById("sinopia-frame") sinopia_iframe.src = sinopia_url_select.value def save_edge_ai(): edge_ai_api_url_elem = 
document.getElementById("edgeAIURI") edge_ai_api_url = edge_ai_api_url_elem.value print(f"Saves Edge AI API URI at {edge_ai_api_url}")

Chatting with Catalogs

An exploration of using LLMs with Library Systems
AI4LAM Hex Sticker and Logo

System Context
System Message

System messages help set the behavior of the model

Additional Context

For few-shot prompts, provides examples of questions and responses. De-select all examples for a zero-shot prompt.

Workflows

Select a prompt template to launch a specific workflow.

Bugs? Ideas for improvement? Please add a ticket here.

Vector stores

To improve the context of the chat and reduce model hallucinations, select one or more of the following vector datastores:

Loading...

Chat History

To use the FOLIO Demo site, use the following credentials:

FOLIO URL:
https://folio-orchid.dev.folio.org
Okapi URL:
https://folio-orchid-okapi.dev.folio.org/
Tenant:
diku
Login:
diku_admin
Password:
admin

Help

A demonstration application for constructing library systems workflows using Large Language Models (LLM).

Workflows

Workflows in this app are inspired by Apache Airflow and Prefect.io use of DAGs (Directed Acyclic Graphs). Catalog Chat uses the pure-python package, paradag, to construct workflows as a DAG. A workflow wraps LLM API calls along with calling specific FOLIO Okapi and Sinopia APIs by building context and examples specific to the use case.

Basic Cataloging Workflow Flowchart

Roadmap

  • Support for more Large Language Models (Llama2, Claude, Bard, etc.) ticket #1
  • Provide API for LCSH Vector datastore
  • Implement Vector datastore for FOLIO
  • Implement Vector datastore for Sinopia

Documentation

About this App

The Chatting with Catalogs application is built as a static website using the pyscript project that loads a Python 3.11 environment into your web-browser through Webassembly. We use the following Python packages:

OpenAI API

This application uses the ChatGPT chat completion API using the functions parameter to emulate the ReAct pattern for calling functions that, in turn, call FOLIO's Okapi APIs and Sinopia's APIs.

Selected Bibliography

“...libraries are faced with the challenge of wrangling an infinite amount of data with finite time, funds, and personnel.”
“Many groups have used [automated] tools in a semi-mediated process that best balances out efficiency and quality.”
“... catalogers do not have to have a firm grasp on the inner statistical workings of each text mining algorithm or computation methods…Understanding the process of preparing a corpus, selecting features, and interpreting output is, perhaps, more important.”
“Catalogers and metadata experts among others in the information field have a role to play in developing, exploring and responsibly using AI.”
“Researchers have begun to show that for some professions, the wisest AI implementation scenario is augmenting existing expertise with semi-automated support, a form of ‘human compatible’ AI.”
“If automation is to be useful for the communities it seeks to support, it must be ushered in with profound appreciation for, and in collaboration with, the professionals the automation would support.”
Python REPL Python

Interact Programmatically with Tools through the REPL

Global Variables
  • workflow - The current workflow object, default is None
  • chat_gpt_instance - Chat GPT Instance