pr-agent/langchain.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.chat_models import ChatOpenAI\n",
    "from langchain.prompts.chat import (\n",
    "    ChatPromptTemplate,\n",
    "    HumanMessagePromptTemplate,\n",
    "    SystemMessagePromptTemplate,\n",
    ")\n",
    "\n",
    "# Chat model used by the cells below, created with temperature=0.\n",
    "# No key is passed here: ChatOpenAI falls back to the OPENAI_API_KEY environment\n",
    "# variable, so the secret never has to be typed into (or committed with) the notebook.\n",
    "chat = ChatOpenAI(temperature=0)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "# System prompt for the documentation agent.\n",
    "# LangChain renders this with its default f-string formatter, so placeholders must use\n",
    "# single braces ({docs_for_language}, {language}); doubled braces would be emitted as\n",
    "# literal text and the values passed to format_prompt() would be silently ignored.\n",
    "template = \"\"\"You are a language model called PR-Code-Documentation Agent, that specializes in generating documentation for code.\n",
    "Your task is to generate meaningful {docs_for_language} to a PR (the '+' lines).\n",
    "\n",
    "Example for a PR Diff input:\n",
    "'\n",
    "## src/file1.py\n",
    "\n",
    "@@ -12,3 +12,5 @@ def func1():\n",
    "__new hunk__\n",
    "12  code line that already existed in the file...\n",
    "13  code line that already existed in the file....\n",
    "14 +new code line1 added in the PR\n",
    "15 +new code line2 added in the PR\n",
    "16  code line that already existed in the file...\n",
    "__old hunk__\n",
    " code line that already existed in the file...\n",
    "-code line that was removed in the PR\n",
    " code line that already existed in the file...\n",
    "\n",
    "\n",
    "@@ ... @@ def func2():\n",
    "__new hunk__\n",
    "...\n",
    "__old hunk__\n",
    "...\n",
    "\n",
    "\n",
    "## src/file2.py\n",
    "...\n",
    "'\n",
    "\n",
    "Specific instructions:\n",
    "- Try to identify edited/added code components (classes/functions/methods...) that are undocumented. and generate {docs_for_language} for each one.\n",
    "- If there are documented (any type of {language} documentation) code components in the PR, Don't generate {docs_for_language} for them.\n",
    "- Ignore code components that don't appear fully in the '__new hunk__' section. For example. you must see the component header and body,\n",
    "- Make sure the {docs_for_language} starts and ends with standard {language} {docs_for_language} signs.\n",
    "- The {docs_for_language} should be in standard format.\n",
    "- Provide the exact line number (inclusive) where the {docs_for_language} should be added.\n",
    "\n",
    "\n",
    "You must use the following YAML schema to format your answer:\n",
    "```yaml\n",
    "Code Documentation:\n",
    "  type: array\n",
    "  uniqueItems: true\n",
    "  items:\n",
    "    relevant file:\n",
    "      type: string\n",
    "      description: the relevant file full path\n",
    "    relevant line:\n",
    "      type: integer\n",
    "      description: |-\n",
    "        The relevant line number from a '__new hunk__' section where the {docs_for_language} should be added.\n",
    "    doc placement:\n",
    "      type: string\n",
    "      enum:\n",
    "        - before\n",
    "        - after\n",
    "      description: |-\n",
    "        The {docs_for_language} placement relative to the relevant line (code component).\n",
    "    documentation:\n",
    "      type: string\n",
    "      description: |-\n",
    "        The {docs_for_language} content. It should be complete, correctly formatted and indented, and without line numbers.\n",
    "```\n",
    "\n",
    "Example output:\n",
    "```yaml\n",
    "Code Documentation:\n",
    "-   relevant file: |-\n",
    "        src/file1.py\n",
    "    relevant line: 12\n",
    "    doc placement: after\n",
    "    documentation: |-\n",
    "        \\\"\\\"\\\"\n",
    "        This is a python docstring for func1.\n",
    "        \\\"\\\"\\\"\n",
    "- ...\n",
    "...\n",
    "```\n",
    "\n",
    "\n",
    "Each YAML output MUST be after a newline, indented, with block scalar indicator ('|-').\n",
    "Don't repeat the prompt in the answer, and avoid outputting the 'type' and 'description' fields.\"\"\"\n",
    "\n",
    "# The system message carries the instructions above; the human message is just the\n",
    "# caller-supplied `text` value, passed through unchanged.\n",
    "system_message_prompt = SystemMessagePromptTemplate.from_template(template)\n",
    "human_template = \"{text}\"\n",
    "human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "AIMessage(content='```yaml\\nCode Documentation:\\n-   relevant file: |-\\n        src/file1.py\\n    relevant line: 12\\n    doc placement: after\\n    documentation: |-\\n        \"\"\"\\n        This is a JavaScript console.log statement that prints \\'hello world\\'.\\n        \"\"\"\\n```')"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Pair the system instructions with the human message template in a single chat prompt.\n",
    "chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])\n",
    "\n",
    "# Fill in the prompt variables and request a completion; the returned message is\n",
    "# the last expression, so it is displayed as this cell's output.\n",
    "chat(\n",
    "    chat_prompt.format_prompt(\n",
    "        text=\"console.log('hello world')\",\n",
    "        language=\"JavaScript\",\n",
    "        docs_for_language=\"JSDoc\",\n",
    "    ).to_messages()\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.0.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}