diff --git a/langchain.ipynb b/langchain.ipynb
new file mode 100644
index 00000000..10e163cc
--- /dev/null
+++ b/langchain.ipynb
@@ -0,0 +1,182 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# PR code-documentation prompt (LangChain)\n",
+    "\n",
+    "Builds the system/human chat prompt for the *PR-Code-Documentation Agent* and sends one sample PR diff to `ChatOpenAI`.\n",
+    "\n",
+    "Requires the `OPENAI_API_KEY` environment variable."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "from langchain.chat_models import ChatOpenAI\n",
+    "from langchain.prompts.chat import (\n",
+    "    ChatPromptTemplate,\n",
+    "    HumanMessagePromptTemplate,\n",
+    "    SystemMessagePromptTemplate,\n",
+    ")\n",
+    "\n",
+    "# Read the key from the environment so it is never committed with the notebook.\n",
+    "# A missing variable raises KeyError immediately.\n",
+    "chat = ChatOpenAI(temperature=0, openai_api_key=os.environ[\"OPENAI_API_KEY\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Placeholders use single braces: LangChain's default f-string template format\n",
+    "# treats doubled braces as escaped literals, so they would never be substituted.\n",
+    "template = \"\"\"You are a language model called PR-Code-Documentation Agent, that specializes in generating documentation for code.\n",
+    "Your task is to generate meaningful {docs_for_language} to a PR (the '+' lines).\n",
+    "\n",
+    "Example for a PR Diff input:\n",
+    "'\n",
+    "## src/file1.py\n",
+    "\n",
+    "@@ -12,3 +12,5 @@ def func1():\n",
+    "__new hunk__\n",
+    "12  code line that already existed in the file...\n",
+    "13  code line that already existed in the file....\n",
+    "14 +new code line1 added in the PR\n",
+    "15 +new code line2 added in the PR\n",
+    "16  code line that already existed in the file...\n",
+    "__old hunk__\n",
+    " code line that already existed in the file...\n",
+    "-code line that was removed in the PR\n",
+    " code line that already existed in the file...\n",
+    "\n",
+    "\n",
+    "@@ ... @@ def func2():\n",
+    "__new hunk__\n",
+    "...\n",
+    "__old hunk__\n",
+    "...\n",
+    "\n",
+    "\n",
+    "## src/file2.py\n",
+    "...\n",
+    "'\n",
+    "\n",
+    "Specific instructions:\n",
+    "- Try to identify edited/added code components (classes/functions/methods...) that are undocumented, and generate {docs_for_language} for each one.\n",
+    "- If there are documented (any type of {language} documentation) code components in the PR, don't generate {docs_for_language} for them.\n",
+    "- Ignore code components that don't appear fully in the '__new hunk__' section. For example, you must see the component header and body.\n",
+    "- Make sure the {docs_for_language} starts and ends with standard {language} {docs_for_language} signs.\n",
+    "- The {docs_for_language} should be in standard format.\n",
+    "- Provide the exact line number (inclusive) where the {docs_for_language} should be added.\n",
+    "\n",
+    "\n",
+    "You must use the following YAML schema to format your answer:\n",
+    "```yaml\n",
+    "Code Documentation:\n",
+    "  type: array\n",
+    "  uniqueItems: true\n",
+    "  items:\n",
+    "    relevant file:\n",
+    "      type: string\n",
+    "      description: the relevant file full path\n",
+    "    relevant line:\n",
+    "      type: integer\n",
+    "      description: |-\n",
+    "        The relevant line number from a '__new hunk__' section where the {docs_for_language} should be added.\n",
+    "    doc placement:\n",
+    "      type: string\n",
+    "      enum:\n",
+    "        - before\n",
+    "        - after\n",
+    "      description: |-\n",
+    "        The {docs_for_language} placement relative to the relevant line (code component).\n",
+    "    documentation:\n",
+    "      type: string\n",
+    "      description: |-\n",
+    "        The {docs_for_language} content. It should be complete, correctly formatted and indented, and without line numbers.\n",
+    "```\n",
+    "\n",
+    "Example output:\n",
+    "```yaml\n",
+    "Code Documentation:\n",
+    "- relevant file: |-\n",
+    "    src/file1.py\n",
+    "  relevant line: 12\n",
+    "  doc placement: after\n",
+    "  documentation: |-\n",
+    "    \\\"\\\"\\\"\n",
+    "    This is a python docstring for func1.\n",
+    "    \\\"\\\"\\\"\n",
+    "- ...\n",
+    "...\n",
+    "```\n",
+    "\n",
+    "\n",
+    "Each YAML output MUST be after a newline, indented, with block scalar indicator ('|-').\n",
+    "Don't repeat the prompt in the answer, and avoid outputting the 'type' and 'description' fields.\"\"\"\n",
+    "\n",
+    "system_message_prompt = SystemMessagePromptTemplate.from_template(template)\n",
+    "human_template = \"{text}\"\n",
+    "human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# A minimal PR diff written in the input format the system prompt describes.\n",
+    "sample_diff = \"\"\"## src/greet.js\n",
+    "\n",
+    "@@ -0,0 +1,3 @@\n",
+    "__new hunk__\n",
+    "1 +function greet(name) {\n",
+    "2 +  console.log('hello ' + name);\n",
+    "3 +}\n",
+    "\"\"\"\n",
+    "\n",
+    "chat_prompt = ChatPromptTemplate.from_messages(\n",
+    "    [system_message_prompt, human_message_prompt]\n",
+    ")\n",
+    "\n",
+    "# get a chat completion from the formatted messages\n",
+    "chat(\n",
+    "    chat_prompt.format_prompt(\n",
+    "        docs_for_language=\"JSDoc\", language=\"JavaScript\", text=sample_diff\n",
+    "    ).to_messages()\n",
+    ")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.0.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}