Support running GitHub App using gunicorn, adjust Dockerfile accordingly

2025-07-21 04:50:39 +08:00 · 2024-06-18 20:15:48 +03:00
parent deda06866d
commit 663604daa5
3 changed files with 193 additions and 1 deletions
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@ -8,7 +8,7 @@ ENV PYTHONPATH=/app

 FROM base as github_app
 ADD pr_agent pr_agent
-CMD ["python", "pr_agent/servers/github_app.py"]
+CMD ["python", "-m", "gunicorn", "-k", "uvicorn.workers.UvicornWorker", "-c", "pr_agent/servers/gunicorn_config.py", "--forwarded-allow-ips", "*", "pr_agent.servers.github_app:app"]

 FROM base as bitbucket_app
 ADD pr_agent pr_agent
--- a/pr_agent/servers/gunicorn_config.py
+++ b/pr_agent/servers/gunicorn_config.py
@ -0,0 +1,191 @@
+import multiprocessing
+import os
+
+# from prometheus_client import multiprocess
+
+# Sample Gunicorn configuration file.
+
+#
+# Server socket
+#
+#   bind - The socket to bind.
+#
+#       A string of the form: 'HOST', 'HOST:PORT', 'unix:PATH'.
+#       An IP is a valid HOST.
+#
+#   backlog - The number of pending connections. This refers
+#       to the number of clients that can be waiting to be
+#       served. Exceeding this number results in the client
+#       getting an error when attempting to connect. It should
+#       only affect servers under significant load.
+#
+#       Must be a positive integer. Generally set in the 64-2048
+#       range.
+#
+
+# bind = '0.0.0.0:5000'
+bind = '0.0.0.0:3000'
+backlog = 2048
+
+#
+# Worker processes
+#
+#   workers - The number of worker processes that this server
+#       should keep alive for handling requests.
+#
+#       A positive integer generally in the 2-4 x $(NUM_CORES)
+#       range. You'll want to vary this a bit to find the best
+#       for your particular application's work load.
+#
+#   worker_class - The type of workers to use. The default
+#       sync class should handle most 'normal' types of work
+#       loads. You'll want to read
+#       http://docs.gunicorn.org/en/latest/design.html#choosing-a-worker-type
+#       for information on when you might want to choose one
+#       of the other worker classes.
+#
+#       A string referring to a Python path to a subclass of
+#       gunicorn.workers.base.Worker. The default provided values
+#       can be seen at
+#       http://docs.gunicorn.org/en/latest/settings.html#worker-class
+#
+#   worker_connections - For the eventlet and gevent worker classes
+#       this limits the maximum number of simultaneous clients that
+#       a single process can handle.
+#
+#       A positive integer generally set to around 1000.
+#
+#   timeout - If a worker does not notify the master process in this
+#       number of seconds it is killed and a new worker is spawned
+#       to replace it.
+#
+#       Generally set to thirty seconds. Only set this noticeably
+#       higher if you're sure of the repercussions for sync workers.
+#       For the non sync workers it just means that the worker
+#       process is still communicating and is not tied to the length
+#       of time required to handle a single request.
+#
+#   keepalive - The number of seconds to wait for the next request
+#       on a Keep-Alive HTTP connection.
+#
+#       A positive integer. Generally set in the 1-5 seconds range.
+#
+
+if os.getenv('GUNICORN_WORKERS', None):
+    workers = int(os.getenv('GUNICORN_WORKERS'))
+else:
+    cores = multiprocessing.cpu_count()
+    workers = cores * 2 + 1
+worker_connections = 1000
+timeout = 240
+keepalive = 2
+
+#
+#   spew - Install a trace function that spews every line of Python
+#       that is executed when running the server. This is the
+#       nuclear option.
+#
+#       True or False
+#
+
+spew = False
+
+#
+# Server mechanics
+#
+#   daemon - Detach the main Gunicorn process from the controlling
+#       terminal with a standard fork/fork sequence.
+#
+#       True or False
+#
+#   raw_env - Pass environment variables to the execution environment.
+#
+#   pidfile - The path to a pid file to write
+#
+#       A path string or None to not write a pid file.
+#
+#   user - Switch worker processes to run as this user.
+#
+#       A valid user id (as an integer) or the name of a user that
+#       can be retrieved with a call to pwd.getpwnam(value) or None
+#       to not change the worker process user.
+#
+#   group - Switch worker process to run as this group.
+#
+#       A valid group id (as an integer) or the name of a user that
+#       can be retrieved with a call to pwd.getgrnam(value) or None
+#       to change the worker processes group.
+#
+#   umask - A mask for file permissions written by Gunicorn. Note that
+#       this affects unix socket permissions.
+#
+#       A valid value for the os.umask(mode) call or a string
+#       compatible with int(value, 0) (0 means Python guesses
+#       the base, so values like "0", "0xFF", "0022" are valid
+#       for decimal, hex, and octal representations)
+#
+#   tmp_upload_dir - A directory to store temporary request data when
+#       requests are read. This will most likely be disappearing soon.
+#
+#       A path to a directory where the process owner can write. Or
+#       None to signal that Python should choose one on its own.
+#
+
+daemon = False
+raw_env = []
+pidfile = None
+umask = 0
+user = None
+group = None
+tmp_upload_dir = None
+
+#
+#   Logging
+#
+#   logfile - The path to a log file to write to.
+#
+#       A path string. "-" means log to stdout.
+#
+#   loglevel - The granularity of log output
+#
+#       A string of "debug", "info", "warning", "error", "critical"
+#
+
+errorlog = '-'
+loglevel = 'info'
+accesslog = None
+access_log_format = '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s"'
+
+#
+# Process naming
+#
+#   proc_name - A base to use with setproctitle to change the way
+#       that Gunicorn processes are reported in the system process
+#       table. This affects things like 'ps' and 'top'. If you're
+#       going to be running more than one instance of Gunicorn you'll
+#       probably want to set a name to tell them apart. This requires
+#       that you install the setproctitle module.
+#
+#       A string or None to choose a default of something like 'gunicorn'.
+#
+
+proc_name = None
+
+
+#
+# Server hooks
+#
+#   post_fork - Called just after a worker has been forked.
+#
+#       A callable that takes a server and worker instance
+#       as arguments.
+#
+#   pre_fork - Called just prior to forking the worker subprocess.
+#
+#       A callable that accepts the same arguments as after_fork
+#
+#   pre_exec - Called just prior to forking off a secondary
+#       master process during things like config reloading.
+#
+#       A callable that takes a server instance as the sole argument.
+#
--- a/requirements.txt
+++ b/requirements.txt
@ -24,6 +24,7 @@ tiktoken==0.7.0
 ujson==5.8.0
 uvicorn==0.22.0
 tenacity==8.2.3
+gunicorn==20.1.0
 # Uncomment the following lines to enable the 'similar issue' tool
 # pinecone-client
 # pinecone-datasets @ git+https://github.com/mrT23/pinecone-datasets.git@main