Skip to content

Commit

Permalink
Speed up webserver boot time by delaying provider initialization (apa…
Browse files Browse the repository at this point in the history
…che#19709)

* Speed up webserver boot time by delaying provider initialization

This drops the time to first request from 37s to 20s by making the
following changes:

- Don't pre-load the app when not in daemon mode.

  The purpose of the call to `cached_app()` was to ensure that any
  errors are reported on the terminal before it is detached to make
  failures more obvious to the user (which is a good feature).

  However, the comment about "pre-warming the cache" was incorrect: no
  pre-warming actually happened, because we run gunicorn by spawning a
  whole new process, which shares no state with the current Python
  interpreter.

- Don't load/initialize providers when only importing airflow.www.views

  As written, it would load the provider hooks at import time.

  This changes it through a combination of cached properties and the
  existing `init_connection_form` function.

  (`extra_fields` is not set as a cached_property because of how FAB
  works -- it iterates over all attributes of the class looking for
  methods/routes and then looks at properties on it, meaning it would
  still access the property too early)
  • Loading branch information
ashb authored Nov 19, 2021
1 parent c1d5ea6 commit 7a676a1
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 10 deletions.
12 changes: 6 additions & 6 deletions airflow/cli/commands/webserver_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
from airflow.utils.cli import setup_locations, setup_logging
from airflow.utils.log.logging_mixin import LoggingMixin
from airflow.utils.process_utils import check_if_pidfile_process_is_running
from airflow.www.app import cached_app, create_app
from airflow.www.app import create_app

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -354,11 +354,6 @@ def webserver(args):
ssl_context=(ssl_cert, ssl_key) if ssl_cert and ssl_key else None,
)
else:
# This pre-warms the cache, and makes possible errors
# get reported earlier (i.e. before demonization)
os.environ['SKIP_DAGS_PARSING'] = 'True'
app = cached_app(None)
os.environ.pop('SKIP_DAGS_PARSING')

pid_file, stdout, stderr, log_file = setup_locations(
"webserver", args.pid, args.stdout, args.stderr, args.log_file
Expand Down Expand Up @@ -446,6 +441,11 @@ def monitor_gunicorn(gunicorn_master_pid: int):
).start()

if args.daemon:
# This makes possible errors get reported before daemonization
os.environ['SKIP_DAGS_PARSING'] = 'True'
app = create_app(None)
os.environ.pop('SKIP_DAGS_PARSING')

handle = setup_logging(log_file)

base, ext = os.path.splitext(pid_file)
Expand Down
7 changes: 7 additions & 0 deletions airflow/www/gunicorn_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,10 @@ def post_worker_init(_):
"""
old_title = setproctitle.getproctitle()
setproctitle.setproctitle(settings.GUNICORN_WORKER_READY_PREFIX + old_title)


def on_starting(server):
    """Load provider connection form widgets before workers are forked.

    :param server: not used by this hook.
    """
    from airflow.providers_manager import ProvidersManager

    providers_manager = ProvidersManager()
    # Accessed purely for its loading side effect; the value is discarded.
    providers_manager.connection_form_widgets
17 changes: 13 additions & 4 deletions airflow/www/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@
set_dag_run_state_to_failed,
set_dag_run_state_to_success,
)
from airflow.compat.functools import cached_property
from airflow.configuration import AIRFLOW_CONFIG, conf
from airflow.exceptions import AirflowException
from airflow.executors.executor_loader import ExecutorLoader
Expand Down Expand Up @@ -3314,6 +3315,9 @@ def _get_connection_types() -> List[Tuple[str, str]]:
)
for key, value in ProvidersManager().connection_form_widgets.items():
setattr(ConnectionForm, key, value.field)
ConnectionModelView.add_columns.append(key)
ConnectionModelView.edit_columns.append(key)
ConnectionModelView.extra_fields.append(key)


# Used to store a dictionary of field behaviours used to dynamically change available
Expand All @@ -3323,7 +3327,9 @@ def _get_connection_types() -> List[Tuple[str, str]]:
class ConnectionFormWidget(FormWidget):
"""Form widget used to display connection"""

field_behaviours = json.dumps(ProvidersManager().field_behaviours)
@cached_property
def field_behaviours(self):
return json.dumps(ProvidersManager().field_behaviours)


class ConnectionModelView(AirflowModelView):
Expand Down Expand Up @@ -3351,7 +3357,6 @@ class ConnectionModelView(AirflowModelView):
permissions.ACTION_CAN_ACCESS_MENU,
]

extra_fields = list(ProvidersManager().connection_form_widgets.keys())
list_columns = [
'conn_id',
'conn_type',
Expand All @@ -3361,7 +3366,7 @@ class ConnectionModelView(AirflowModelView):
'is_encrypted',
'is_extra_encrypted',
]
add_columns = edit_columns = [
add_columns = [
'conn_id',
'conn_type',
'description',
Expand All @@ -3371,7 +3376,11 @@ class ConnectionModelView(AirflowModelView):
'password',
'port',
'extra',
] + extra_fields
]
edit_columns = add_columns.copy()

# Initialized later by lazy_add_provider_discovered_options_to_connection_form
extra_fields = []

add_form = edit_form = ConnectionForm
add_template = 'airflow/conn_create.html'
Expand Down

0 comments on commit 7a676a1

Please sign in to comment.