Skip to content

Commit 8ca94f8

Browse files
authored
Merge pull request #716 from MetaCell/feature/CH-110
Jupyterhub update to dev
2 parents b570cc6 + 734d410 commit 8ca94f8

52 files changed

Lines changed: 10169 additions & 909 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

applications/jupyterhub/Dockerfile

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,39 @@
11
ARG CLOUDHARNESS_BASE
22
FROM $CLOUDHARNESS_BASE as base
33

4-
FROM jupyterhub/k8s-hub:1.1.3
4+
FROM quay.io/jupyterhub/k8s-hub:3.2.1
55
USER root
66

77

88
COPY --from=base libraries/models/requirements.txt /libraries/models/requirements.txt
9-
RUN pip install -r /libraries/models/requirements.txt
9+
RUN --mount=type=cache,target=/root/.cache python -m pip install --upgrade pip &&\
10+
pip install -r /libraries/models/requirements.txt
1011
COPY --from=base libraries/cloudharness-common/requirements.txt /libraries/cloudharness-common/requirements.txt
11-
RUN pip install -r /libraries/cloudharness-common/requirements.txt
12+
RUN --mount=type=cache,target=/root/.cache python -m pip install --upgrade pip &&\
13+
pip install -r /libraries/cloudharness-common/requirements.txt
1214
COPY --from=base libraries/client/cloudharness_cli/requirements.txt /libraries/client/cloudharness_cli/requirements.txt
13-
RUN pip install -r /libraries/client/cloudharness_cli/requirements.txt
15+
RUN --mount=type=cache,target=/root/.cache python -m pip install --upgrade pip &&\
16+
pip install -r /libraries/client/cloudharness_cli/requirements.txt
1417

1518
COPY --from=base libraries/models /libraries/models
16-
RUN pip install -e /libraries/models
19+
RUN --mount=type=cache,target=/root/.cache python -m pip install --upgrade pip &&\
20+
pip install -e /libraries/models
1721

1822
COPY --from=base libraries/cloudharness-common /libraries/cloudharness-common
1923
COPY --from=base libraries/client/cloudharness_cli /libraries/client/cloudharness_cli
2024

2125
#
2226

23-
RUN pip install -e /libraries/cloudharness-common
24-
RUN pip install -e /libraries/client/cloudharness_cli
27+
RUN --mount=type=cache,target=/root/.cache python -m pip install --upgrade pip &&\
28+
pip install -e /libraries/cloudharness-common
29+
RUN --mount=type=cache,target=/root/.cache python -m pip install --upgrade pip &&\
30+
pip install -e /libraries/client/cloudharness_cli
2531

2632

2733
COPY src src
28-
RUN pip install ./src/harness_jupyter
29-
RUN pip install ./src/chauthenticator
34+
RUN --mount=type=cache,target=/root/.cache python -m pip install --upgrade pip &&\
35+
pip install ./src/harness_jupyter
36+
RUN --mount=type=cache,target=/root/.cache python -m pip install --upgrade pip &&\
37+
pip install ./src/chauthenticator
3038

3139
USER jovyan

applications/jupyterhub/README.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,14 @@ To support the pre pulling of task images see (https://github.com/MetaCell/cloud
3131
the template `templates/image-puller/_helpers-daemonset.tpl` has been changed (see line 167 and on)
3232

3333
TODO: remember to implement/revise this code after you have updated/changed the templates of JupyterHub
34+
35+
## How to update
36+
37+
The helm chart is based on the [zero-to-jupyterhub](https://github.com/jupyterhub/zero-to-jupyterhub-k8s/) helm chart.
38+
39+
1. Run update.sh [TAG] # Do not use latest!
40+
2. Restore the sections marked with `EDIT: CLOUDHARNESS` from the diff files. Use update.patch as a reference
41+
3. Update Dockerfile to use the same base image you see on values.yaml: hub/image
42+
43+
Customize notebook image: quay.io/jupyterhub/k8s-singleuser-sample:[TAG]
44+

applications/jupyterhub/deploy/resources/hub/jupyterhub_config.py

Lines changed: 94 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -1,58 +1,38 @@
1+
# load the config object (satisfies linters)
2+
c = get_config() # noqa
3+
4+
import glob
15
import os
26
import re
37
import sys
4-
import logging
58

6-
from tornado.httpclient import AsyncHTTPClient
7-
from kubernetes import client
89
from jupyterhub.utils import url_path_join
10+
from kubernetes_asyncio import client
11+
from tornado.httpclient import AsyncHTTPClient
12+
13+
#CLOUDHARNESS: EDIT START
14+
import logging
915

1016
try:
1117
from harness_jupyter.jupyterhub import harness_hub
1218
harness_hub() # activates harness hooks on jupyterhub
1319
except Exception as e:
1420
logging.error("could not import harness_jupyter", exc_info=True)
15-
21+
# CLOUDHARNESS: EDIT END
1622

1723
# Make sure that modules placed in the same directory as the jupyterhub config are added to the pythonpath
1824
configuration_directory = os.path.dirname(os.path.realpath(__file__))
1925
sys.path.insert(0, configuration_directory)
2026

2127
from z2jh import (
2228
get_config,
23-
set_config_if_not_none,
2429
get_name,
2530
get_name_env,
2631
get_secret_value,
32+
set_config_if_not_none,
2733
)
2834

2935

30-
print('Base url is', c.JupyterHub.get('base_url', '/'))
31-
32-
# Configure JupyterHub to use the curl backend for making HTTP requests,
33-
# rather than the pure-python implementations. The default one starts
34-
# being too slow to make a large number of requests to the proxy API
35-
# at the rate required.
36-
AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient")
37-
38-
c.JupyterHub.spawner_class = 'kubespawner.KubeSpawner'
39-
40-
# Connect to a proxy running in a different pod
41-
c.ConfigurableHTTPProxy.api_url = 'http://{}:{}'.format(os.environ['PROXY_API_SERVICE_HOST'], int(os.environ['PROXY_API_SERVICE_PORT']))
42-
c.ConfigurableHTTPProxy.should_start = False
43-
44-
# Do not shut down user pods when hub is restarted
45-
c.JupyterHub.cleanup_servers = False
46-
47-
# Check that the proxy has routes appropriately setup
48-
c.JupyterHub.last_activity_interval = 60
49-
50-
# Don't wait at all before redirecting a spawning user to the progress page
51-
c.JupyterHub.tornado_settings = {
52-
'slow_spawn_timeout': 0,
53-
}
54-
55-
5636
def camelCaseify(s):
5737
"""convert snake_case to camelCase
5838
@@ -173,6 +153,7 @@ def camelCaseify(s):
173153
("events_enabled", "events"),
174154
("extra_labels", None),
175155
("extra_annotations", None),
156+
# ("allow_privilege_escalation", None), # Managed manually below
176157
("uid", None),
177158
("fs_gid", None),
178159
("service_account", "serviceAccountName"),
@@ -206,10 +187,19 @@ def camelCaseify(s):
206187
if image:
207188
tag = get_config("singleuser.image.tag")
208189
if tag:
209-
image = "{}:{}".format(image, tag)
190+
image = f"{image}:{tag}"
210191

211192
c.KubeSpawner.image = image
212193

194+
# allow_privilege_escalation defaults to False in KubeSpawner 2+. Since its a
195+
# property where None, False, and True all are valid values that users of the
196+
# Helm chart may want to set, we can't use the set_config_if_not_none helper
197+
# function as someone may want to override the default False value to None.
198+
#
199+
c.KubeSpawner.allow_privilege_escalation = get_config(
200+
"singleuser.allowPrivilegeEscalation"
201+
)
202+
213203
# Combine imagePullSecret.create (single), imagePullSecrets (list), and
214204
# singleuser.image.pullSecrets (list).
215205
image_pull_secrets = []
@@ -255,7 +245,7 @@ def camelCaseify(s):
255245
pass
256246
else:
257247
raise ValueError(
258-
"Unrecognized value for matchNodePurpose: %r" % match_node_purpose
248+
f"Unrecognized value for matchNodePurpose: {match_node_purpose}"
259249
)
260250

261251
# Combine the common tolerations for user pods with singleuser tolerations
@@ -271,7 +261,7 @@ def camelCaseify(s):
271261
pvc_name_template = get_config("singleuser.storage.dynamic.pvcNameTemplate")
272262
c.KubeSpawner.pvc_name_template = pvc_name_template
273263
volume_name_template = get_config("singleuser.storage.dynamic.volumeNameTemplate")
274-
c.KubeSpawner.storage_pvc_ensure = False
264+
c.KubeSpawner.storage_pvc_ensure = True
275265
set_config_if_not_none(
276266
c.KubeSpawner, "storage_class", "singleuser.storage.dynamic.storageClass"
277267
)
@@ -354,41 +344,62 @@ def camelCaseify(s):
354344
)
355345

356346
c.JupyterHub.services = []
347+
c.JupyterHub.load_roles = []
357348

349+
# jupyterhub-idle-culler's permissions are scoped to what it needs only, see
350+
# https://github.com/jupyterhub/jupyterhub-idle-culler#permissions.
351+
#
358352
if get_config("cull.enabled", False):
353+
jupyterhub_idle_culler_role = {
354+
"name": "jupyterhub-idle-culler",
355+
"scopes": [
356+
"list:users",
357+
"read:users:activity",
358+
"read:servers",
359+
"delete:servers",
360+
# "admin:users", # dynamically added if --cull-users is passed
361+
],
362+
# assign the role to a jupyterhub service, so it gains these permissions
363+
"services": ["jupyterhub-idle-culler"],
364+
}
365+
359366
cull_cmd = ["python3", "-m", "jupyterhub_idle_culler"]
360367
base_url = c.JupyterHub.get("base_url", "/")
361368
cull_cmd.append("--url=http://localhost:8081" + url_path_join(base_url, "hub/api"))
362369

363370
cull_timeout = get_config("cull.timeout")
364371
if cull_timeout:
365-
cull_cmd.append("--timeout=%s" % cull_timeout)
372+
cull_cmd.append(f"--timeout={cull_timeout}")
366373

367374
cull_every = get_config("cull.every")
368375
if cull_every:
369-
cull_cmd.append("--cull-every=%s" % cull_every)
376+
cull_cmd.append(f"--cull-every={cull_every}")
370377

371378
cull_concurrency = get_config("cull.concurrency")
372379
if cull_concurrency:
373-
cull_cmd.append("--concurrency=%s" % cull_concurrency)
380+
cull_cmd.append(f"--concurrency={cull_concurrency}")
374381

375382
if get_config("cull.users"):
376383
cull_cmd.append("--cull-users")
384+
jupyterhub_idle_culler_role["scopes"].append("admin:users")
385+
386+
if not get_config("cull.adminUsers"):
387+
cull_cmd.append("--cull-admin-users=false")
377388

378389
if get_config("cull.removeNamedServers"):
379390
cull_cmd.append("--remove-named-servers")
380391

381392
cull_max_age = get_config("cull.maxAge")
382393
if cull_max_age:
383-
cull_cmd.append("--max-age=%s" % cull_max_age)
394+
cull_cmd.append(f"--max-age={cull_max_age}")
384395

385396
c.JupyterHub.services.append(
386397
{
387-
"name": "cull-idle",
388-
"admin": True,
398+
"name": "jupyterhub-idle-culler",
389399
"command": cull_cmd,
390400
}
391401
)
402+
c.JupyterHub.load_roles.append(jupyterhub_idle_culler_role)
392403

393404
for key, service in get_config("hub.services", {}).items():
394405
# c.JupyterHub.services is a list of dicts, but
@@ -402,33 +413,52 @@ def camelCaseify(s):
402413

403414
c.JupyterHub.services.append(service)
404415

416+
for key, role in get_config("hub.loadRoles", {}).items():
417+
# c.JupyterHub.load_roles is a list of dicts, but
418+
# hub.loadRoles is a dict of dicts to make the config mergable
419+
role.setdefault("name", key)
420+
421+
c.JupyterHub.load_roles.append(role)
422+
423+
# respect explicit null command (distinct from unspecified)
424+
# this avoids relying on KubeSpawner.cmd's default being None
425+
_unspecified = object()
426+
specified_cmd = get_config("singleuser.cmd", _unspecified)
427+
if specified_cmd is not _unspecified:
428+
c.Spawner.cmd = specified_cmd
405429

406-
set_config_if_not_none(c.Spawner, "cmd", "singleuser.cmd")
407430
set_config_if_not_none(c.Spawner, "default_url", "singleuser.defaultUrl")
408431

409-
cloud_metadata = get_config("singleuser.cloudMetadata", {})
432+
cloud_metadata = get_config("singleuser.cloudMetadata")
410433

411434
if cloud_metadata.get("blockWithIptables") == True:
412435
# Use iptables to block access to cloud metadata by default
413436
network_tools_image_name = get_config("singleuser.networkTools.image.name")
414437
network_tools_image_tag = get_config("singleuser.networkTools.image.tag")
438+
network_tools_resources = get_config("singleuser.networkTools.resources")
439+
ip = cloud_metadata["ip"]
415440
ip_block_container = client.V1Container(
416441
name="block-cloud-metadata",
417442
image=f"{network_tools_image_name}:{network_tools_image_tag}",
418443
command=[
419444
"iptables",
420-
"-A",
445+
"--append",
421446
"OUTPUT",
422-
"-d",
423-
cloud_metadata.get("ip", "169.254.169.254"),
424-
"-j",
447+
"--protocol",
448+
"tcp",
449+
"--destination",
450+
ip,
451+
"--destination-port",
452+
"80",
453+
"--jump",
425454
"DROP",
426455
],
427456
security_context=client.V1SecurityContext(
428457
privileged=True,
429458
run_as_user=0,
430459
capabilities=client.V1Capabilities(add=["NET_ADMIN"]),
431460
),
461+
resources=network_tools_resources,
432462
)
433463

434464
c.KubeSpawner.init_containers.append(ip_block_container)
@@ -438,17 +468,6 @@ def camelCaseify(s):
438468
c.JupyterHub.log_level = "DEBUG"
439469
c.Spawner.debug = True
440470

441-
# load /usr/local/etc/jupyterhub/jupyterhub_config.d config files
442-
config_dir = "/usr/local/etc/jupyterhub/jupyterhub_config.d"
443-
if os.path.isdir(config_dir):
444-
for file_path in sorted(glob.glob(f"{config_dir}/*.py")):
445-
file_name = os.path.basename(file_path)
446-
print(f"Loading {config_dir} config: {file_name}")
447-
with open(file_path) as f:
448-
file_content = f.read()
449-
# compiling makes debugging easier: https://stackoverflow.com/a/437857
450-
exec(compile(source=file_content, filename=file_name, mode="exec"))
451-
452471
# load potentially seeded secrets
453472
#
454473
# NOTE: ConfigurableHTTPProxy.auth_token is set through an environment variable
@@ -471,11 +490,24 @@ def camelCaseify(s):
471490
cfg.pop("keys", None)
472491
c[app].update(cfg)
473492

493+
494+
# load /usr/local/etc/jupyterhub/jupyterhub_config.d config files
495+
config_dir = "/usr/local/etc/jupyterhub/jupyterhub_config.d"
496+
if os.path.isdir(config_dir):
497+
for file_path in sorted(glob.glob(f"{config_dir}/*.py")):
498+
file_name = os.path.basename(file_path)
499+
print(f"Loading {config_dir} config: {file_name}")
500+
with open(file_path) as f:
501+
file_content = f.read()
502+
# compiling makes debugging easier: https://stackoverflow.com/a/437857
503+
exec(compile(source=file_content, filename=file_name, mode="exec"))
504+
474505
# execute hub.extraConfig entries
475506
for key, config_py in sorted(get_config("hub.extraConfig", {}).items()):
476-
print("Loading extra config: %s" % key)
507+
print(f"Loading extra config: {key}")
477508
exec(config_py)
478509

510+
# CLOUDHARNESS: EDIT START
479511
# Allow switching authenticators easily
480512
auth_type = get_config('hub.config.JupyterHub.authenticator_class')
481513
email_domain = 'local'
@@ -504,6 +536,7 @@ def camelCaseify(s):
504536
c.Authenticator.auto_login = True
505537
c.OAuthenticator.client_id = client_id
506538
c.OAuthenticator.client_secret = client_secret
539+
c.OAuthenticator.allow_all = True
507540

508541
c.GenericOAuthenticator.login_service = "CH"
509542
c.GenericOAuthenticator.username_key = "email"
@@ -525,4 +558,6 @@ def camelCaseify(s):
525558
c.apps = get_config('apps')
526559
c.registry = get_config('registry')
527560
c.domain = get_config('root.domain')
528-
c.namespace = get_config('root.namespace')
561+
c.namespace = get_config('root.namespace')
562+
# CLOUDHARNESS: EDIT END
563+

0 commit comments

Comments
 (0)