Skip to content

Commit 34f8ec6

Browse files
committed
CH-110 jupyterhub update wip
1 parent 439a9da commit 34f8ec6

42 files changed

Lines changed: 4013 additions & 617 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

applications/jupyterhub/deploy/resources/hub/jupyterhub_config.py

Lines changed: 91 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,17 @@
1+
# load the config object (satisfies linters)
2+
c = get_config() # noqa
3+
4+
import glob
15
import os
26
import re
37
import sys
4-
import logging
58

9+
from jupyterhub.utils import url_path_join
10+
from kubernetes_asyncio import client
611
from tornado.httpclient import AsyncHTTPClient
12+
13+
# CLOUDHARNESS: EDIT START
14+
import logging
715
from kubernetes import client
816
from jupyterhub.utils import url_path_join
917

@@ -12,47 +20,21 @@
1220
harness_hub() # activates harness hooks on jupyterhub
1321
except Exception as e:
1422
logging.error("could not import harness_jupyter", exc_info=True)
15-
23+
# CLOUDHARNESS: EDIT END
1624

1725
# Make sure that modules placed in the same directory as the jupyterhub config are added to the pythonpath
1826
configuration_directory = os.path.dirname(os.path.realpath(__file__))
1927
sys.path.insert(0, configuration_directory)
2028

2129
from z2jh import (
2230
get_config,
23-
set_config_if_not_none,
2431
get_name,
2532
get_name_env,
2633
get_secret_value,
34+
set_config_if_not_none,
2735
)
2836

2937

30-
print('Base url is', c.JupyterHub.get('base_url', '/'))
31-
32-
# Configure JupyterHub to use the curl backend for making HTTP requests,
33-
# rather than the pure-python implementations. The default one starts
34-
# being too slow to make a large number of requests to the proxy API
35-
# at the rate required.
36-
AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient")
37-
38-
c.JupyterHub.spawner_class = 'kubespawner.KubeSpawner'
39-
40-
# Connect to a proxy running in a different pod
41-
c.ConfigurableHTTPProxy.api_url = 'http://{}:{}'.format(os.environ['PROXY_API_SERVICE_HOST'], int(os.environ['PROXY_API_SERVICE_PORT']))
42-
c.ConfigurableHTTPProxy.should_start = False
43-
44-
# Do not shut down user pods when hub is restarted
45-
c.JupyterHub.cleanup_servers = False
46-
47-
# Check that the proxy has routes appropriately setup
48-
c.JupyterHub.last_activity_interval = 60
49-
50-
# Don't wait at all before redirecting a spawning user to the progress page
51-
c.JupyterHub.tornado_settings = {
52-
'slow_spawn_timeout': 0,
53-
}
54-
55-
5638
def camelCaseify(s):
5739
"""convert snake_case to camelCase
5840
@@ -173,6 +155,7 @@ def camelCaseify(s):
173155
("events_enabled", "events"),
174156
("extra_labels", None),
175157
("extra_annotations", None),
158+
# ("allow_privilege_escalation", None), # Managed manually below
176159
("uid", None),
177160
("fs_gid", None),
178161
("service_account", "serviceAccountName"),
@@ -206,10 +189,19 @@ def camelCaseify(s):
206189
if image:
207190
tag = get_config("singleuser.image.tag")
208191
if tag:
209-
image = "{}:{}".format(image, tag)
192+
image = f"{image}:{tag}"
210193

211194
c.KubeSpawner.image = image
212195

196+
# allow_privilege_escalation defaults to False in KubeSpawner 2+. Since it's a
197+
# property where None, False, and True all are valid values that users of the
198+
# Helm chart may want to set, we can't use the set_config_if_not_none helper
199+
# function as someone may want to override the default False value to None.
200+
#
201+
c.KubeSpawner.allow_privilege_escalation = get_config(
202+
"singleuser.allowPrivilegeEscalation"
203+
)
204+
213205
# Combine imagePullSecret.create (single), imagePullSecrets (list), and
214206
# singleuser.image.pullSecrets (list).
215207
image_pull_secrets = []
@@ -255,7 +247,7 @@ def camelCaseify(s):
255247
pass
256248
else:
257249
raise ValueError(
258-
"Unrecognized value for matchNodePurpose: %r" % match_node_purpose
250+
f"Unrecognized value for matchNodePurpose: {match_node_purpose}"
259251
)
260252

261253
# Combine the common tolerations for user pods with singleuser tolerations
@@ -271,7 +263,7 @@ def camelCaseify(s):
271263
pvc_name_template = get_config("singleuser.storage.dynamic.pvcNameTemplate")
272264
c.KubeSpawner.pvc_name_template = pvc_name_template
273265
volume_name_template = get_config("singleuser.storage.dynamic.volumeNameTemplate")
274-
c.KubeSpawner.storage_pvc_ensure = False
266+
c.KubeSpawner.storage_pvc_ensure = True
275267
set_config_if_not_none(
276268
c.KubeSpawner, "storage_class", "singleuser.storage.dynamic.storageClass"
277269
)
@@ -354,41 +346,62 @@ def camelCaseify(s):
354346
)
355347

356348
c.JupyterHub.services = []
349+
c.JupyterHub.load_roles = []
357350

351+
# jupyterhub-idle-culler's permissions are scoped to what it needs only, see
352+
# https://github.com/jupyterhub/jupyterhub-idle-culler#permissions.
353+
#
358354
if get_config("cull.enabled", False):
355+
jupyterhub_idle_culler_role = {
356+
"name": "jupyterhub-idle-culler",
357+
"scopes": [
358+
"list:users",
359+
"read:users:activity",
360+
"read:servers",
361+
"delete:servers",
362+
# "admin:users", # dynamically added if --cull-users is passed
363+
],
364+
# assign the role to a jupyterhub service, so it gains these permissions
365+
"services": ["jupyterhub-idle-culler"],
366+
}
367+
359368
cull_cmd = ["python3", "-m", "jupyterhub_idle_culler"]
360369
base_url = c.JupyterHub.get("base_url", "/")
361370
cull_cmd.append("--url=http://localhost:8081" + url_path_join(base_url, "hub/api"))
362371

363372
cull_timeout = get_config("cull.timeout")
364373
if cull_timeout:
365-
cull_cmd.append("--timeout=%s" % cull_timeout)
374+
cull_cmd.append(f"--timeout={cull_timeout}")
366375

367376
cull_every = get_config("cull.every")
368377
if cull_every:
369-
cull_cmd.append("--cull-every=%s" % cull_every)
378+
cull_cmd.append(f"--cull-every={cull_every}")
370379

371380
cull_concurrency = get_config("cull.concurrency")
372381
if cull_concurrency:
373-
cull_cmd.append("--concurrency=%s" % cull_concurrency)
382+
cull_cmd.append(f"--concurrency={cull_concurrency}")
374383

375384
if get_config("cull.users"):
376385
cull_cmd.append("--cull-users")
386+
jupyterhub_idle_culler_role["scopes"].append("admin:users")
387+
388+
if not get_config("cull.adminUsers"):
389+
cull_cmd.append("--cull-admin-users=false")
377390

378391
if get_config("cull.removeNamedServers"):
379392
cull_cmd.append("--remove-named-servers")
380393

381394
cull_max_age = get_config("cull.maxAge")
382395
if cull_max_age:
383-
cull_cmd.append("--max-age=%s" % cull_max_age)
396+
cull_cmd.append(f"--max-age={cull_max_age}")
384397

385398
c.JupyterHub.services.append(
386399
{
387-
"name": "cull-idle",
388-
"admin": True,
400+
"name": "jupyterhub-idle-culler",
389401
"command": cull_cmd,
390402
}
391403
)
404+
c.JupyterHub.load_roles.append(jupyterhub_idle_culler_role)
392405

393406
for key, service in get_config("hub.services", {}).items():
394407
# c.JupyterHub.services is a list of dicts, but
@@ -402,33 +415,52 @@ def camelCaseify(s):
402415

403416
c.JupyterHub.services.append(service)
404417

418+
for key, role in get_config("hub.loadRoles", {}).items():
419+
# c.JupyterHub.load_roles is a list of dicts, but
420+
# hub.loadRoles is a dict of dicts to make the config mergeable
421+
role.setdefault("name", key)
422+
423+
c.JupyterHub.load_roles.append(role)
424+
425+
# respect explicit null command (distinct from unspecified)
426+
# this avoids relying on KubeSpawner.cmd's default being None
427+
_unspecified = object()
428+
specified_cmd = get_config("singleuser.cmd", _unspecified)
429+
if specified_cmd is not _unspecified:
430+
c.Spawner.cmd = specified_cmd
405431

406-
set_config_if_not_none(c.Spawner, "cmd", "singleuser.cmd")
407432
set_config_if_not_none(c.Spawner, "default_url", "singleuser.defaultUrl")
408433

409-
cloud_metadata = get_config("singleuser.cloudMetadata", {})
434+
cloud_metadata = get_config("singleuser.cloudMetadata")
410435

411436
if cloud_metadata.get("blockWithIptables") == True:
412437
# Use iptables to block access to cloud metadata by default
413438
network_tools_image_name = get_config("singleuser.networkTools.image.name")
414439
network_tools_image_tag = get_config("singleuser.networkTools.image.tag")
440+
network_tools_resources = get_config("singleuser.networkTools.resources")
441+
ip = cloud_metadata["ip"]
415442
ip_block_container = client.V1Container(
416443
name="block-cloud-metadata",
417444
image=f"{network_tools_image_name}:{network_tools_image_tag}",
418445
command=[
419446
"iptables",
420-
"-A",
447+
"--append",
421448
"OUTPUT",
422-
"-d",
423-
cloud_metadata.get("ip", "169.254.169.254"),
424-
"-j",
449+
"--protocol",
450+
"tcp",
451+
"--destination",
452+
ip,
453+
"--destination-port",
454+
"80",
455+
"--jump",
425456
"DROP",
426457
],
427458
security_context=client.V1SecurityContext(
428459
privileged=True,
429460
run_as_user=0,
430461
capabilities=client.V1Capabilities(add=["NET_ADMIN"]),
431462
),
463+
resources=network_tools_resources,
432464
)
433465

434466
c.KubeSpawner.init_containers.append(ip_block_container)
@@ -438,17 +470,6 @@ def camelCaseify(s):
438470
c.JupyterHub.log_level = "DEBUG"
439471
c.Spawner.debug = True
440472

441-
# load /usr/local/etc/jupyterhub/jupyterhub_config.d config files
442-
config_dir = "/usr/local/etc/jupyterhub/jupyterhub_config.d"
443-
if os.path.isdir(config_dir):
444-
for file_path in sorted(glob.glob(f"{config_dir}/*.py")):
445-
file_name = os.path.basename(file_path)
446-
print(f"Loading {config_dir} config: {file_name}")
447-
with open(file_path) as f:
448-
file_content = f.read()
449-
# compiling makes debugging easier: https://stackoverflow.com/a/437857
450-
exec(compile(source=file_content, filename=file_name, mode="exec"))
451-
452473
# load potentially seeded secrets
453474
#
454475
# NOTE: ConfigurableHTTPProxy.auth_token is set through an environment variable
@@ -471,11 +492,23 @@ def camelCaseify(s):
471492
cfg.pop("keys", None)
472493
c[app].update(cfg)
473494

495+
# load /usr/local/etc/jupyterhub/jupyterhub_config.d config files
496+
config_dir = "/usr/local/etc/jupyterhub/jupyterhub_config.d"
497+
if os.path.isdir(config_dir):
498+
for file_path in sorted(glob.glob(f"{config_dir}/*.py")):
499+
file_name = os.path.basename(file_path)
500+
print(f"Loading {config_dir} config: {file_name}")
501+
with open(file_path) as f:
502+
file_content = f.read()
503+
# compiling makes debugging easier: https://stackoverflow.com/a/437857
504+
exec(compile(source=file_content, filename=file_name, mode="exec"))
505+
474506
# execute hub.extraConfig entries
475507
for key, config_py in sorted(get_config("hub.extraConfig", {}).items()):
476-
print("Loading extra config: %s" % key)
508+
print(f"Loading extra config: {key}")
477509
exec(config_py)
478510

511+
# CLOUDHARNESS: EDIT START
479512
# Allow switching authenticators easily
480513
auth_type = get_config('hub.config.JupyterHub.authenticator_class')
481514
email_domain = 'local'
@@ -525,4 +558,5 @@ def camelCaseify(s):
525558
c.apps = get_config('apps')
526559
c.registry = get_config('registry')
527560
c.domain = get_config('root.domain')
528-
c.namespace = get_config('root.namespace')
561+
c.namespace = get_config('root.namespace')
562+
# CLOUDHARNESS: EDIT END

applications/jupyterhub/deploy/resources/hub/z2jh.py

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,15 @@
33
44
Methods here can be imported by extraConfig in values.yaml
55
"""
6-
from collections import Mapping
7-
from functools import lru_cache
86
import os
9-
import re
7+
from collections.abc import Mapping
8+
from functools import lru_cache
109

1110
import yaml
1211

12+
1313
# memoize so we only load config once
14-
@lru_cache()
14+
@lru_cache
1515
def _load_config():
1616
"""Load the Helm chart configuration used to render the Helm templates of
1717
the chart from a mounted k8s Secret, and merge in values from an optionally
@@ -27,18 +27,19 @@ def _load_config():
2727
cfg = _merge_dictionaries(cfg, values)
2828
else:
2929
print(f"No config at {path}")
30+
# EDIT: CLOUDHARNESS START
3031
path = f"/opt/cloudharness/resources/allvalues.yaml"
3132
if os.path.exists(path):
3233
print("Loading global CloudHarness config at", path)
3334
with open(path) as f:
3435
values = yaml.safe_load(f)
3536
cfg = _merge_dictionaries(cfg, values)
3637
cfg['root'] = values
37-
38+
# EDIT: CLOUDHARNESS END
3839
return cfg
3940

4041

41-
@lru_cache()
42+
@lru_cache
4243
def _get_config_value(key):
4344
"""Load value from the k8s ConfigMap given a key."""
4445

@@ -50,7 +51,7 @@ def _get_config_value(key):
5051
raise Exception(f"{path} not found!")
5152

5253

53-
@lru_cache()
54+
@lru_cache
5455
def get_secret_value(key, default="never-explicitly-set"):
5556
"""Load value from the user managed k8s Secret or the default k8s Secret
5657
given a key."""
@@ -117,7 +118,7 @@ def get_config(key, default=None):
117118
else:
118119
value = value[level]
119120

120-
121+
# EDIT: CLOUDHARNESS START
121122
if value and isinstance(value, str):
122123
replace_var = re.search("{{.*?}}", value)
123124
if replace_var:
@@ -128,6 +129,7 @@ def get_config(key, default=None):
128129
if repl:
129130
print("replace", variable, "in", value, ":", repl)
130131
value = re.sub("{{.*?}}", repl, value)
132+
# EDIT: CLOUDHARNESS END
131133
return value
132134

133135

@@ -137,6 +139,5 @@ def set_config_if_not_none(cparent, name, key):
137139
configuration item if not None
138140
"""
139141
data = get_config(key)
140-
141142
if data is not None:
142-
setattr(cparent, name, data)
143+
setattr(cparent, name, data)

0 commit comments

Comments
 (0)