From 9991956986e22b7e8060dcb54f0942859628a336 Mon Sep 17 00:00:00 2001
From: martinkilbinger
Date: Sun, 17 May 2026 19:39:08 +0200
Subject: [PATCH 01/24] testing/simulate: added Pujol denoising estimator of shear bias

---
 src/shapepipe/testing/simulate.py | 105 +++++++++++++++++++++++++++++++
 1 file changed, 105 insertions(+)

diff --git a/src/shapepipe/testing/simulate.py b/src/shapepipe/testing/simulate.py
index 1b3d65ca5..e888ba8b3 100644
--- a/src/shapepipe/testing/simulate.py
+++ b/src/shapepipe/testing/simulate.py
@@ -93,3 +93,108 @@ def make_data(
         jacob_lists.append(wcs.jacobian())
 
     return gals, psfs, psfs_sigmas, weights, flags, jacob_lists
+
+
+def make_data_pujol(
+    rng,
+    shear_list,
+    noise=1e-5,
+    n_epochs=1,
+    share_shift=False,
+    gal_hlr=0.3,
+    gal_flux=1000.0,
+    psf_fwhm=0.55,
+    pixel_scale=0.1857,
+    img_size=201,
+):
+    """Simulate galaxies at multiple shear values sharing the same noise per epoch.
+
+    Implements the Pujol (2018) denoising approach: all shear variants receive
+    the identical noise and sub-pixel shift each epoch, so noise contributions
+    to the shear response R and to the m/c estimator cancel exactly.
+
+    Parameters
+    ----------
+    rng : numpy.random.RandomState
+        Random number generator; supplies the sub-pixel shifts and the
+        galaxy- and PSF-image noise realisations.
+    shear_list : list of tuple of float
+        List of (g1, g2) values to simulate simultaneously.
+    noise : float, optional
+        Per-pixel noise sigma. Must be non-zero: the weight maps are set to
+        ``1 / noise**2``.
+    n_epochs : int, optional
+        Number of epochs simulated for every shear value.
+    share_shift : bool, optional
+        If True all epochs share the same sub-pixel shift; the shift is
+        always shared across shear variants regardless of this flag.
+    gal_hlr, gal_flux, psf_fwhm, pixel_scale, img_size : float / int, optional
+        Same meaning as in make_data.
+
+    Returns
+    -------
+    list of tuple
+        One ``(gals, psfs, psfs_sigmas, weights, flags, jacob_lists)`` per
+        entry in ``shear_list``, all sharing identical per-epoch noise. Every
+        tuple member is a list with one element per epoch.
+    """
+    psf_noise_sigma = 1.0e-6
+    scale = pixel_scale
+    wcs = galsim.PixelScale(scale)
+    n_shears = len(shear_list)
+
+    # The PSF profile, its noise-free rendering and its adaptive-moments size
+    # depend on neither the epoch nor the random state: build them once.
+    psf = galsim.Moffat(beta=2.5, fwhm=psf_fwhm)
+    psf_im_ = psf.drawImage(nx=img_size, ny=img_size, wcs=wcs)
+    psf_sigma = galsim.hsm.FindAdaptiveMom(psf_im_).moments_sigma
+    psf_noiseless = psf_im_.array.astype(np.float64)
+
+    # output accumulators
+    all_gals = [[] for _ in range(n_shears)]
+    all_psfs = [[] for _ in range(n_shears)]
+    all_psfs_sigmas = [[] for _ in range(n_shears)]
+    all_weights = [[] for _ in range(n_shears)]
+    all_flags = [[] for _ in range(n_shears)]
+    all_jacobs = [[] for _ in range(n_shears)]
+
+    if share_shift:
+        dy, dx = rng.uniform(low=-scale / 2, high=scale / 2, size=2)
+
+    for _ in range(n_epochs):
+        if not share_shift:
+            dy, dx = rng.uniform(low=-scale / 2, high=scale / 2, size=2)
+
+        # One noise draw shared by all shear variants this epoch
+        noise_img = rng.normal(scale=noise, size=(img_size, img_size))
+        psf_noise_img = rng.normal(scale=psf_noise_sigma, size=(img_size, img_size))
+        psf_im = psf_noiseless + psf_noise_img
+
+        jacob = wcs.jacobian()
+        weight = np.full((img_size, img_size), 1.0 / noise ** 2)
+        flag = np.zeros((img_size, img_size))
+
+        for j, shear in enumerate(shear_list):
+            obj = galsim.Convolve(
+                psf,
+                galsim.Exponential(half_light_radius=gal_hlr, flux=gal_flux).shear(
+                    g1=shear[0], g2=shear[1]
+                ),
+            ).shift(dx, dy)
+            im = obj.drawImage(nx=img_size, ny=img_size, wcs=wcs).array.astype(np.float64)
+            im += noise_img  # same noise for every shear variant
+
+            all_gals[j].append(im)
+            # Copy, as for weight and flag, so that no array is aliased
+            # between the shear variants.
+            all_psfs[j].append(psf_im.copy())
+            all_psfs_sigmas[j].append(psf_sigma)
+            all_weights[j].append(weight.copy())
+            all_flags[j].append(flag.copy())
+            all_jacobs[j].append(jacob)
+
+    return [
+        (all_gals[j], all_psfs[j], all_psfs_sigmas[j],
+         all_weights[j], all_flags[j], all_jacobs[j])
+        for j in range(n_shears)
+    ]
From 46243dbfbbad0cd274b7c13af83335de0765c661 Mon Sep 17 00:00:00 2001
From: martinkilbinger
Date: Mon, 1 Jun 2026 17:35:20 +0200
Subject: [PATCH
02/24] Running SP v2.0 on image simulations, up to -j 8 --- auxdir/CFIS/im_sims_202606/numbers.txt | 200 ++++++++++++++++++ auxdir/CFIS/im_sims_202606/numbers_run.txt | 200 ++++++++++++++++++ docs/source/pipeline_v2.0.md | 6 +- .../config_exp_Gie_symlink.ini | 90 ++++++++ example/cfis_image_sims/config_tile_Fe.ini | 68 ++++++ .../config_tile_Git_symlink.ini | 87 ++++++++ example/cfis_simu/readme.txt | 3 +- scripts/sh/init_run_v2.0.sh | 56 ++++- scripts/sh/job_sp_canfar_v2.0.bash | 18 +- scripts/sh/run_job_sp_canfar_v2.0.bash | 75 ++++++- 10 files changed, 778 insertions(+), 25 deletions(-) create mode 100644 auxdir/CFIS/im_sims_202606/numbers.txt create mode 100644 auxdir/CFIS/im_sims_202606/numbers_run.txt create mode 100644 example/cfis_image_sims/config_exp_Gie_symlink.ini create mode 100644 example/cfis_image_sims/config_tile_Fe.ini create mode 100644 example/cfis_image_sims/config_tile_Git_symlink.ini diff --git a/auxdir/CFIS/im_sims_202606/numbers.txt b/auxdir/CFIS/im_sims_202606/numbers.txt new file mode 100644 index 000000000..f5964b285 --- /dev/null +++ b/auxdir/CFIS/im_sims_202606/numbers.txt @@ -0,0 +1,200 @@ +233.293 +235.293 +236.293 +237.292 +237.293 +238.292 +238.293 +239.291 +239.292 +239.293 +240.291 +240.292 +240.293 +241.293 +242.290 +242.291 +242.292 +242.293 +243.290 +243.291 +243.292 +243.293 +244.293 +245.289 +245.290 +245.291 +245.292 +246.289 +246.290 +246.291 +247.289 +247.290 +247.291 +247.292 +247.293 +248.288 +248.289 +248.290 +248.291 +248.292 +249.288 +249.289 +249.290 +249.291 +249.292 +250.289 +250.290 +250.291 +250.292 +251.287 +251.288 +251.289 +251.290 +251.291 +252.287 +252.288 +252.289 +252.290 +252.291 +253.287 +253.288 +253.291 +254.286 +254.287 +254.288 +254.290 +255.286 +255.288 +255.289 +255.290 +256.286 +256.287 +256.289 +257.285 +257.286 +257.287 +257.288 +257.289 +258.287 +258.288 +258.289 +259.285 +259.286 +259.287 +259.288 +259.289 +260.284 +260.285 +260.286 +260.287 +260.288 +260.289 +261.284 +261.285 +261.286 
+261.287 +261.288 +262.285 +262.286 +262.287 +262.288 +263.283 +263.284 +263.285 +263.286 +264.283 +264.284 +264.285 +264.286 +264.287 +265.283 +265.284 +265.285 +265.286 +265.287 +266.282 +266.283 +266.285 +266.286 +266.287 +267.283 +267.284 +267.285 +267.286 +268.282 +268.283 +268.284 +268.285 +269.281 +269.282 +269.283 +269.284 +269.286 +270.281 +270.282 +270.283 +271.281 +271.282 +271.283 +271.284 +271.285 +272.281 +272.282 +272.283 +272.284 +272.285 +273.280 +273.282 +273.283 +273.284 +274.279 +274.280 +274.282 +274.283 +274.284 +275.279 +275.280 +275.281 +275.282 +275.283 +275.284 +276.279 +276.280 +276.281 +276.282 +276.283 +277.279 +277.280 +277.281 +277.282 +278.279 +278.280 +278.281 +278.282 +278.283 +279.280 +279.281 +279.282 +280.279 +280.280 +280.281 +280.282 +281.279 +281.280 +281.281 +281.282 +282.280 +282.281 +283.279 +283.280 +284.279 +284.281 +285.279 +285.280 +286.279 +286.280 +287.279 +287.280 +288.279 +289.279 diff --git a/auxdir/CFIS/im_sims_202606/numbers_run.txt b/auxdir/CFIS/im_sims_202606/numbers_run.txt new file mode 100644 index 000000000..634931641 --- /dev/null +++ b/auxdir/CFIS/im_sims_202606/numbers_run.txt @@ -0,0 +1,200 @@ +233-293 +235-293 +236-293 +237-292 +237-293 +238-292 +238-293 +239-291 +239-292 +239-293 +240-291 +240-292 +240-293 +241-293 +242-290 +242-291 +242-292 +242-293 +243-290 +243-291 +243-292 +243-293 +244-293 +245-289 +245-290 +245-291 +245-292 +246-289 +246-290 +246-291 +247-289 +247-290 +247-291 +247-292 +247-293 +248-288 +248-289 +248-290 +248-291 +248-292 +249-288 +249-289 +249-290 +249-291 +249-292 +250-289 +250-290 +250-291 +250-292 +251-287 +251-288 +251-289 +251-290 +251-291 +252-287 +252-288 +252-289 +252-290 +252-291 +253-287 +253-288 +253-291 +254-286 +254-287 +254-288 +254-290 +255-286 +255-288 +255-289 +255-290 +256-286 +256-287 +256-289 +257-285 +257-286 +257-287 +257-288 +257-289 +258-287 +258-288 +258-289 +259-285 +259-286 +259-287 +259-288 +259-289 +260-284 +260-285 +260-286 +260-287 +260-288 
+260-289 +261-284 +261-285 +261-286 +261-287 +261-288 +262-285 +262-286 +262-287 +262-288 +263-283 +263-284 +263-285 +263-286 +264-283 +264-284 +264-285 +264-286 +264-287 +265-283 +265-284 +265-285 +265-286 +265-287 +266-282 +266-283 +266-285 +266-286 +266-287 +267-283 +267-284 +267-285 +267-286 +268-282 +268-283 +268-284 +268-285 +269-281 +269-282 +269-283 +269-284 +269-286 +270-281 +270-282 +270-283 +271-281 +271-282 +271-283 +271-284 +271-285 +272-281 +272-282 +272-283 +272-284 +272-285 +273-280 +273-282 +273-283 +273-284 +274-279 +274-280 +274-282 +274-283 +274-284 +275-279 +275-280 +275-281 +275-282 +275-283 +275-284 +276-279 +276-280 +276-281 +276-282 +276-283 +277-279 +277-280 +277-281 +277-282 +278-279 +278-280 +278-281 +278-282 +278-283 +279-280 +279-281 +279-282 +280-279 +280-280 +280-281 +280-282 +281-279 +281-280 +281-281 +281-282 +282-280 +282-281 +283-279 +283-280 +284-279 +284-281 +285-279 +285-280 +286-279 +286-280 +287-279 +287-280 +288-279 +289-279 diff --git a/docs/source/pipeline_v2.0.md b/docs/source/pipeline_v2.0.md index bdcd07d08..f089f7e87 100644 --- a/docs/source/pipeline_v2.0.md +++ b/docs/source/pipeline_v2.0.md @@ -5,7 +5,7 @@ Documentation to create ShapePipe output products for catalogues v2.x. ## Initialise directory structure ```bash -init_run_v2.0.sh +init_run_v2.0.sh [-t data|image_sims] ``` sets up the directory structure. This will be @@ -19,8 +19,8 @@ v2.0/ │ ├── 21/ │ │ ├── 21163916 │ │ └── ... 
-├── cfis -> /arc/home/kilbinger/shapepipe/example/cfis -├── tile_numbers -> /arc/home/kilbinger/shapepipe/auxdir/CFIS/tiles_202604/tiles_r.txt +├── cfis -> +├── tile_numbers -> └── debug/ diff --git a/example/cfis_image_sims/config_exp_Gie_symlink.ini b/example/cfis_image_sims/config_exp_Gie_symlink.ini new file mode 100644 index 000000000..a6a3aed92 --- /dev/null +++ b/example/cfis_image_sims/config_exp_Gie_symlink.ini @@ -0,0 +1,90 @@ +# ShapePipe configuration file for: get images + + +## Default ShapePipe options +[DEFAULT] + +# verbose mode (optional), default: True, print messages on terminal +VERBOSE = False + +# Name of run (optional) default: shapepipe_run +RUN_NAME = run_sp_exp_Gie + +# Add date and time to RUN_NAME, optional, default: False +RUN_DATETIME = True + + +## ShapePipe execution options +[EXECUTION] + +# Module name, single string or comma-separated list of valid module runner names +MODULE = get_images_runner + +# Parallel processing mode, SMP or MPI +MODE = SMP + + +## ShapePipe file handling options +[FILE] + +# Log file master name, optional, default: shapepipe +LOG_NAME = log_sp + +# Runner log file name, optional, default: shapepipe_runs +RUN_LOG_NAME = log_run_sp + +# Input directory, containing input files, single string or list of names +INPUT_DIR = $SP_RUN + +# Output directory +OUTPUT_DIR = $SP_RUN/output + + +## ShapePipe job handling options +[JOB] + +# Batch size of parallel processing (optional), default is 1, i.e. run all jobs in serial +SMP_BATCH_SIZE = 1 + +# Timeout value (optional), default is None, i.e. 
no timeout limit applied +TIMEOUT = 96:00:00 + + +## Module options + +# Get exposures +[GET_IMAGES_RUNNER] + +INPUT_MODULE = last:find_exposures_runner + +FILE_PATTERN = exp_numbers + +FILE_EXT = .txt + +# NUMBERING_SCHEME (optional) string with numbering pattern for input files +NUMBERING_SCHEME = -000-000 + + +# Paths + +# Output path (optional, default is [FILE]:OUTPUT_DIR) +# OUTPUT_PATH = input_images + +# Input path where original images are stored. Can be local path or vos url. +# Single string or list of strings +INPUT_PATH = $SP_DIR/input_exp/,$SP_DIR/input_exp/,$SP_DIR/input_exp/ + +# Input file pattern including exposure number as dummy template +INPUT_FILE_PATTERN = simu_image-0000000,simu_weight-0000000,simu_flag-0000000 + +# Input file extensions +INPUT_FILE_EXT = .fits, .fits,.fits + +# Input numbering scheme, python regexp +INPUT_NUMBERING = \d{7} + +# Output file pattern without number +OUTPUT_FILE_PATTERN = image-,weight-,flag- + +# Method to retrieve images, one in 'vos', 'symlink' +RETRIEVE = symlink diff --git a/example/cfis_image_sims/config_tile_Fe.ini b/example/cfis_image_sims/config_tile_Fe.ini new file mode 100644 index 000000000..9c3cb8cd5 --- /dev/null +++ b/example/cfis_image_sims/config_tile_Fe.ini @@ -0,0 +1,68 @@ +# ShapePipe configuration file for: find exposures + + +## Default ShapePipe options +[DEFAULT] + +# verbose mode (optional), default: True, print messages on terminal +VERBOSE = False + +# Name of run (optional) default: shapepipe_run +RUN_NAME = run_sp_tile_Fe + +# Add date and time to RUN_NAME, optional, default: False +RUN_DATETIME = True + + +## ShapePipe execution options +[EXECUTION] + +# Module name, single string or comma-separated list of valid module runner names +MODULE = find_exposures_runner + +# Parallel processing mode, SMP or MPI +MODE = SMP + + +## ShapePipe file handling options +[FILE] + +# Log file master name, optional, default: shapepipe +LOG_NAME = log_sp + +# Runner log file name, optional, default: 
shapepipe_runs +RUN_LOG_NAME = log_run_sp + +# Input directory, containing input files, single string or list of names +INPUT_DIR = $SP_RUN + +# Output directory +OUTPUT_DIR = $SP_RUN/output + + +## ShapePipe job handling options +[JOB] + +# Batch size of parallel processing (optional), default is 1, i.e. run all jobs in serial +SMP_BATCH_SIZE = 1 + +# Timeout value (optional), default is None, i.e. no timeout limit applied +TIMEOUT = 96:00:00 + + +## Module options + +[FIND_EXPOSURES_RUNNER] + +INPUT_MODULE = last:get_images_runner + +FILE_PATTERN = CFIS_simu_image + +FILE_EXT = .fits + +# NUMBERING_SCHEME (optional) string with numbering pattern for input files +NUMBERING_SCHEME = -000-000 + +COLNUM = 2 + +EXP_PREFIX = simu_image- diff --git a/example/cfis_image_sims/config_tile_Git_symlink.ini b/example/cfis_image_sims/config_tile_Git_symlink.ini new file mode 100644 index 000000000..87c4a8816 --- /dev/null +++ b/example/cfis_image_sims/config_tile_Git_symlink.ini @@ -0,0 +1,87 @@ +# ShapePipe configuration file for: get tile images + + +## Default ShapePipe options +[DEFAULT] + +# verbose mode (optional), default: True, print messages on terminal +VERBOSE = False + +# Name of run (optional) default: shapepipe_run +RUN_NAME = run_sp_tile_Git + +# Add date and time to RUN_NAME, optional, default: False +RUN_DATETIME = True + + +## ShapePipe execution options +[EXECUTION] + +# Module name, single string or comma-separated list of valid module runner names +MODULE = get_images_runner + +# Parallel processing mode, SMP or MPI +MODE = SMP + + +## ShapePipe file handling options +[FILE] + +# Log file master name, optional, default: shapepipe +LOG_NAME = log_sp + +# Runner log file name, optional, default: shapepipe_runs +RUN_LOG_NAME = log_run_sp + +# Input directory, containing input files, single string or list of names +INPUT_DIR = $SP_RUN + +# Output directory +OUTPUT_DIR = $SP_RUN/output + + +## ShapePipe job handling options +[JOB] + +# Batch size of parallel 
processing (optional), default is 1, i.e. run all jobs in serial +SMP_BATCH_SIZE = 1 + +# Timeout value (optional), default is None, i.e. no timeout limit applied +TIMEOUT = 96:00:00 + + +## Module options + +# Get tiles +[GET_IMAGES_RUNNER] + +FILE_PATTERN = tile_numbers + +FILE_EXT = .txt + +# NUMBERING_SCHEME (optional) string with numbering pattern for input files +NUMBERING_SCHEME = + +# Paths + +# Input path where original images are stored. Can be local path or vos url. +# Single string or list of strings +INPUT_PATH = $SP_DIR/input_tiles, $SP_DIR/input_tiles + +# Input file pattern including tile number as dummy template +INPUT_FILE_PATTERN = CFIS_simu_image-000-000, CFIS_simu_weight-000-000 + +# Input file extensions +INPUT_FILE_EXT = .fits, .fits + +# Input numbering scheme, python regexp +INPUT_NUMBERING = \d{3}-\d{3} + +# Output file pattern without number +OUTPUT_FILE_PATTERN = CFIS_simu_image-, CFIS_simu_weight- + +# Copy/download method, one in 'vos', 'symlink' +RETRIEVE = symlink + +# Copy command options, optional +RETRIEVE_OPTIONS = -L diff --git a/example/cfis_simu/readme.txt b/example/cfis_simu/readme.txt index 719ca3052..dbf5475e2 100644 --- a/example/cfis_simu/readme.txt +++ b/example/cfis_simu/readme.txt @@ -1,5 +1,6 @@ +A newer version of this folder is ../cfis_image_sims This folder contains the configuration files used to run - shapepipe on simulated images. + shapepipe on simulated images (from 2024). The $SP_RUN and $SP_CONFIG can point to this directory. A link to the exposures and tiles can be called "input_exp" and "input_tiles" and added to this directory. 
diff --git a/scripts/sh/init_run_v2.0.sh b/scripts/sh/init_run_v2.0.sh index 74ab260c0..f2b82734f 100755 --- a/scripts/sh/init_run_v2.0.sh +++ b/scripts/sh/init_run_v2.0.sh @@ -9,6 +9,12 @@ # Version version="2.0" +# Input type: data or image_sims +type="data" + +# Subdir for image_sims +subdir="1z2z_grid_1" + # Default base run directory (permanent storage) #base_dir="$HOME/cosmostat/v2/v${version}" base_dir=`pwd` @@ -16,16 +22,13 @@ base_dir=`pwd` # ShapePipe repository root (for config symlink and tile list) sp_root="$HOME/shapepipe" -# Tile list source (full filenames, will be stripped to NNN.MMM) -tiles_src="$sp_root/auxdir/CFIS/tiles_202604/tiles_r.txt" - -# Config directory (will be symlinked as $base_dir/cfis) -config_dir="$sp_root/example/cfis" ## Help string usage="Usage: $(basename "$0") [OPTIONS] \nOptions:\n -h\t\tthis message\n + -t, --type TYPE input type, allowed are 'data', 'image_sims', default='$type'\n + -s, --subdir SUBDIR subdir for image simulations, default='$subdir'\n -d, --dir DIR\tbase run directory, default='$base_dir'\n " @@ -36,6 +39,14 @@ while [ $# -gt 0 ]; do echo -ne $usage exit 0 ;; + -t|--type) + type="$2" + shift + ;; + -s|--subdir) + subdir="$2" + shift + ;; -d|--dir) base_dir="$2" shift @@ -49,6 +60,34 @@ while [ $# -gt 0 ]; do shift done +# Check options +if [ "$type" == "data" ]; then + + # Config file directory + config_dir="$sp_root/example/cfis" + + # Input tile list + tiles_src="$sp_root/auxdir/CFIS/tiles_202604/tiles_r.txt" + +elif [ "$type" == "image_sims" ]; then + + config_dir="$sp_root/example/cfis_image_sims" + tiles_src="$sp_root/auxdir/CFIS/im_sims_202606/numbers.txt" + + input_dir_base="/n09data/hervas/skills_out" + input_dir_tiles="$input_dir_base/$subdir/images/SP_tiles" + input_dir_exp="$input_dir_base/$subdir/images/SP_exp" + ln -s $input_dir_tiles input_tiles + ln -s $input_dir_exp input_exp + +else + + echo "Invalid input type $type" + exit 3 + +fi + + echo "Initialising ShapePipe v${version} run directory: 
$base_dir" echo "" @@ -71,6 +110,9 @@ mkdir -p logs mkdir -p debug # --- Config symlink --- + +# Config directory (will be symlinked as $base_dir/cfis) + if [ -L cfis ]; then echo "cfis symlink already exists, skipping" elif [ -d cfis ]; then @@ -101,3 +143,7 @@ echo " ├── exp/" echo " ├── logs/" echo " ├── cfis -> ${config_dir}" echo " └── tile_numbers.txt -> ${tiles_src}" +if [ "$type" == "image_sims" ]; then + echo " ___ input_tiles -> $input_dir_tiles" + echo " ___ input_exp -> $input_dir_exp" +fi diff --git a/scripts/sh/job_sp_canfar_v2.0.bash b/scripts/sh/job_sp_canfar_v2.0.bash index c1fa6cd29..295722fdf 100755 --- a/scripts/sh/job_sp_canfar_v2.0.bash +++ b/scripts/sh/job_sp_canfar_v2.0.bash @@ -134,9 +134,8 @@ if [ "$star_cat_for_mask" != "onthefly" ] && [ "$star_cat_for_mask" != "save" ]; exit 4 fi -#if [ "$retrieve" != "vos" ] && [ "$retrieve" != "symlink" ]; then -if [ "$retrieve" != "vos" ]; then - echo "method to retrieve images (option -r) needs to be 'vos' for v2.0" +if [ "$retrieve" != "vos" ] && [ "$retrieve" != "symlink" ]; then + echo "Invalid method to retrieve images $retrieve (option -r), needs to be 'vos' or 'symlink'" exit 5 fi @@ -166,10 +165,11 @@ export SP_RUN=`pwd` export SP_CONFIG=$SP_RUN/cfis # Root directory for per-exposure work directories. -# Set SP_EXP in the environment to override; otherwise falls back to the -# conventional layout (SP_RUN = .../v2.0/tiles/IDra/ID, three levels up + exp). +# Set SP_EXP in the environment to override; otherwise derive it by stripping +# the /tiles/... suffix from SP_RUN — robust to any directory depth and to +# both data (.../v2.0/tiles/IDra/ID) and image_sims (.../grid_N/tiles/IDra/ID). 
if [ -z "${SP_EXP}" ]; then - export SP_EXP=$(realpath "$SP_RUN/../../../exp") + export SP_EXP="${SP_RUN%/tiles/*}/exp" echo "Setting SP_EXP to $SP_EXP" fi @@ -318,12 +318,12 @@ if [[ $do_job != 0 ]]; then fi -## Retrieve exposure images (online, vos) +## Retrieve exposure images (online if retrieve=vos) (( do_job = $job & 8 )) if [[ $do_job != 0 ]]; then command_cfg_shapepipe \ - "config_exp_Gie_vos.ini" \ + "config_exp_Gie_$retrieve.ini" \ "Run shapepipe (get exposure images)" \ $n_smp \ $exclusive @@ -390,7 +390,7 @@ if [[ $do_job != 0 ]]; then ### Download external catalogue from vos command_cfg_shapepipe \ - "config_tile_Git_cat_vos.ini" \ + "config_tile_Git_cat_$retrieve.ini" \ "Run shapepipe (download external tile catalogue)" \ -1 \ $exclusive diff --git a/scripts/sh/run_job_sp_canfar_v2.0.bash b/scripts/sh/run_job_sp_canfar_v2.0.bash index 9da08f598..03df41ff6 100755 --- a/scripts/sh/run_job_sp_canfar_v2.0.bash +++ b/scripts/sh/run_job_sp_canfar_v2.0.bash @@ -20,6 +20,10 @@ N_SMP=1 dry_run=0 dir=`pwd` debug_out="" + +# Input type: data or image_sims +type="data" + #scratch="/scratch/$USER/shapepipe/v${version}" scratch="" test_only=0 @@ -40,6 +44,7 @@ ${JOB_LIST_HELP} -e, --exclusive ID\timage ID\n \t\t\tPSF model, one in ['psfex'|'mccd'], default='$psf'\n --tile_det DET\t\ttile detection mode, one in ['sx'|'uc'], default='$tile_det'\n --tile_mask MASK\ttile masking, default='$tile_mask'\n + -t, --type TYPE input type, allowed are 'data', 'image_sims', default='$type'\n -N, --N_SMP N_SMP\tnumber of SMP jobs, default from original config files\n -d, --directory DIR\trun directory, default is pwd ($dir)\n -S, --scratch DIR\tprocessing scratch directory, default=none\n @@ -75,6 +80,10 @@ while [ $# -gt 0 ]; do psf="$2" shift ;; + -t|--type) + type="$2" + shift + ;; --tile_det) tile_det="$2" shift @@ -128,6 +137,9 @@ function message() { if [ -n "$my_debug_out" ]; then echo ${pat}$msg >> $my_debug_out fi + if [ -n "$log_file" ]; then + echo ${pat}$msg >> 
$log_file + fi if [ "$my_exit" != "-1" ]; then if [ -n "$my_debug_out" ]; then @@ -135,6 +147,9 @@ function message() { else echo "${pat}exiting with code $my_exit" fi + if [ -n "$log_file" ]; then + echo "${pat}exiting with code $my_exit" >> $log_file + fi exit $my_exit fi } @@ -158,7 +173,7 @@ function init_exp_work_dir() { fi if [ ! -e "$exp_work_dir/cfis" ]; then - ln -sf ~/shapepipe/example/cfis "$exp_work_dir/cfis" + ln -sf $config_dir "$exp_work_dir/cfis" fi } @@ -312,7 +327,7 @@ function run_exp_job() { echo "$(basename "$0") -j $exp_job -e $exp_id" > "$exp_log_file" echo "pwd=`pwd`" - command "job_sp_canfar_v2.0.bash -p $psf --tile_det $tile_det --tile_mask $tile_mask -j $exp_job --n_smp $N_SMP --nsh_jobs $N_SMP $debug_flag" $dry_run 2>&1 | tee -a "$exp_log_file" + command "job_sp_canfar_v2.0.bash -p $psf -r $retrieve --tile_det $tile_det --tile_mask $tile_mask -j $exp_job --n_smp $N_SMP --nsh_jobs $N_SMP $debug_flag" $dry_run 2>&1 | tee -a "$exp_log_file" echo "Done with job_sp_canfar_v2.0.bash" done < "$exp_numbers_file" @@ -440,15 +455,41 @@ function run_tile_job() { [ -n "$debug_out" ] && debug_flag="--debug_out $debug_out" if [ ! 
-e "cfis" ]; then - ln -sf ~/shapepipe/example/cfis "cfis" + ln -sf $config_dir cfis fi command "update_runs_log_file.py" $dry_run # Run job script - command "job_sp_canfar_v2.0.bash -p $psf --tile_det $tile_det --tile_mask $tile_mask -j $tile_job --n_smp $N_SMP --nsh_jobs $N_SMP $debug_flag" $dry_run 2>&1 | tee -a "$log_file" + command "job_sp_canfar_v2.0.bash -p $psf -r $retrieve --tile_det $tile_det --tile_mask $tile_mask -j $tile_job --n_smp $N_SMP --nsh_jobs $N_SMP $debug_flag" $dry_run 2>&1 | tee -a "$log_file" } + +if [ "$type" == "data" ]; then + + echo "Running on data" + retrieve="vos" + config_dir=$HOME/shapepipe/example/cfis + +elif [ "$type" == "image_sims" ]; then + + echo "Running on image simulations" + retrieve="symlink" + config_dir=$HOME/shapepipe/example/cfis_image_sims + # SP_DIR points to the run directory (grid level) where input_tiles and + # input_exp live; configs use $SP_DIR/input_* so those dirs stay outside + # SP_RUN and are not found twice by ShapePipe's recursive glob scan. + export SP_DIR=$dir + +else + + echo "Invalid input type $type" + +fi + +echo "config_dir=$config_dir" + + # Init message if [ "$test_only" == "1" ]; then message "$(basename "$0") test mode, exiting." "$debug_out" 0 @@ -503,10 +544,18 @@ log_file="$work_dir/job_sp_canfar_v2.0.log" # Create tile work directory [ ! -d "$work_dir" ] && command "mkdir -p $work_dir" $dry_run cd $work_dir +echo "$0 $@" > "$log_file" # Write ID to first input +# Image sims use dash format (e.g. 233-293); real data uses dot format (233.293) +# which ShapePipe's in2out_pattern converts to dashes for output naming only, +# not for input file lookup — so write the format that matches the actual files. if [ ! -e tile_numbers.txt ]; then - echo $ID > tile_numbers.txt + if [ "$type" == "image_sims" ]; then + echo ${ID//./-} > tile_numbers.txt + else + echo $ID > tile_numbers.txt + fi fi # Output directory @@ -514,6 +563,7 @@ if [ ! 
-d "output" ]; then command "mkdir output" $dry_run fi + echo -n "pwd: "; pwd @@ -542,8 +592,19 @@ fi (( do_job = job & 2 )) if [[ $do_job != 0 ]]; then - # Job 2: uncompress tile weights - run_tile_job 2 "Uz" "uncompress_fits_runner:1" + if [ "$type" == "image_sims" ]; then + # Image sims weights are already uncompressed; fake the Uz output directory + # so downstream jobs can find the weight via last:uncompress_fits_runner. + uz_out="$work_dir/output/run_sp_tile_Uz/uncompress_fits_runner/output" + command "mkdir -p $uz_out" $dry_run + weight_src="$dir/input_tiles/CFIS_simu_weight-${ID//./-}.fits" + if [ -e "$weight_src" ] && [ ! -e "$uz_out/$(basename $weight_src)" ]; then + command "ln -sf $weight_src $uz_out/$(basename $weight_src)" $dry_run + fi + else + # Job 2: uncompress tile weights + run_tile_job 2 "Uz" "uncompress_fits_runner:1" + fi fi (( do_job = job & 4 )) From b8dca7377d8cac1398d901e2889b5c57bd20cc71 Mon Sep 17 00:00:00 2001 From: martinkilbinger Date: Tue, 2 Jun 2026 15:24:47 +0200 Subject: [PATCH 03/24] SP v2.0 running on image simulations including ngmix --- example/cfis/config_exp_Ma_onthefly.ini | 3 - example/cfis/config_tile_Sx_nomask.ini | 2 +- .../config_exp_Ma_onthefly.ini | 76 +++++++++++ example/cfis_image_sims/config_exp_Sp.ini | 74 ++++++++++ example/cfis_image_sims/config_exp_psfex.ini | 54 ++++++++ .../cfis_image_sims/config_onthefly.mask_simu | 86 ++++++++++++ .../config_tile_Ma_onthefly.ini | 82 +++++++++++ .../cfis_image_sims/config_tile_Mh_exp.ini | 72 ++++++++++ .../config_tile_Ng_batch_psfex_sx.ini | 79 +++++++++++ .../config_tile_PiViVi_canfar_sx.ini | 127 ++++++++++++++++++ .../cfis_image_sims/config_tile_Sx_nomask.ini | 114 ++++++++++++++++ .../config_tile_onthefly.mask_simu | 90 +++++++++++++ example/cfis_image_sims/default.conv | 1 + example/cfis_image_sims/default.param | 1 + .../cfis_image_sims/default_noimaflags.param | 1 + example/cfis_image_sims/default_tile.sex | 1 + scripts/sh/job_list_help.bash | 2 +- 
scripts/sh/job_sp_canfar_v2.0.bash | 19 +-- scripts/sh/run_job_sp_canfar_v2.0.bash | 45 +++++-- .../modules/fake_psf_package/__init__.py | 28 ++++ .../modules/fake_psf_package/fake_psf.py | 89 ++++++++++++ src/shapepipe/modules/fake_psf_runner.py | 36 +++++ .../modules/mask_package/__init__.py | 2 - src/shapepipe/modules/mask_package/mask.py | 17 +-- src/shapepipe/modules/ngmix_package/ngmix.py | 48 ++++--- src/shapepipe/modules/ngmix_runner.py | 14 +- .../sextractor_package/sextractor_script.py | 6 +- src/shapepipe/pipeline/exp_utils.py | 13 +- 28 files changed, 1109 insertions(+), 73 deletions(-) create mode 100644 example/cfis_image_sims/config_exp_Ma_onthefly.ini create mode 100644 example/cfis_image_sims/config_exp_Sp.ini create mode 100644 example/cfis_image_sims/config_exp_psfex.ini create mode 100644 example/cfis_image_sims/config_onthefly.mask_simu create mode 100644 example/cfis_image_sims/config_tile_Ma_onthefly.ini create mode 100644 example/cfis_image_sims/config_tile_Mh_exp.ini create mode 100644 example/cfis_image_sims/config_tile_Ng_batch_psfex_sx.ini create mode 100644 example/cfis_image_sims/config_tile_PiViVi_canfar_sx.ini create mode 100644 example/cfis_image_sims/config_tile_Sx_nomask.ini create mode 100644 example/cfis_image_sims/config_tile_onthefly.mask_simu create mode 120000 example/cfis_image_sims/default.conv create mode 120000 example/cfis_image_sims/default.param create mode 120000 example/cfis_image_sims/default_noimaflags.param create mode 120000 example/cfis_image_sims/default_tile.sex create mode 100644 src/shapepipe/modules/fake_psf_package/__init__.py create mode 100644 src/shapepipe/modules/fake_psf_package/fake_psf.py create mode 100644 src/shapepipe/modules/fake_psf_runner.py diff --git a/example/cfis/config_exp_Ma_onthefly.ini b/example/cfis/config_exp_Ma_onthefly.ini index 2ab1c041c..de58b56ae 100644 --- a/example/cfis/config_exp_Ma_onthefly.ini +++ b/example/cfis/config_exp_Ma_onthefly.ini @@ -74,6 +74,3 @@ USE_EXT_STAR = 
False # File name suffix for the output flag files (optional) PREFIX = pipeline - -# Path to check for existing output mask files -CHECK_EXISTING_DIR = $SP_RUN/output/run_sp_Ma_exp/mask_runner/output diff --git a/example/cfis/config_tile_Sx_nomask.ini b/example/cfis/config_tile_Sx_nomask.ini index 81bb89c61..731a8d338 100644 --- a/example/cfis/config_tile_Sx_nomask.ini +++ b/example/cfis/config_tile_Sx_nomask.ini @@ -55,7 +55,7 @@ TIMEOUT = 96:00:00 [SEXTRACTOR_RUNNER] -INPUT_DIR = run_sp_Git:get_images_runner, last:uncompress_fits_runner, run_sp_exp_Mh:merge_headers_runner +INPUT_DIR = run_sp_tile_Git:get_images_runner, last:uncompress_fits_runner, run_sp_tile_Mh_exp:merge_headers_runner FILE_PATTERN = CFIS_image, CFIS_weight, log_exp_headers diff --git a/example/cfis_image_sims/config_exp_Ma_onthefly.ini b/example/cfis_image_sims/config_exp_Ma_onthefly.ini new file mode 100644 index 000000000..f5699d82a --- /dev/null +++ b/example/cfis_image_sims/config_exp_Ma_onthefly.ini @@ -0,0 +1,76 @@ +# ShapePipe configuration file for masking of exposures + + +## Default ShapePipe options +[DEFAULT] + +# verbose mode (optional), default: True, print messages on terminal +VERBOSE = True + +# Name of run (optional) default: shapepipe_run +RUN_NAME = run_sp_exp_Ma + +# Add date and time to RUN_NAME, optional, default: False +; RUN_DATETIME = False + + +## ShapePipe execution options +[EXECUTION] + +# Module name, single string or comma-separated list of valid module runner names +MODULE = mask_runner + +# Parallel processing mode, SMP or MPI +MODE = SMP + + +## ShapePipe file handling options +[FILE] + +# Log file master name, optional, default: shapepipe +LOG_NAME = log_sp + +# Runner log file name, optional, default: shapepipe_runs +RUN_LOG_NAME = log_run_sp + +# Input directory, containing input files, single string or list of names +INPUT_DIR = . 
+ +# Output directory +OUTPUT_DIR = $SP_RUN/output + + +## ShapePipe job handling options +[JOB] + +# Batch size of parallel processing (optional), default is 1, i.e. run all jobs in serial +SMP_BATCH_SIZE = 4 + +# Timeout value (optional), default is None, i.e. no timeout limit applied +TIMEOUT = 96:00:00 + + +## Module options + +### Mask exposures +[MASK_RUNNER] + +# Parent module +INPUT_DIR = last:split_exp_runner + +# Update numbering convention, accounting for HDU number of +# single-exposure single-HDU files +NUMBERING_SCHEME = -0000000-0 + +# Path of mask config file +MASK_CONFIG_PATH = $SP_CONFIG/config_onthefly.mask_simu + +# External mask file flag, use if True, otherwise ignore +USE_EXT_FLAG = True + +# External star catalogue flag, use external cat if True, +# obtain from online catalogue if False +USE_EXT_STAR = False + +# File name suffix for the output flag files (optional) +PREFIX = pipeline diff --git a/example/cfis_image_sims/config_exp_Sp.ini b/example/cfis_image_sims/config_exp_Sp.ini new file mode 100644 index 000000000..f21b77aaf --- /dev/null +++ b/example/cfis_image_sims/config_exp_Sp.ini @@ -0,0 +1,74 @@ +# ShapePipe configuration file for single-exposures, +# split images + + +## Default ShapePipe options +[DEFAULT] + +# verbose mode (optional), default: True, print messages on terminal +VERBOSE = True + +# Name of run (optional) default: shapepipe_run +RUN_NAME = run_sp_exp_Sp + +# Add date and time to RUN_NAME, optional, default: True +RUN_DATETIME = True + + +## ShapePipe execution options +[EXECUTION] + +# Module name, single string or comma-separated list of valid module runner names +MODULE = split_exp_runner + +# Run mode, SMP or MPI +MODE = SMP + + +## ShapePipe file handling options +[FILE] + +# Log file master name, optional, default: shapepipe +LOG_NAME = log_sp + +# Runner log file name, optional, default: shapepipe_runs +RUN_LOG_NAME = log_run_sp + +# Input directory, containing input files, single string or list of names 
with length matching FILE_PATTERN +INPUT_DIR = . + +# Output directory +OUTPUT_DIR = $SP_RUN/output + + +## ShapePipe job handling options +[JOB] + +# Batch size of parallel processing (optional), default is 1, i.e. run all jobs in serial +SMP_BATCH_SIZE = 8 + +# Timeout value (optional), default is None, i.e. no timeout limit applied +TIMEOUT = 96:00:00 + + +## Module options + +[SPLIT_EXP_RUNNER] + +INPUT_DIR = last:get_images_runner + +FILE_PATTERN = image, weight, flag + +# Matches compressed single-exposure files +FILE_EXT = .fits, .fits, .fits + +NUMBERING_SCHEME = -0000000 + +# OUTPUT_SUFFIX, actually file name prefixes. +# Expected keyword "flag" will lead to a behavior where the data are saved as int. +# The code also expects the image data to use the "image" suffix +# (default value in the pipeline). +OUTPUT_SUFFIX = image, weight, flag + +# Number of HDUs/CCDs of mosaic +N_HDU = 40 diff --git a/example/cfis_image_sims/config_exp_psfex.ini b/example/cfis_image_sims/config_exp_psfex.ini new file mode 100644 index 000000000..f44bb38d7 --- /dev/null +++ b/example/cfis_image_sims/config_exp_psfex.ini @@ -0,0 +1,54 @@ +# ShapePipe configuration file for fake PSF postage stamps (image simulations) + + +## Default ShapePipe options +[DEFAULT] + +VERBOSE = True + +RUN_NAME = run_sp_tile_fpsf + +RUN_DATETIME = True + + +## ShapePipe execution options +[EXECUTION] + +MODULE = fake_psf_runner + +MODE = SMP + + +## ShapePipe file handling options +[FILE] + +LOG_NAME = log_sp + +RUN_LOG_NAME = log_run_sp + +INPUT_DIR = $SP_RUN/output + +OUTPUT_DIR = $SP_RUN/output + + +## ShapePipe job handling options +[JOB] + +SMP_BATCH_SIZE = 1 + +TIMEOUT = 96:00:00 + + +## Module options +[FAKE_PSF_RUNNER] + +INPUT_MODULE = sextractor_runner + +FILE_PATTERN = sexcat + +FILE_EXT = .fits + +NUMBERING_SCHEME = -000-000 + +# Path to the pickled PSF dictionary +PSF_DICT_PATH = /home/hervas/fhervas/workdir_skills/input/psf_files/Full_psf_dict.pickle diff --git 
a/example/cfis_image_sims/config_onthefly.mask_simu b/example/cfis_image_sims/config_onthefly.mask_simu new file mode 100644 index 000000000..1a63cc2e5 --- /dev/null +++ b/example/cfis_image_sims/config_onthefly.mask_simu @@ -0,0 +1,86 @@ +# Mask module configuration file for single-exposure images + +## Paths to executables +[PROGRAM_PATH] + +WW_PATH = weightwatcher +WW_CONFIG_FILE = $SP_CONFIG/mask_default/default.ww + +# Indicate cds client executable if no external star catalogue is available +# (e.g. no internet access on run nodes) +CDSCLIENT_PATH = findgsc2.2 + + +## Border mask +[BORDER_PARAMETERS] + +BORDER_MAKE = True + +BORDER_WIDTH = 50 +BORDER_FLAG_VALUE = 4 + + +## Halo mask +[HALO_PARAMETERS] + +HALO_MAKE = False + +HALO_MASKMODEL_PATH = $SP_CONFIG/mask_default/halo_mask.reg +HALO_MAG_LIM = 13. +HALO_SCALE_FACTOR = 0.05 +HALO_MAG_PIVOT = 13.8 +HALO_FLAG_VALUE = 2 +HALO_REG_FILE = halo.reg + + +## Diffraction spike mask +[SPIKE_PARAMETERS] + +SPIKE_MAKE = False + +SPIKE_MASKMODEL_PATH = $SP_CONFIG/mask_default/MEGAPRIME_star_i_13.8.reg +SPIKE_MAG_LIM = 18. +SPIKE_SCALE_FACTOR = 0.3 +SPIKE_MAG_PIVOT = 13.8 +SPIKE_FLAG_VALUE = 128 +SPIKE_REG_FILE = spike.reg + + +## Messier mask +[MESSIER_PARAMETERS] + +MESSIER_MAKE = False + +MESSIER_CAT_PATH = $SP_CONFIG/mask_default/Messier_catalog_updated.fits +MESSIER_SIZE_PLUS = 0. +MESSIER_FLAG_VALUE = 16 + + +## NGC mask +[NGC_PARAMETERS] + +NGC_MAKE = False + +NGC_CAT_PATH = $SP_CONFIG/mask_default/ngc_cat.fits +NGC_SIZE_PLUS = 0. 
+NGC_FLAG_VALUE = 32 + + + +## Missing data parameters +[MD_PARAMETERS] + +MD_MAKE = False + +MD_THRESH_FLAG = 0.3 +MD_THRESH_REMOVE = 0.75 +MD_REMOVE = False + + +## Other parameters +[OTHER] + +TEMP_DIRECTORY = .temp + +KEEP_REG_FILE = False +KEEP_INDIVIDUAL_MASK = False diff --git a/example/cfis_image_sims/config_tile_Ma_onthefly.ini b/example/cfis_image_sims/config_tile_Ma_onthefly.ini new file mode 100644 index 000000000..2e891246a --- /dev/null +++ b/example/cfis_image_sims/config_tile_Ma_onthefly.ini @@ -0,0 +1,82 @@ +# ShapePipe configuration file for masking of tiles + + +## Default ShapePipe options +[DEFAULT] + +# verbose mode (optional), default: True, print messages on terminal +VERBOSE = True + +# Name of run (optional) default: shapepipe_run +RUN_NAME = run_sp_tile_Ma + +# Add date and time to RUN_NAME, optional, default: False +; RUN_DATETIME = False + + +## ShapePipe execution options +[EXECUTION] + +# Module name, single string or comma-separated list of valid module runner names +MODULE = mask_runner + +# Parallel processing mode, SMP or MPI +MODE = SMP + + +## ShapePipe file handling options +[FILE] + +# Log file master name, optional, default: shapepipe +LOG_NAME = log_sp + +# Runner log file name, optional, default: shapepipe_runs +RUN_LOG_NAME = log_run_sp + +# Input directory, containing input files, single string or list of names +INPUT_DIR = $SP_RUN/output + +# Output directory +OUTPUT_DIR = $SP_RUN/output + + +## ShapePipe job handling options +[JOB] + +# Batch size of parallel processing (optional), default is 1, i.e. run all jobs in serial +SMP_BATCH_SIZE = 8 + +# Timeout value (optional), default is None, i.e. 
no timeout limit applied +TIMEOUT = 96:00:00 + + +## Module options + +### Mask tiles +[MASK_RUNNER] + +# Input directory, containing input files, single string or list of names +INPUT_DIR = run_sp_Git:get_images_runner, last:uncompress_fits_runner + +# NUMBERING_SCHEME (optional) string with numbering pattern for input files +NUMBERING_SCHEME = -000-000 + +# Input file pattern(s), list of strings with length matching number of expected input file types +# Cannot contain wild cards +FILE_PATTERN = CFIS_simu_image, CFIS_simu_weight + +# FILE_EXT (optional) list of string extensions to identify input files +FILE_EXT = .fits, .fits + +# Path of mask config file +MASK_CONFIG_PATH = $SP_CONFIG/config_tile_onthefly.mask_simu + +# External mask file flag, use if True, otherwise ignore +USE_EXT_FLAG = False + +# External star catalogue flag, use external cat if True, +# obtain from online catalogue if False +USE_EXT_STAR = False + +# File name suffix for the output flag files (optional) +PREFIX = pipeline diff --git a/example/cfis_image_sims/config_tile_Mh_exp.ini b/example/cfis_image_sims/config_tile_Mh_exp.ini new file mode 100644 index 000000000..0d3f9f8b3 --- /dev/null +++ b/example/cfis_image_sims/config_tile_Mh_exp.ini @@ -0,0 +1,72 @@ +# ShapePipe configuration file for merging per-exposure WCS headers +# at the tile level. Input is the exp_numbers file produced by +# find_exposures_runner; EXP_BASE_DIR tells the runner where to find +# the per-exposure split_exp_runner header .npy files. 
+ + +## Default ShapePipe options +[DEFAULT] + +# verbose mode (optional), default: True, print messages on terminal +VERBOSE = True + +# Name of run (optional) default: shapepipe_run +RUN_NAME = run_sp_tile_Mh_exp + +# Add date and time to RUN_NAME, optional, default: True +RUN_DATETIME = False + + +## ShapePipe execution options +[EXECUTION] + +# Module name, single string or comma-separated list of valid module runner names +MODULE = merge_headers_runner + +# Run mode, SMP or MPI +MODE = SMP + + +## ShapePipe file handling options +[FILE] + +# Log file master name, optional, default: shapepipe +LOG_NAME = log_sp + +# Runner log file name, optional, default: shapepipe_runs +RUN_LOG_NAME = log_run_sp + +# Input directory, containing input files, single string or list of names with length matching FILE_PATTERN +INPUT_DIR = . + +# Output directory +OUTPUT_DIR = $SP_RUN/output + + +## ShapePipe job handling options +[JOB] + +# Batch size of parallel processing (optional), default is 1, i.e. run all jobs in serial +SMP_BATCH_SIZE = 16 + +# Timeout value (optional), default is None, i.e. no timeout limit applied +TIMEOUT = 96:00:00 + + +## Module options + +[MERGE_HEADERS_RUNNER] + +# Input: exp_numbers txt file from find_exposures_runner +INPUT_DIR = last:find_exposures_runner + +FILE_PATTERN = exp_numbers + +FILE_EXT = .txt + +# Tile numbering scheme (RA-Dec, e.g. -301-279) +NUMBERING_SCHEME = -000-000 + +# Root directory containing all per-exposure work directories. +# The runner will walk this tree to collect headers-.npy files. 
+EXP_BASE_DIR = $SP_EXP diff --git a/example/cfis_image_sims/config_tile_Ng_batch_psfex_sx.ini b/example/cfis_image_sims/config_tile_Ng_batch_psfex_sx.ini new file mode 100644 index 000000000..0a329f441 --- /dev/null +++ b/example/cfis_image_sims/config_tile_Ng_batch_psfex_sx.ini @@ -0,0 +1,79 @@ +# ShapePipe configuration file for tiles: ngmix + + +## Default ShapePipe options +[DEFAULT] + +# verbose mode (optional), default: True, print messages on terminal +VERBOSE = True + +# Name of run (optional) default: shapepipe_run +RUN_NAME = run_sp_tile_Ng + +# Add date and time to RUN_NAME, optional, default: False +RUN_DATETIME = False + + +## ShapePipe execution options +[EXECUTION] + +# Module name, single string or comma-separated list of valid module runner names +MODULE = ngmix_runner + +# Parallel processing mode, SMP or MPI +MODE = SMP + + +## ShapePipe file handling options +[FILE] + +# Log file master name, optional, default: shapepipe +LOG_NAME = log_sp + +# Runner log file name, optional, default: shapepipe_runs +RUN_LOG_NAME = log_run_sp + +# Input directory, containing input files, single string or list of names +INPUT_DIR = . + +# Output directory +OUTPUT_DIR = $SP_RUN/output + + +## ShapePipe job handling options +[JOB] + +# Batch size of parallel processing (optional), default is 1, i.e. run all jobs in serial +SMP_BATCH_SIZE = 24 + +# Timeout value (optional), default is None, i.e. 
no timeout limit applied +TIMEOUT = 96:00:00 + + +## Module options + +# Model-fitting shapes with ngmix +[NGMIX_RUNNER] + +INPUT_DIR = run_sp_tile_Sx:sextractor_runner,run_sp_tile_fpsf:fake_psf_runner,last:vignetmaker_runner_run_2,run_sp_tile_Mh_exp:merge_headers_runner + +FILE_PATTERN = sexcat, image_vignet, galaxy_psf, weight_vignet, flag_vignet, log_exp_headers + +FILE_EXT = .fits, .sqlite, .sqlite, .sqlite, .sqlite, .sqlite + +# NUMBERING_SCHEME (optional) string with numbering pattern for input files +NUMBERING_SCHEME = -000-000 + +# Magnitude zero-point +MAG_ZP = 30.0 + +# No background subtraction for image sims (background not simulated) +BKG_SUB = False + +# Pixel scale in arcsec +PIXEL_SCALE = 0.186 + +SAVE_BATCH = 1000 + +ID_OBJ_MIN = -1 +ID_OBJ_MAX = -1 diff --git a/example/cfis_image_sims/config_tile_PiViVi_canfar_sx.ini b/example/cfis_image_sims/config_tile_PiViVi_canfar_sx.ini new file mode 100644 index 000000000..fb9b3c46a --- /dev/null +++ b/example/cfis_image_sims/config_tile_PiViVi_canfar_sx.ini @@ -0,0 +1,127 @@ +# ShapePipe configuration file for tile, from detection up to shape measurement. +# Fake PSF model. + + +## Default ShapePipe options +[DEFAULT] + +# verbose mode (optional), default: True, print messages on terminal +VERBOSE = True + +# Name of run (optional) default: shapepipe_run +RUN_NAME = run_sp_tile_ViVi + +# Add date and time to RUN_NAME, optional, default: False +; RUN_DATETIME = False + + +## ShapePipe execution options +[EXECUTION] + +# Module name, single string or comma-separated list of valid module runner names + +MODULE = vignetmaker_runner, vignetmaker_runner + +# Parallel processing mode, SMP or MPI +MODE = SMP + + +## ShapePipe file handling options +[FILE] + +# Log file master name, optional, default: shapepipe +LOG_NAME = log_sp + +# Runner log file name, optional, default: shapepipe_runs +RUN_LOG_NAME = log_run_sp + +# Input directory, containing input files, single string or list of names +INPUT_DIR = . 
+ +# Output directory +OUTPUT_DIR = $SP_RUN/output + + +## ShapePipe job handling options +[JOB] + +# Batch size of parallel processing (optional), default is 1, i.e. run all jobs in serial +SMP_BATCH_SIZE = 16 + +# Timeout value (optional), default is None, i.e. no timeout limit applied +TIMEOUT = 96:00:00 + + +## Module options + +# Create vignets for tiles weights +[VIGNETMAKER_RUNNER_RUN_1] + +INPUT_DIR = run_sp_tile_Sx:sextractor_runner, last:uncompress_fits_runner + +FILE_PATTERN = sexcat, CFIS_simu_weight + +FILE_EXT = .fits, .fits + +# NUMBERING_SCHEME (optional) string with numbering pattern for input files +NUMBERING_SCHEME = -000-000 + +MASKING = False +MASK_VALUE = 0 + +# Run mode for psfex interpolation: +# CLASSIC: 'classical' run, interpolate to object positions +# MULTI-EPOCH: interpolate for multi-epoch images +# VALIDATION: validation for single-epoch images +MODE = CLASSIC + +# Coordinate frame type, one in PIX (pixel frame), SPHE (spherical coordinates) +COORD = PIX +POSITION_PARAMS = XWIN_IMAGE,YWIN_IMAGE + +# Vignet size in pixels +STAMP_SIZE = 51 + +# Output file name prefix, file name is _vignet.fits +PREFIX = weight + + +[VIGNETMAKER_RUNNER_RUN_2] + +# Create multi-epoch vignets for tiles corresponding to +# positions on single-exposures + +INPUT_DIR = run_sp_tile_Sx:sextractor_runner, run_sp_tile_Mh_exp:merge_headers_runner, last:find_exposures_runner + +FILE_PATTERN = sexcat, log_exp_headers, exp_numbers + +FILE_EXT = .fits, .sqlite, .txt + +# NUMBERING_SCHEME (optional) string with numbering pattern for input files +NUMBERING_SCHEME = -000-000 + +MASKING = False +MASK_VALUE = 0 + +# Run mode for psfex interpolation: +# CLASSIC: 'classical' run, interpolate to object positions +# MULTI-EPOCH: interpolate for multi-epoch images +# VALIDATION: validation for single-epoch images +MODE = MULTI-EPOCH + +# Coordinate frame type, one in PIX (pixel frame), SPHE (spherical coordinates) +COORD = SPHE +POSITION_PARAMS = XWIN_WORLD,YWIN_WORLD + +# 
Vignet size in pixels +STAMP_SIZE = 51 + +# Output file name prefix, file name is vignet.fits +PREFIX = + +# Additional parameters for path and file pattern corresponding to single-exposure +# run outputs. ME_IMAGE_EXP_DIR/ME_IMAGE_EXP_RUNNERS replace ME_IMAGE_DIR for +# the v2.0 per-exposure pipeline; output dirs are discovered by scanning $SP_EXP. +ME_IMAGE_EXP_DIR = $SP_EXP +ME_IMAGE_EXP_RUNNERS = split_exp_runner, split_exp_runner, split_exp_runner +ME_IMAGE_PATTERN = flag, image, weight diff --git a/example/cfis_image_sims/config_tile_Sx_nomask.ini b/example/cfis_image_sims/config_tile_Sx_nomask.ini new file mode 100644 index 000000000..62d1bffed --- /dev/null +++ b/example/cfis_image_sims/config_tile_Sx_nomask.ini @@ -0,0 +1,114 @@ +# ShapePipe configuration file for tile detection + + +## Default ShapePipe options +[DEFAULT] + +# verbose mode (optional), default: True, print messages on terminal +VERBOSE = True + +# Name of run (optional) default: shapepipe_run +RUN_NAME = run_sp_tile_Sx + +# Add date and time to RUN_NAME, optional, default: True +; RUN_DATETIME = False + + +## ShapePipe execution options +[EXECUTION] + +# Module name, single string or comma-separated list of valid module runner names +MODULE = sextractor_runner + + +# Run mode, SMP or MPI +MODE = SMP + + +## ShapePipe file handling options +[FILE] + +# Log file master name, optional, default: shapepipe +LOG_NAME = log_sp + +# Runner log file name, optional, default: shapepipe_runs +RUN_LOG_NAME = log_run_sp + +# Input directory, containing input files, single string or list of names with length matching FILE_PATTERN +INPUT_DIR = $SP_RUN/output + +# Output directory +OUTPUT_DIR = $SP_RUN/output + + +## ShapePipe job handling options +[JOB] + +# Batch size of parallel processing (optional), default is 1, i.e. run all jobs in serial +SMP_BATCH_SIZE = 16 + +# Timeout value (optional), default is None, i.e. 
no timeout limit applied +TIMEOUT = 96:00:00 + + +## Module options + +[SEXTRACTOR_RUNNER] + +INPUT_DIR = run_sp_tile_Git:get_images_runner, last:uncompress_fits_runner, run_sp_tile_Mh_exp:merge_headers_runner + +FILE_PATTERN = CFIS_simu_image, CFIS_simu_weight, log_exp_headers + +FILE_EXT = .fits, .fits, .sqlite + +# NUMBERING_SCHEME (optional) string with numbering pattern for input files +NUMBERING_SCHEME = -000-000 + +# SExtractor executable path +EXEC_PATH = source-extractor + +# SExtractor configuration files +DOT_SEX_FILE = $SP_CONFIG/default_tile.sex +DOT_PARAM_FILE = $SP_CONFIG/default_noimaflags.param +DOT_CONV_FILE = $SP_CONFIG/default.conv + +# Use input weight image if True +WEIGHT_IMAGE = True + +# Use input flag image if True +FLAG_IMAGE = False + +# Use input PSF file if True +PSF_FILE = False + +# Use distinct image for detection (SExtractor in +# dual-image mode) if True +DETECTION_IMAGE = False + +# Distinct weight image for detection (SExtractor +# in dual-image mode) +DETECTION_WEIGHT = False + +ZP_FROM_HEADER = False + +BKG_FROM_HEADER = False + +# Type of image check (optional), default not used, can be a list of +# BACKGROUND, BACKGROUND_RMS, INIBACKGROUND, +# MINIBACK_RMS, -BACKGROUND, #FILTERED, +# OBJECTS, -OBJECTS, SEGMENTATION, APERTURES +CHECKIMAGE = BACKGROUND + +# File name suffix for the output sextractor files (optional) +SUFFIX = sexcat + +## Post-processing + +# Necessary for tiles, to enable multi-exposure processing +MAKE_POST_PROCESS = True + +# World coordinate keywords, SExtractor output. Format: KEY_X,KEY_Y +WORLD_POSITION = XWIN_WORLD,YWIN_WORLD + +# Number of pixels in x,y of a CCD. 
Format: Nx,Ny +CCD_SIZE = 33,2080,1,4612 diff --git a/example/cfis_image_sims/config_tile_onthefly.mask_simu b/example/cfis_image_sims/config_tile_onthefly.mask_simu new file mode 100644 index 000000000..f42d05d39 --- /dev/null +++ b/example/cfis_image_sims/config_tile_onthefly.mask_simu @@ -0,0 +1,90 @@ +# Mask module config file for tiles + +## Paths to executables +[PROGRAM_PATH] + +WW_PATH = weightwatcher +WW_CONFIG_FILE = $SP_CONFIG/mask_default/default.ww + +# Indicate cds client executable if no external star catalogue is available +# (e.g. no internet access on run nodes) +CDSCLIENT_PATH = findgsc2.2 + +## Border parameters +[BORDER_PARAMETERS] + +BORDER_MAKE = False + +BORDER_WIDTH = 1 +BORDER_FLAG_VALUE = 4 + + +## Halo parameters +[HALO_PARAMETERS] + +HALO_MAKE = False + +HALO_MASKMODEL_PATH = $SP_CONFIG/mask_default/halo_mask.reg +HALO_MAG_LIM = 13. +HALO_SCALE_FACTOR = 0.05 +HALO_MAG_PIVOT = 13.8 +HALO_FLAG_VALUE = 2 +HALO_REG_FILE = halo.reg + + +## Diffraction spike parameters +[SPIKE_PARAMETERS] + +SPIKE_MAKE = False + +SPIKE_MASKMODEL_PATH = $SP_CONFIG/mask_default/MEGAPRIME_star_i_13.8.reg +SPIKE_MAG_LIM = 18. +SPIKE_SCALE_FACTOR = 0.3 +SPIKE_MAG_PIVOT = 13.8 +SPIKE_FLAG_VALUE = 128 +SPIKE_REG_FILE = spike.reg + + +## Messier parameters +[MESSIER_PARAMETERS] + +MESSIER_MAKE = False + +MESSIER_CAT_PATH = $SP_CONFIG/mask_default/Messier_catalog_updated.fits +MESSIER_PIXEL_SCALE = 0.187 +MESSIER_SIZE_PLUS = 0. +MESSIER_FLAG_VALUE = 16 + +## NGC mask +[NGC_PARAMETERS] + +NGC_MAKE = False + +NGC_CAT_PATH = $SP_CONFIG/mask_default/ngc_cat.fits +NGC_SIZE_PLUS = 0. 
+NGC_FLAG_VALUE = 32 + + +## External flag +[EXTERNAL_FLAG] + +EF_MAKE = False + + +## Missing data parameters +[MD_PARAMETERS] + +MD_MAKE = False + +MD_THRESH_FLAG = 0.3 +MD_THRESH_REMOVE = 0.75 +MD_REMOVE = False + + +## Other parameters +[OTHER] + +KEEP_REG_FILE = False +KEEP_INDIVIDUAL_MASK = False + +TEMP_DIRECTORY = .temp_tiles diff --git a/example/cfis_image_sims/default.conv b/example/cfis_image_sims/default.conv new file mode 120000 index 000000000..bd71df850 --- /dev/null +++ b/example/cfis_image_sims/default.conv @@ -0,0 +1 @@ +../cfis/default.conv \ No newline at end of file diff --git a/example/cfis_image_sims/default.param b/example/cfis_image_sims/default.param new file mode 120000 index 000000000..49e000314 --- /dev/null +++ b/example/cfis_image_sims/default.param @@ -0,0 +1 @@ +../cfis/default.param \ No newline at end of file diff --git a/example/cfis_image_sims/default_noimaflags.param b/example/cfis_image_sims/default_noimaflags.param new file mode 120000 index 000000000..75451801e --- /dev/null +++ b/example/cfis_image_sims/default_noimaflags.param @@ -0,0 +1 @@ +../cfis/default_noimaflags.param \ No newline at end of file diff --git a/example/cfis_image_sims/default_tile.sex b/example/cfis_image_sims/default_tile.sex new file mode 120000 index 000000000..8770da87e --- /dev/null +++ b/example/cfis_image_sims/default_tile.sex @@ -0,0 +1 @@ +../cfis/default_tile.sex \ No newline at end of file diff --git a/scripts/sh/job_list_help.bash b/scripts/sh/job_list_help.bash index 52430a434..a4a43bd3f 100644 --- a/scripts/sh/job_list_help.bash +++ b/scripts/sh/job_list_help.bash @@ -13,7 +13,7 @@ JOB_LIST_HELP="\ \t 32: mask exposures (online if star_cat_for_mask=onthefly)\n\ \t 64: process stars on exposures, PSF model (offline)\n\ \t 128: merge exposure WCS headers into tile-level sqlite log\n\ - \t 256: object selection on tiles (online if UNIONS catalogue or tar_cat_for_mask=onthefly)\n\ + \t 256: object selection on tiles (online if UNIONS catalogue 
or star_cat_for_mask=onthefly)\n\ \t 512: postage stamp creation\n\ \t1024: multi-epoch shape measurement\n\ \t2048: create final catalogue\n\ diff --git a/scripts/sh/job_sp_canfar_v2.0.bash b/scripts/sh/job_sp_canfar_v2.0.bash index 295722fdf..19ca661e2 100755 --- a/scripts/sh/job_sp_canfar_v2.0.bash +++ b/scripts/sh/job_sp_canfar_v2.0.bash @@ -16,7 +16,7 @@ source $HOME/shapepipe/scripts/sh/job_list_help.bash ## Default values job=255 config_dir=$HOME/shapepipe/example/cfis -psf='mccd' +psf='psfex' retrieve='vos' star_cat_for_mask='onthefly' tile_det='sx' @@ -124,8 +124,8 @@ while [ $# -gt 0 ]; do done ## Check options -if [ "$psf" != "psfex" ] && [ "$psf" != "mccd" ]; then - echo "PSF (option -p) needs to be 'psfex' or 'mccd'" +if [ "$psf" != "psfex" ] && [ "$psf" != "mccd" ] && [ "$psf" != "psf" ]; then + echo "PSF (option -p) needs to be 'psfex', 'mccd', or 'psf' (image sims)" exit 2 fi @@ -161,15 +161,16 @@ fi # Run path and location of input image directories export SP_RUN=`pwd` -# Config file path -export SP_CONFIG=$SP_RUN/cfis +# Config file path — use value exported by run_job_sp_canfar_v2.0.bash if set, +# otherwise fall back to the cfis symlink in the run directory. +export SP_CONFIG=${SP_CONFIG:-$SP_RUN/cfis} # Root directory for per-exposure work directories. -# Set SP_EXP in the environment to override; otherwise derive it by stripping -# the /tiles/... suffix from SP_RUN — robust to any directory depth and to -# both data (.../v2.0/tiles/IDra/ID) and image_sims (.../grid_N/tiles/IDra/ID). +# Set SP_EXP in the environment to override; otherwise use SP_DIR (the run +# root, always exported by run_job_sp_canfar_v2.0.bash for both data and +# image_sims) so exp/ is always a sibling of tiles/ under the same root. 
if [ -z "${SP_EXP}" ]; then - export SP_EXP="${SP_RUN%/tiles/*}/exp" + export SP_EXP="$SP_DIR/exp" echo "Setting SP_EXP to $SP_EXP" fi diff --git a/scripts/sh/run_job_sp_canfar_v2.0.bash b/scripts/sh/run_job_sp_canfar_v2.0.bash index 03df41ff6..e8a327b68 100755 --- a/scripts/sh/run_job_sp_canfar_v2.0.bash +++ b/scripts/sh/run_job_sp_canfar_v2.0.bash @@ -155,7 +155,6 @@ function message() { } - # Initialise exposure work directory: create dirs, exp_numbers file, config symlink. # The exp_numbers-000-000.txt file is created only once (skipped if already exists). # Args: $1 = exp_id, $2 = exp_work_dir @@ -215,11 +214,17 @@ function run_exp_job() { (( n_total++ )) - # exp_id e.g. "2182795p": ab = first 2 chars, abcdefg = all but last char + # exp_id e.g. "2182795p" (data) or "208659" (image_sims) + # Strip trailing letter if present (data format); keep full id if numeric only. local exp_prefix="${exp_id:0:2}" - local exp_base="${exp_id%?}" + local exp_base + if [[ "${exp_id: -1}" =~ [a-zA-Z] ]]; then + exp_base="${exp_id%?}" + else + exp_base="$exp_id" + fi local exp_id_disp="${exp_prefix}/${exp_base}" - local exp_work_dir="$HOME/v${version}/exp/$exp_prefix/$exp_base" + local exp_work_dir="$dir/exp/$exp_prefix/$exp_base" local exp_log_file="$exp_work_dir/job_sp_canfar_v2.0.log" # Create exp_numbers-000-000.txt and cfis link if not existent @@ -327,7 +332,7 @@ function run_exp_job() { echo "$(basename "$0") -j $exp_job -e $exp_id" > "$exp_log_file" echo "pwd=`pwd`" - command "job_sp_canfar_v2.0.bash -p $psf -r $retrieve --tile_det $tile_det --tile_mask $tile_mask -j $exp_job --n_smp $N_SMP --nsh_jobs $N_SMP $debug_flag" $dry_run 2>&1 | tee -a "$exp_log_file" + command "job_sp_canfar_v2.0.bash -c $config_dir -p $psf -r $retrieve --tile_det $tile_det --tile_mask $tile_mask -j $exp_job --n_smp $N_SMP --nsh_jobs $N_SMP $debug_flag" $dry_run 2>&1 | tee -a "$exp_log_file" echo "Done with job_sp_canfar_v2.0.bash" done < "$exp_numbers_file" @@ -461,7 +466,7 @@ function 
run_tile_job() { command "update_runs_log_file.py" $dry_run # Run job script - command "job_sp_canfar_v2.0.bash -p $psf -r $retrieve --tile_det $tile_det --tile_mask $tile_mask -j $tile_job --n_smp $N_SMP --nsh_jobs $N_SMP $debug_flag" $dry_run 2>&1 | tee -a "$log_file" + command "job_sp_canfar_v2.0.bash -c $config_dir -p $psf -r $retrieve --tile_det $tile_det --tile_mask $tile_mask -j $tile_job --n_smp $N_SMP --nsh_jobs $N_SMP $debug_flag" $dry_run 2>&1 | tee -a "$log_file" } @@ -470,16 +475,20 @@ if [ "$type" == "data" ]; then echo "Running on data" retrieve="vos" config_dir=$HOME/shapepipe/example/cfis + export SP_DIR=$dir + export SP_CONFIG=$config_dir elif [ "$type" == "image_sims" ]; then echo "Running on image simulations" retrieve="symlink" config_dir=$HOME/shapepipe/example/cfis_image_sims - # SP_DIR points to the run directory (grid level) where input_tiles and - # input_exp live; configs use $SP_DIR/input_* so those dirs stay outside - # SP_RUN and are not found twice by ShapePipe's recursive glob scan. + # SP_DIR points to the run directory where input_tiles and input_exp live; + # configs use $SP_DIR/input_* so those dirs stay outside SP_RUN and are + # not found twice by ShapePipe's recursive glob scan. 
export SP_DIR=$dir + export SP_CONFIG=$config_dir + tile_det='sx' else @@ -510,7 +519,7 @@ if [ "$ID" == "-1" ]; then fi if [ "$psf" != "psfex" ] && [ "$psf" != "mccd" ]; then - message "PSF (option -p) needs to be 'psfex' or 'mccd'" "$debug_out" 4 + message "PSF (option -p) needs to be 'psfex' or 'mccd', not '$psf'" "$debug_out" 4 fi if [ "$dry_run" != "0" ] && [ "$dry_run" != "1" ]; then @@ -633,8 +642,12 @@ fi (( do_job = job & 64 )) if [[ $do_job != 0 ]]; then - # Job 64: process stars on exposures, PSF model - if [ "$psf" == "psfex" ]; then + # Job 64: PSF model + # For image_sims: build fake PSF stamps at tile level from pre-computed dict + # For data: run full exposure-level PSF modelling pipeline + if [ "$type" == "image_sims" ]; then + run_tile_job 64 "fpsf" "fake_psf_runner:1" + elif [ "$psf" == "psfex" ]; then run_exp_job 64 "SxSePsf${Letter}i" "sextractor_runner:80 psfex_runner:80 psfex_interp_runner:40::warn setools_runner:80:rand_split" else message "MCCD not implemented yet for v2.0" "$debug_out" 10 @@ -659,8 +672,12 @@ fi (( do_job = job & 512 )) if [[ $do_job != 0 ]]; then - # Job 512: process tiles (PSF interp, vignet) - run_tile_job 512 "${Letter}iViVi ${Letter}iViVi ${Letter}iViVi" "psfex_interp_runner:1 vignetmaker_runner_run_1:1 vignetmaker_runner_run_2:4" + # Job 512: process tiles ([PSF interp,] vignets) + if [ "$type" == "data" ]; then + run_tile_job 512 "${Letter}iViVi ${Letter}iViVi ${Letter}iViVi" "psfex_interp_runner:1 vignetmaker_runner_run_1:1 vignetmaker_runner_run_2:4" + else + run_tile_job 512 "ViVi VViVi" "vignetmaker_runner_run_1:1 vignetmaker_runner_run_2:4" + fi fi (( do_job = job & 1024 )) diff --git a/src/shapepipe/modules/fake_psf_package/__init__.py b/src/shapepipe/modules/fake_psf_package/__init__.py new file mode 100644 index 000000000..c77dd5658 --- /dev/null +++ b/src/shapepipe/modules/fake_psf_package/__init__.py @@ -0,0 +1,28 @@ +"""FAKE PSF PACKAGE. 
+ +This package creates fake PSF postage-stamp dictionaries for image simulations. + +:Author: Martin Kilbinger + +:Parent module: ``sextractor_runner`` + +:Input: Tile SExtractor catalogue (multi-epoch FITS format) + +:Output: Per-galaxy PSF sqlite dictionary (``galaxy_psf-XXX-XXX.sqlite``) + +Description +=========== + +For each galaxy in the tile catalogue the module looks up the pre-computed +PSF stamp for every contributing exposure-CCD combination from a pickled +PSF dictionary and writes the result to a SqliteDict file in the same format +produced by ``psfex_interp_runner``. + +Module-specific config file entries +==================================== + +PSF_DICT_PATH : str + Path to the pickled PSF dictionary (``Full_psf_dict.pickle``). +""" + +__all__ = ["fake_psf.py"] diff --git a/src/shapepipe/modules/fake_psf_package/fake_psf.py b/src/shapepipe/modules/fake_psf_package/fake_psf.py new file mode 100644 index 000000000..4c2ac4bc2 --- /dev/null +++ b/src/shapepipe/modules/fake_psf_package/fake_psf.py @@ -0,0 +1,89 @@ +"""FAKE PSF. + +Create fake PSF postage-stamp dictionaries for image simulations. + +:Author: Martin Kilbinger + +""" + +import pickle + +import numpy as np +from astropy.io import fits +from sqlitedict import SqliteDict + + +class FakePsf: + """Fake PSF. + + Parameters + ---------- + sexcat_path : str + Path to the tile SExtractor catalogue (multi-epoch FITS format). + psf_dict_path : str + Path to the pickled PSF dictionary. + output_path : str + Path for the output SqliteDict file. + w_log : logging.Logger + Pipeline logger. 
+ """ + + def __init__(self, sexcat_path, psf_dict_path, output_path, w_log): + self._sexcat_path = sexcat_path + self._psf_dict_path = psf_dict_path + self._output_path = output_path + self._w_log = w_log + + def process(self): + """Run fake PSF creation.""" + self._w_log.info(f"Reading sexcat: {self._sexcat_path}") + sex = fits.open(self._sexcat_path, ignore_missing_simple=True) + + self._w_log.info(f"Loading PSF dictionary: {self._psf_dict_path}") + with open(self._psf_dict_path, "rb") as f: + psf_dict = pickle.load(f) + + n_gal = len(sex[3].data.field("NUMBER")) + n_exp = len(sex) - 3 + + self._w_log.info(f"Processing {n_gal} galaxies over {n_exp} epochs") + + # Build (n_gal, n_exp) array of "expname-ccdnum" strings + string_array = np.empty((n_gal, n_exp), dtype="object") + for exp_ind in range(3, len(sex)): + ccd_n = sex[exp_ind].data.field("CCD_N") + exp_name = sex[exp_ind].data.field("EXP_NAME") + string_array[:, exp_ind - 3] = [ + f"{int(exp)}-{int(ccd)}" + for exp, ccd in zip(exp_name, ccd_n) + ] + + # Mask invalid entries (exp or ccd == -99 produces strings with "--") + mask = np.array( + [["--" in item for item in row] for row in string_array] + ) + masked = np.ma.masked_array(string_array, mask) + + # Build per-galaxy PSF dictionaries + output_file = SqliteDict(self._output_path) + missing = 0 + for idx, gal_row in enumerate(masked): + galaxy_number = idx + 1 # 1-based, matches NUMBER field + gal_dict = {} + for exp_ccd in gal_row.compressed(): + if exp_ccd not in psf_dict: + missing += 1 + self._w_log.warning( + f"Galaxy {galaxy_number}: key '{exp_ccd}' not in PSF dict" + ) + continue + gal_dict[exp_ccd] = psf_dict[exp_ccd] + output_file[str(galaxy_number)] = gal_dict + + output_file.commit() + output_file.close() + sex.close() + + if missing: + self._w_log.warning(f"{missing} missing PSF entries across all galaxies") + self._w_log.info(f"Written: {self._output_path}") diff --git a/src/shapepipe/modules/fake_psf_runner.py 
b/src/shapepipe/modules/fake_psf_runner.py new file mode 100644 index 000000000..d3dcbbc0e --- /dev/null +++ b/src/shapepipe/modules/fake_psf_runner.py @@ -0,0 +1,36 @@ +"""FAKE PSF RUNNER. + +Module runner for ``fake_psf``. + +:Author: Martin Kilbinger + +""" + +from shapepipe.modules.fake_psf_package import fake_psf +from shapepipe.modules.module_decorator import module_runner + + +@module_runner( + version="1.0", + file_pattern=["sexcat"], + file_ext=".fits", + depends=["numpy", "astropy", "sqlitedict"], + numbering_scheme="-000-000", +) +def fake_psf_runner( + input_file_list, + run_dirs, + file_number_string, + config, + module_config_sec, + w_log, +): + """Define The Fake PSF Runner.""" + sexcat_path = input_file_list[0] + psf_dict_path = config.getexpanded(module_config_sec, "PSF_DICT_PATH") + output_path = f'{run_dirs["output"]}/galaxy_psf{file_number_string}.sqlite' + + inst = fake_psf.FakePsf(sexcat_path, psf_dict_path, output_path, w_log) + inst.process() + + return None, None diff --git a/src/shapepipe/modules/mask_package/__init__.py b/src/shapepipe/modules/mask_package/__init__.py index bdfece65b..8f203eafd 100644 --- a/src/shapepipe/modules/mask_package/__init__.py +++ b/src/shapepipe/modules/mask_package/__init__.py @@ -83,8 +83,6 @@ environment will be used WW_CONFIG_FILE : str Path to the WeightWatcher configuration file -CDSCLIENT_PATH : str, optional - Path to CDS client executable; required if ``USE_EXT_STAR = False`` [BORDER_PARAMETERS] ------------------- diff --git a/src/shapepipe/modules/mask_package/mask.py b/src/shapepipe/modules/mask_package/mask.py index 17e052a6e..4e07e054d 100644 --- a/src/shapepipe/modules/mask_package/mask.py +++ b/src/shapepipe/modules/mask_package/mask.py @@ -172,17 +172,8 @@ def _get_config(self): self._config["PATH"]["WW_configfile"] = conf.getexpanded( "PROGRAM_PATH", "WW_CONFIG_FILE" ) - if conf.has_option("PROGRAM_PATH", "CDSCLIENT_PATH"): - self._config["PATH"]["CDSclient"] = conf.getexpanded( - 
"PROGRAM_PATH", "CDSCLIENT_PATH" - ) - elif self._star_cat_path is not None: + if self._star_cat_path is not None: self._config["PATH"]["star_cat"] = self._star_cat_path - else: - raise ValueError( - "Either [PROGRAM_PATH]:CDSCLIENT_PATH in the mask config file " - + " or a star catalogue as module input needs to be present" - ) self._config["PATH"]["temp_dir"] = self._get_temp_dir_path( conf.getexpanded("OTHER", "TEMP_DIRECTORY") @@ -429,9 +420,11 @@ def make_mask(self): ) # Handle stdout / stderr - general_stdout = f"\nCDSClient\n{self._CDS_stdout}" + general_stdout = "" general_stderr = "" - if self._CDS_stderr != "": + if hasattr(self, "_CDS_stdout"): + general_stdout += f"\nCDSClient\n{self._CDS_stdout}" + if hasattr(self, "_CDS_stderr") and self._CDS_stderr != "": general_stderr += f"\nCDSClient\n{self._CDS_stderr}" if hasattr(self, "_WW_stdout") or hasattr(self, "_WW_stdout"): general_stdout += f"\n\nWeightWatcher\n{self._WW_stdout}" diff --git a/src/shapepipe/modules/ngmix_package/ngmix.py b/src/shapepipe/modules/ngmix_package/ngmix.py index b2516e605..a1f204252 100644 --- a/src/shapepipe/modules/ngmix_package/ngmix.py +++ b/src/shapepipe/modules/ngmix_package/ngmix.py @@ -149,12 +149,11 @@ def __init__( psf_vignet_path, weight_vignet_path, flag_vignet_path, - f_wcs_path - + f_wcs_path, ): self.f_wcs_file = SqliteDict(f_wcs_path) self.gal_vign_cat = SqliteDict(gal_vignet_path) - self.bkg_vign_cat = SqliteDict(bkg_vignet_path) + self.bkg_vign_cat = SqliteDict(bkg_vignet_path) if bkg_vignet_path is not None else None self.psf_vign_cat = SqliteDict(psf_vignet_path) self.weight_vign_cat = SqliteDict(weight_vignet_path) self.flag_vign_cat = SqliteDict(flag_vignet_path) @@ -162,7 +161,8 @@ def __init__( def close(self): self.f_wcs_file.close() self.gal_vign_cat.close() - self.bkg_vign_cat.close() + if self.bkg_vign_cat is not None: + self.bkg_vign_cat.close() self.flag_vign_cat.close() self.weight_vign_cat.close() self.psf_vign_cat.close() @@ -220,28 +220,35 @@ 
def __init__( save_batch=-1, id_obj_min=-1, id_obj_max=-1, + bkg_sub=True, ): - if len(input_file_list) != 6: + n_expected = 6 if bkg_sub else 5 + if len(input_file_list) != n_expected: raise IndexError( f"Input file list has length {len(input_file_list)}," - + " required is 6" + + f" required is {n_expected}" ) self._tile_cat_path = input_file_list[0] + if bkg_sub: + bkg_path, psf_path, weight_path, flag_path = ( + input_file_list[2], input_file_list[3], + input_file_list[4], input_file_list[5], + ) + else: + bkg_path, psf_path, weight_path, flag_path = ( + None, input_file_list[2], + input_file_list[3], input_file_list[4], + ) self._vignet_cat = Vignet( input_file_list[1], - input_file_list[2], - input_file_list[3], - input_file_list[4], - input_file_list[5], - f_wcs_path + bkg_path, + psf_path, + weight_path, + flag_path, + f_wcs_path, ) - #self._gal_vignet_path = input_file_list[1] - #self._bkg_vignet_path = input_file_list[2] - #self._psf_vignet_path = input_file_list[3] - #self._weight_vignet_path = input_file_list[4] - #self._flag_vignet_path = input_file_list[5] @@ -258,6 +265,7 @@ def __init__( self._save_batch = save_batch self._id_obj_min = id_obj_min self._id_obj_max = id_obj_max + self._bkg_sub = bkg_sub self._w_log = w_log @@ -423,6 +431,8 @@ def compile_results(self, results): output_dict[name]["g2_err"].append( np.sqrt(results[idx][name]["g_cov"][1, 1]) ) + output_dict[name]["flux"].append(results[idx][name]["flux"]) + output_dict[name]["flux_err"].append(results[idx][name]["flux_err"]) output_dict[name]["mag"].append(mag) output_dict[name]["mag_err"].append(mag_err) @@ -640,7 +650,7 @@ def process(self): n_empty_cat += 1 continue - stamp = prepare_postage_stamps(vignet_cat, obj_id, i_tile, tile_cat) + stamp = prepare_postage_stamps(vignet_cat, obj_id, i_tile, tile_cat, self._bkg_sub) if len(stamp.gals) == 0: n_no_epoch += 1 @@ -720,9 +730,9 @@ def process(self): # Save results self.save_results(res_dict) -def prepare_postage_stamps(vignet, obj_id, 
i_tile, tile_cat): +def prepare_postage_stamps(vignet, obj_id, i_tile, tile_cat, bkg_sub=True): # define per-object lists of individual exposures to go into ngmix - stamp = Postage_stamp() + stamp = Postage_stamp(bkg_sub=bkg_sub) #identify exposure and ccd number from psf catalog psf_expccd_names = list(vignet.psf_vign_cat[str(obj_id)].keys()) for expccd_name in psf_expccd_names: diff --git a/src/shapepipe/modules/ngmix_runner.py b/src/shapepipe/modules/ngmix_runner.py index b0e91a236..d52c1b427 100644 --- a/src/shapepipe/modules/ngmix_runner.py +++ b/src/shapepipe/modules/ngmix_runner.py @@ -47,9 +47,6 @@ def ngmix_runner( # Pixel scale pixel_scale = config.getfloat(module_config_sec, "PIXEL_SCALE") - # Path to merged single-exposure single-HDU headers - f_wcs_path = input_file_list[6] - # Input directory to check for already retrieved files if config.has_option(module_config_sec, "CHECK_EXISTING_DIR"): check_existing_dir = config.getexpanded( @@ -73,10 +70,16 @@ def ngmix_runner( id_obj_min = config.getint(module_config_sec, "ID_OBJ_MIN") id_obj_max = config.getint(module_config_sec, "ID_OBJ_MAX") + # Background subtraction (disable for image sims where background is absent) + bkg_sub = config.getboolean(module_config_sec, "BKG_SUB", fallback=True) + + # wcs path and vignet slice depend on whether background vignet is present + wcs_idx = 6 if bkg_sub else 5 + f_wcs_path = input_file_list[wcs_idx] + # Initialise class instance - # input_file_list[6] is log_exp_headers, already extracted as f_wcs_path ngmix_inst = Ngmix( - input_file_list[:6], + input_file_list[:wcs_idx], run_dirs["output"], file_number_string, zero_point, @@ -87,6 +90,7 @@ def ngmix_runner( save_batch=save_batch, id_obj_min=id_obj_min, id_obj_max=id_obj_max, + bkg_sub=bkg_sub, ) # Process ngmix shape measurement and metacalibration diff --git a/src/shapepipe/modules/sextractor_package/sextractor_script.py b/src/shapepipe/modules/sextractor_package/sextractor_script.py index 3238edf46..b24d6e6a8 
100644 --- a/src/shapepipe/modules/sextractor_package/sextractor_script.py +++ b/src/shapepipe/modules/sextractor_package/sextractor_script.py @@ -95,9 +95,13 @@ def make_post_process(cat_path, f_wcs_path, pos_params, ccd_size): history.append(idx) exp_list = [] - pattern = r"([0-9]*)p\.(.*)" + pattern = r"([0-9]+)p?\.(.*)" for hist in history: m = re.search(pattern, hist) + if m is None: + raise ValueError( + f"Could not parse exposure ID from HISTORY entry: '{hist}'" + ) exp_list.append(m.group(1)) obj_id = np.copy(cat.get_data()["NUMBER"]) diff --git a/src/shapepipe/pipeline/exp_utils.py b/src/shapepipe/pipeline/exp_utils.py index 8f0df784a..4fdca1834 100644 --- a/src/shapepipe/pipeline/exp_utils.py +++ b/src/shapepipe/pipeline/exp_utils.py @@ -31,9 +31,9 @@ def get_exp_output_files( ///output/run_sp_*//output/ - where ``exp_prefix = exp_id[:2]`` and ``exp_base = exp_id[:-1]`` (the - trailing letter, typically ``p``, is stripped because directory names - do not carry it). + where ``exp_prefix = exp_id[:2]`` and ``exp_base`` is ``exp_id`` with the + trailing letter stripped if present (e.g. ``2113864p`` → ``2113864``), or + the full ``exp_id`` for numeric-only IDs (image simulations). Parameters ---------- @@ -88,9 +88,10 @@ def get_exp_output_files( for exp_id in exp_ids: # Directory structure mirrors run_job_canfar_v2.0.sh: # exp_prefix = first 2 chars of exp_id (e.g. "21") - # exp_base = exp_id without trailing letter (e.g. "2113864") + # exp_base = exp_id without trailing letter if present (e.g. 
"2113864"), + # or full exp_id for numeric-only ids (image sims) exp_prefix = exp_id[:2] - exp_base = exp_id[:-1] + exp_base = exp_id[:-1] if exp_id[-1].isalpha() else exp_id pattern = os.path.join( exp_base_dir, @@ -183,7 +184,7 @@ def get_exp_output_dirs( for exp_id in exp_ids: exp_prefix = exp_id[:2] - exp_base = exp_id[:-1] + exp_base = exp_id[:-1] if exp_id[-1].isalpha() else exp_id pattern = os.path.join( exp_base_dir, From f7fffdc17382bea0570c7d0c758f70846d089465 Mon Sep 17 00:00:00 2001 From: martinkilbinger Date: Fri, 5 Jun 2026 18:29:42 +0200 Subject: [PATCH 04/24] Running on im sims incl final cat --- ...sfex_nosm.ini => config_tile_Mc_psfex.ini} | 0 .../cfis_image_sims/config_tile_Mc_psfex.ini | 73 +++++++ scripts/sh/job_sp_canfar_v2.0.bash | 4 +- scripts/sh/run_job_sp_canfar_v2.0.bash | 9 +- scripts/sh/run_scratch_local.sh | 178 ------------------ .../modules/make_cat_package/make_cat.py | 71 ------- src/shapepipe/modules/make_cat_runner.py | 49 +---- 7 files changed, 86 insertions(+), 298 deletions(-) rename example/cfis/{config_make_cat_psfex_nosm.ini => config_tile_Mc_psfex.ini} (100%) create mode 100644 example/cfis_image_sims/config_tile_Mc_psfex.ini delete mode 100755 scripts/sh/run_scratch_local.sh diff --git a/example/cfis/config_make_cat_psfex_nosm.ini b/example/cfis/config_tile_Mc_psfex.ini similarity index 100% rename from example/cfis/config_make_cat_psfex_nosm.ini rename to example/cfis/config_tile_Mc_psfex.ini diff --git a/example/cfis_image_sims/config_tile_Mc_psfex.ini b/example/cfis_image_sims/config_tile_Mc_psfex.ini new file mode 100644 index 000000000..795f07806 --- /dev/null +++ b/example/cfis_image_sims/config_tile_Mc_psfex.ini @@ -0,0 +1,73 @@ +# ShapePipe post-run configuration file: create final catalogs for psfex + + +## Default ShapePipe options +[DEFAULT] + +# verbose mode (optional), default: True, print messages on terminal +VERBOSE = True + +# Name of run (optional) default: shapepipe_run +RUN_NAME = run_sp_Mc + +# Add 
date and time to RUN_NAME, optional, default: True +; RUN_DATETIME = False + + +## ShapePipe execution options +[EXECUTION] + +# Module name, single string or comma-separated list of valid module runner names +MODULE = make_cat_runner + +# Parallel processing mode, SMP or MPI +MODE = SMP + + +## ShapePipe file handling options +[FILE] + +# Log file master name, optional, default: shapepipe +LOG_NAME = log_sp + +# Runner log file name, optional, default: shapepipe_runs +RUN_LOG_NAME = log_run_sp + +# Input directory, containing input files, single string or list of names with length matching FILE_PATTERN +INPUT_DIR = . + +# Output directory +OUTPUT_DIR = ./output + + +## ShapePipe job handling options +[JOB] + +# Batch size of parallel processing (optional), default is 1, i.e. run all jobs in serial +SMP_BATCH_SIZE = 8 + +# Timeout value (optional), default is None, i.e. no timeout limit applied +TIMEOUT = 96:00:00 + + +## Module options + +[MAKE_CAT_RUNNER] + +# Input directory, containing input files, single string or list of names with length matching FILE_PATTERN +INPUT_DIR = run_sp_tile_Sx:sextractor_runner, last:fake_psf_runner, last:ngmix_runner + +# Input file pattern(s), list of strings with length matching number of expected input file types +# Cannot contain wild cards +FILE_PATTERN = sexcat, galaxy_psf, ngmix + +# FILE_EXT (optional) list of string extensions to identify input files +FILE_EXT = .fits, .sqlite, .fits + +# Numbering convention, string that exemplifies a numbering pattern. 
+# Matches input single exposures (with 'p' removed) +# Needs to be given in this section, will be updated in module +# sections below +NUMBERING_SCHEME = -000-000 + +SHAPE_MEASUREMENT_TYPE = ngmix diff --git a/scripts/sh/job_sp_canfar_v2.0.bash b/scripts/sh/job_sp_canfar_v2.0.bash index 19ca661e2..0ea0d965a 100755 --- a/scripts/sh/job_sp_canfar_v2.0.bash +++ b/scripts/sh/job_sp_canfar_v2.0.bash @@ -467,11 +467,9 @@ fi (( do_job = $job & 2048 )) if [[ $do_job != 0 ]]; then - suff_sm="_nosm" - ### Merge all relevant information into final catalogue command_cfg_shapepipe \ - "config_tile_make_cat_$psf${suff_sm}.ini" \ + "config_tile_Mc_$psf.ini" \ "Run shapepipe (tile: create final cat $psf)" \ $n_smp \ $exclusive diff --git a/scripts/sh/run_job_sp_canfar_v2.0.bash b/scripts/sh/run_job_sp_canfar_v2.0.bash index e8a327b68..f1179f493 100755 --- a/scripts/sh/run_job_sp_canfar_v2.0.bash +++ b/scripts/sh/run_job_sp_canfar_v2.0.bash @@ -683,9 +683,16 @@ fi (( do_job = job & 1024 )) if [[ $do_job != 0 ]]; then # Job 1024: shape measurement - run_tile_job 1024 "Ng" "ngmix_interp_runner:1" + run_tile_job 1024 "Ng" "ngmix_runner:1" fi +(( do_job = job & 2048 )) +if [[ $do_job != 0 ]]; then + # Job 2048: merge catalogues + run_tile_job 2048 "Mc_${psf}" "make_cat_runner:1" +fi + + if [ -n "$scratch" ]; then message "Syncing output from scratch back to permanent dir" "$debug_out" -1 command "rsync -a output/ $work_dir/output/" $dry_run diff --git a/scripts/sh/run_scratch_local.sh b/scripts/sh/run_scratch_local.sh deleted file mode 100755 index 1223e6683..000000000 --- a/scripts/sh/run_scratch_local.sh +++ /dev/null @@ -1,178 +0,0 @@ -#!/bin/bash - -# Command line arguments -## Default values -job=-1 -ID=-1 -N_SMP=1 -dry_run=0 -dir=`pwd` -debug_out=-1 -scratch=/n17data/`whoami`/scratch -exec_path=$HOME/shapepipe/scripts/sh -slurm=1 - -# mh_local is 0 (1) if merge_header_runner is run on all exposures, -# which is standard so far (run on exposures of given tile only; new) 
-mh_local=0 - -# sp_local is 0 (1) is split_headers_runner and mask_runner is run -# on all exposures (locally). Not 100% automatic yet. -sp_local=1 -VERBOSE=1 - -pat="-- " - -# Help string -usage="Usage: $(basename "$0") -j JOB -e ID -k KIND [OPTIONS] -\n\nOptions:\n - -h\tthis message\n - -j, --job JOB\tRUnning JOB, bit-coded\n - -e, --exclusive ID - \timage ID\n - -p, --psf MODEL\n - \tPSF model, one in ['psfex'|'mccd'], default='$psf'\n - -m, --mh_local MH\n - \tmerged header file local (MH=0) or global (MH=1); default is $mh_local\n - -N, --N_SMP N_SMOp\n - \tnumber of jobs (SMP mode only), default from original config files\n - -d, --directory\n - \trun directory, default is pwd ($dir)\n - -S, --scratch\n - \tprocessing scratch directory, default is $scratch\n - -n, --dry_run LEVEL\n - \tdry run, no actuall processing\n - --debug_out PATH\n - \tdebug output file PATH, default not used\n -" - -## Help if no arguments -if [ -z $1 ]; then - echo -ne $usage - exit 1 -fi - -## Parse command line -while [ $# -gt 0 ]; do - case "$1" in - -h) - echo -ne $usage - exit 0 - ;; - -j|--job) - job="$2" - shift - ;; - -e|--exclusive) - ID="$2" - shift - ;; - -p|--psf) - psf="$2" - shift - ;; - -m|--mh_local) - mh_local="$2" - shift - ;; - -N|--N_SMP) - N_SMP="$2" - shift - ;; - -d|--directory) - dir="$2" - shift - ;; - -S|--scratch) - scratch="$2" - shift - ;; - -n|--dry_run) - dry_run="$2" - shift - ;; - --debug_out) - debug_out="$2" - shift - ;; - esac - shift -done - -## Check options -if [ "$job" == "-1" ]; then - echo "No job indicated, use option -j" - exit 2 -fi - -if [ "$exclusive" == "-1" ]; then - echo "No image ID indicated, use option -e" - exit 3 -fi - -if [ "$psf" != "psfex" ] && [ "$psf" != "mccd" ]; then - echo "PSF (option -p) needs to be 'psfex' or 'mccd'" - exit 4 -fi - - -source $HOME/shapepipe/scripts/sh/functions.sh - - -kind=$(get_kind_from_job $job) - - -# Load common functions -source $HOME/shapepipe/scripts/sh/functions.sh - - -# Start script - -if 
[ "$scratch" != "-1" ]; then - - command "mkdir -p $scratch/${kind}_runs" $dry_run - command "cp -R ${kind}_runs/$ID $scratch/${kind}_runs" $dry_run - command "cd $scratch" $dry_run - -fi - - if [ "$slurm" == "0" ]; then - command "init_run_exclusive_canfar.sh -j $job -p $psf -m $mh_local -N $N_SMP -e $ID" $dry_run - else - STATUS=$(sbatch --output=./sbatch-$ID.out --partition=comp --job-name="j${job}_${ID}" --ntasks-per-node=$N_SMP --time=32:00:00 --mem=64G $exec_path/init_run_exclusive_canfar.sh -j $job -p $psf -m $mh_local -N $N_SMP -e $ID) - - JOB_ID=$(echo $STATUS | cut -d ' ' -f 4) - echo "JOB_ID=$JOB_ID" - - # Wait for the job to finish - while true; do - STATUS=$(squeue -j "$JOB_ID" -h -o "%T") - if [[ -z "$STATUS" ]]; then - echo "job $JOB_ID no longer in the queue" - break - fi - - echo "Waiting for job $JOB_ID in state '$STATUS' to complete..." - sleep 10 - done - - echo "Job $JOB_ID has completed. Proceeding with the script..." - fi - -if [ "$scratch" != "-1" ]; then - - if [ "$job" == "32" ]; then - command "mv ${kind}_runs/$ID/output/run_sp_exp_SxSe* $dir/${kind}_runs/$ID/output" $dry_run - elif [ "$job" == "64" ]; then - command "mv ${kind}_runs/$ID/output/run_sp_tile_PsViSm* $dir/${kind}_runs/$ID/output" $dry_run - elif [ "$job" == "128" ]; then - command "mv ${kind}_runs/$ID/output/run_sp_tile_ngmix_* $dir/${kind}_runs/$ID/output" $dry_run - fi - - command "rm -rf ${kind}_runs/$ID" $dry_run - command "cd $dir/${kind}_runs/$ID" $dry_run - # Gave Input/Output python error - #command "update_runs_log_file.py" $dry_run - command "cd $dir" $dry_run - -fi diff --git a/src/shapepipe/modules/make_cat_package/make_cat.py b/src/shapepipe/modules/make_cat_package/make_cat.py index 72024b3a4..8966bbdae 100644 --- a/src/shapepipe/modules/make_cat_package/make_cat.py +++ b/src/shapepipe/modules/make_cat_package/make_cat.py @@ -139,77 +139,6 @@ def save_sextractor_data(final_cat_file, sexcat_path, remove_vignet=True): return cat_size -def save_sm_data( - 
final_cat_file, - sexcat_sm_path, - do_classif=True, - star_thresh=0.003, - gal_thresh=0.01, - n_obj=-1, -): - r"""Save Spread-Model Data. - - Save the spread-model data into the final catalogue. - - Parameters - ---------- - final_cat_file : file_io.FITSCatalogue - Final catalogue - sexcat_sm_path : str - Path to spread-model catalogue to save. If ``None``, spread_model is - set to 99 - do_classif : bool - If ``True`` objects will be classified into stars, galaxies, and other, - using the classifier - :math:`{\rm class} = {\rm sm} + 2 * {\rm sm}_{\rm err}` - star_thresh : float - Threshold for star selection; object is classified as star if - :math:`|{\rm class}| <` ``star_thresh`` - gal_thresh : float - Threshold for galaxy selection; object is classified as galaxy if - :math:`{\rm class} >` ``gal_thresh`` - nobj : int, optional - Number of objects, only used if sexcat_sm_path is ``-1`` - - Returns - ------- - int - Number of objects saved - """ - final_cat_file.open() - - if sexcat_sm_path is not None: - sexcat_sm_file = file_io.FITSCatalogue( - sexcat_sm_path, - SEx_catalogue=True, - ) - sexcat_sm_file.open() - - sm = np.copy(sexcat_sm_file.get_data()["SPREAD_MODEL"]) - sm_err = np.copy(sexcat_sm_file.get_data()["SPREADERR_MODEL"]) - - sexcat_sm_file.close() - - else: - sm = np.ones(n_obj) * 99 - sm_err = np.ones(n_obj) * 99 - - final_cat_file.add_col("SPREAD_MODEL", sm) - final_cat_file.add_col("SPREADERR_MODEL", sm_err) - - if do_classif: - obj_flag = np.ones_like(sm, dtype="int16") * 2 - classif = sm + 2.0 * sm_err - obj_flag[np.where(np.abs(classif) < star_thresh)] = 0 - obj_flag[np.where(classif > gal_thresh)] = 1 - - final_cat_file.add_col("SPREAD_CLASS", obj_flag) - - final_cat_file.close() - - return n_obj - - class SaveCatalogue: """Save Catalogue. 
diff --git a/src/shapepipe/modules/make_cat_runner.py b/src/shapepipe/modules/make_cat_runner.py index fa1fc43ff..6c2fe0015 100644 --- a/src/shapepipe/modules/make_cat_runner.py +++ b/src/shapepipe/modules/make_cat_runner.py @@ -22,7 +22,7 @@ ], file_pattern=[ "tile_sexcat", - "sexcat_sm", + "sexcat", "galaxy_psf", "ngmix", ], @@ -46,30 +46,10 @@ def make_cat_runner( galaxy_psf_path, shape1_cat_path, ) = input_file_list - sexcat_sm_path = None else: - # With spread model input - ( - tile_sexcat_path, - sexcat_sm_path, - galaxy_psf_path, - shape1_cat_path, - ) = input_file_list[0:4] - if len(input_file_list) == 5: - # With second shape catalogue input - shape2_cat_path = input_file_list[4] - - # Fetch classification options - do_classif = config.getboolean( - module_config_sec, - "SM_DO_CLASSIFICATION", - ) - if do_classif: - star_thresh = config.getfloat(module_config_sec, "SM_STAR_THRESH") - gal_thresh = config.getfloat(module_config_sec, "SM_GAL_THRESH") - else: - star_thresh = None - gal_thresh = None + raise IndexError( + f"Invalid number of input files {len(input_file_List)}, expected 3." 
+ ) # Fetch shape measurement type shape_type_list = config.getlist( @@ -99,27 +79,6 @@ def make_cat_runner( n_obj = make_cat.save_sextractor_data(final_cat_file, tile_sexcat_path) cat_size_sextractor = n_obj - # Save spread-model data - if sexcat_sm_path is None: - w_log.info("No sm cat input, setting spread model to 99") - else: - w_log.info("Save spread-model data") - cat_size_sm = make_cat.save_sm_data( - final_cat_file, - sexcat_sm_path, - do_classif, - star_thresh, - gal_thresh, - n_obj=n_obj - ) - - if cat_size_sextractor != cat_size_sm: - w_log( - f"Warnign: SExtractor catalogue {tile_sexcat_path} has different size" - + f" ({cat_size_sextractor} than spread_model catalogue" - + f" {sexcat_sm_path} ({cat_size_sm})" - ) - # Save shape data sc_inst = make_cat.SaveCatalogue(final_cat_file, cat_size_sextractor, w_log) w_log.info("Save shape measurement data") From 374ab51c3df3a2bc159a5d83a78bbf2aabb40b0c Mon Sep 17 00:00:00 2001 From: martinkilbinger Date: Tue, 9 Jun 2026 11:44:48 +0200 Subject: [PATCH 05/24] added datetime to Mh run MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../cfis_image_sims/config_tile_Mh_exp.ini | 2 +- scripts/sh/run_image_sims.sh | 34 +++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 scripts/sh/run_image_sims.sh diff --git a/example/cfis_image_sims/config_tile_Mh_exp.ini b/example/cfis_image_sims/config_tile_Mh_exp.ini index 0d3f9f8b3..3fb649997 100644 --- a/example/cfis_image_sims/config_tile_Mh_exp.ini +++ b/example/cfis_image_sims/config_tile_Mh_exp.ini @@ -14,7 +14,7 @@ VERBOSE = True RUN_NAME = run_sp_tile_Mh_exp # Add date and time to RUN_NAME, optional, default: True -RUN_DATETIME = False +RUN_DATETIME = True ## ShapePipe execution options diff --git a/scripts/sh/run_image_sims.sh b/scripts/sh/run_image_sims.sh new file mode 100644 index 000000000..8a6d6e909 --- /dev/null +++ 
b/scripts/sh/run_image_sims.sh @@ -0,0 +1,34 @@ +base=$HOME/v2.0/image_sims + +type="grid" + +tile_ID="233.293" + +if [ "$type" == "grid" ]; + str_type="_grid" +else + str_type="" + +job=4091 + +num=1 + +dgs=("m" "z" "p") + +mkdir -p $base/$type +cd $base/$type + +for dg in dgs; do + + name="1${dg}2${dx}${str_type}_$num" + + init_run_v2.0.sh -t image_sims -s $name + + cd $name + + apptainer exec --bind /n09data,/home \ + run_job_sp_canfar_v2.0.bash -e ${tile_ID} -t image_sims -j $job + +done + +#--env PATH=/home/mkilbing/.local/bin:/home/mkilbing/astro/repositories/github/shapepipe/bin:$PATH --env PYTHONPATH=/home/mkilbing/astro/repositories/github/shapepipe/src:$PYTHONPATH /n17data/mkilbing/shapepipe_ngmix_v2.0.sif From 0731062340143cddad9589193e8b59550f4972ae Mon Sep 17 00:00:00 2001 From: martinkilbinger Date: Tue, 9 Jun 2026 11:49:42 +0200 Subject: [PATCH 06/24] init v2.0: cd in new subdir MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/sh/init_run_v2.0.sh | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/scripts/sh/init_run_v2.0.sh b/scripts/sh/init_run_v2.0.sh index f2b82734f..bf671632e 100755 --- a/scripts/sh/init_run_v2.0.sh +++ b/scripts/sh/init_run_v2.0.sh @@ -77,8 +77,6 @@ elif [ "$type" == "image_sims" ]; then input_dir_base="/n09data/hervas/skills_out" input_dir_tiles="$input_dir_base/$subdir/images/SP_tiles" input_dir_exp="$input_dir_base/$subdir/images/SP_exp" - ln -s $input_dir_tiles input_tiles - ln -s $input_dir_exp input_exp else @@ -92,8 +90,13 @@ echo "Initialising ShapePipe v${version} run directory: $base_dir" echo "" # --- Base directory --- -mkdir -p "$base_dir" -cd "$base_dir" +mkdir -p "$base_dir/$subdir" +cd "$base_dir/$subdir" + +if [ "$type" == "image_sims" ]; then + ln -s "$input_dir_tiles" input_tiles + ln -s "$input_dir_exp" input_exp +fi echo "Creating tiles/ directory..." 
mkdir -p tiles @@ -111,7 +114,7 @@ mkdir -p debug # --- Config symlink --- -# Config directory (will be symlinked as $base_dir/cfis) +# Config directory (will be symlinked) if [ -L cfis ]; then echo "cfis symlink already exists, skipping" @@ -137,7 +140,7 @@ echo " $n_tiles tiles" echo "" echo "Done. Directory structure:" -echo " $base_dir/" +echo " $base_dir/$subdir" echo " ├── tiles/" echo " ├── exp/" echo " ├── logs/" From 75679705fedddaad69e3b9d2e4e9588b29ceea5b Mon Sep 17 00:00:00 2001 From: martinkilbinger Date: Tue, 9 Jun 2026 11:50:15 +0200 Subject: [PATCH 07/24] image sims run updates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/sh/run_job_sp_canfar_v2.0.bash | 31 +++++++++++++++++++++----- 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/scripts/sh/run_job_sp_canfar_v2.0.bash b/scripts/sh/run_job_sp_canfar_v2.0.bash index f1179f493..f77982bba 100755 --- a/scripts/sh/run_job_sp_canfar_v2.0.bash +++ b/scripts/sh/run_job_sp_canfar_v2.0.bash @@ -596,7 +596,12 @@ IDdec=${ID##*.} (( do_job = job & 1 )) if [[ $do_job != 0 ]]; then # Job 1: download tile images and weights - run_tile_job 1 "Git" "get_images_runner:4" + if [ "$type" == "image_sims" ]; then + n_exp=2 + else + n_exp=4 + fi + run_tile_job 1 "Git" "get_images_runner:${n_exp}" fi (( do_job = job & 2 )) @@ -604,11 +609,20 @@ if [[ $do_job != 0 ]]; then if [ "$type" == "image_sims" ]; then # Image sims weights are already uncompressed; fake the Uz output directory # so downstream jobs can find the weight via last:uncompress_fits_runner. - uz_out="$work_dir/output/run_sp_tile_Uz/uncompress_fits_runner/output" - command "mkdir -p $uz_out" $dry_run weight_src="$dir/input_tiles/CFIS_simu_weight-${ID//./-}.fits" - if [ -e "$weight_src" ] && [ ! 
-e "$uz_out/$(basename $weight_src)" ]; then - command "ln -sf $weight_src $uz_out/$(basename $weight_src)" $dry_run + if [ "$check" == "1" ]; then + uz_run_dir=$(ls -dt "$work_dir/output/run_sp_tile_Uz"* 2>/dev/null | head -1) + if [ -n "$uz_run_dir" ] && [ -e "$uz_run_dir/uncompress_fits_runner/output/$(basename $weight_src)" ]; then + message "Complete: Uz $(basename $weight_src)" "$debug_out" -1 + else + message "Missing: Uz $(basename $weight_src)" "$debug_out" -1 + fi + else + uz_out="$work_dir/output/run_sp_tile_Uz$(date +_%Y-%m-%d_%H-%M-%S)/uncompress_fits_runner/output" + command "mkdir -p $uz_out" $dry_run + if [ -e "$weight_src" ] && [ ! -e "$uz_out/$(basename $weight_src)" ]; then + command "ln -sf $weight_src $uz_out/$(basename $weight_src)" $dry_run + fi fi else # Job 2: uncompress tile weights @@ -625,7 +639,12 @@ fi (( do_job = job & 8 )) if [[ $do_job != 0 ]]; then # Job 8: retrieve exposure images - run_exp_job 8 "Gie" "get_images_runner:6" + if [ "$type" == "image_sims" ]; then + n_exp=3 + else + n_exp=6 + fi + run_exp_job 8 "Gie" "get_images_runner:${n_exp}" fi (( do_job = job & 16 )) From ec8047ed95c0032f4d2aabf1b593be64c6d1d421 Mon Sep 17 00:00:00 2001 From: martinkilbinger Date: Tue, 9 Jun 2026 11:51:26 +0200 Subject: [PATCH 08/24] small changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/sh/run_job_sp_canfar_v2.0.bash | 4 ++-- src/shapepipe/modules/mask_package/mask.py | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/scripts/sh/run_job_sp_canfar_v2.0.bash b/scripts/sh/run_job_sp_canfar_v2.0.bash index f77982bba..95395879a 100755 --- a/scripts/sh/run_job_sp_canfar_v2.0.bash +++ b/scripts/sh/run_job_sp_canfar_v2.0.bash @@ -662,10 +662,10 @@ fi (( do_job = job & 64 )) if [[ $do_job != 0 ]]; then # Job 64: PSF model - # For image_sims: build fake PSF stamps at tile level from pre-computed dict + # For image_sims: fake PSF runs as part of job 512 
(requires sexcat from job 256) # For data: run full exposure-level PSF modelling pipeline if [ "$type" == "image_sims" ]; then - run_tile_job 64 "fpsf" "fake_psf_runner:1" + message "Job 64 (fake PSF) is handled as part of job 512 for image_sims — skipping." "$debug_out" -1 elif [ "$psf" == "psfex" ]; then run_exp_job 64 "SxSePsf${Letter}i" "sextractor_runner:80 psfex_runner:80 psfex_interp_runner:40::warn setools_runner:80:rand_split" else diff --git a/src/shapepipe/modules/mask_package/mask.py b/src/shapepipe/modules/mask_package/mask.py index 4e07e054d..740b1be2c 100644 --- a/src/shapepipe/modules/mask_package/mask.py +++ b/src/shapepipe/modules/mask_package/mask.py @@ -106,9 +106,8 @@ def __init__( # Search path for existing mask files self._check_existing_dir = check_existing_dir - # Set external star catalogue path if given - if star_cat_path is not None: - self._star_cat_path = star_cat_path + # External star catalogue path, if given (None otherwise) + self._star_cat_path = star_cat_path self._hdu = hdu From 1ae8a75115617ee85428e560bd2007001aab0e0d Mon Sep 17 00:00:00 2001 From: martinkilbinger Date: Tue, 9 Jun 2026 13:50:24 +0200 Subject: [PATCH 09/24] output mean e to ngmix log --- example/cfis/config_tile_Mc_psfex.ini | 2 +- example/cfis/config_tile_Mh_exp.ini | 4 ++ .../cfis_image_sims/config_tile_Mc_psfex.ini | 2 +- .../cfis_image_sims/config_tile_Mh_exp.ini | 5 ++ scripts/sh/run_job_sp_canfar_v2.0.bash | 7 ++- src/shapepipe/modules/merge_headers_runner.py | 4 ++ src/shapepipe/modules/ngmix_package/ngmix.py | 58 +++++++++++++++++++ src/shapepipe/pipeline/exp_utils.py | 15 +++-- 8 files changed, 89 insertions(+), 8 deletions(-) diff --git a/example/cfis/config_tile_Mc_psfex.ini b/example/cfis/config_tile_Mc_psfex.ini index 1983f91c7..50f5c0612 100644 --- a/example/cfis/config_tile_Mc_psfex.ini +++ b/example/cfis/config_tile_Mc_psfex.ini @@ -9,7 +9,7 @@ VERBOSE = True # Name of run (optional) default: shapepipe_run -RUN_NAME = run_sp_Mc +RUN_NAME = 
run_sp_tile_Mc # Add date and time to RUN_NAME, optional, default: True ; RUN_DATETIME = False diff --git a/example/cfis/config_tile_Mh_exp.ini b/example/cfis/config_tile_Mh_exp.ini index 0d3f9f8b3..6b0abb7b3 100644 --- a/example/cfis/config_tile_Mh_exp.ini +++ b/example/cfis/config_tile_Mh_exp.ini @@ -70,3 +70,7 @@ NUMBERING_SCHEME = -000-000 # Root directory containing all per-exposure work directories. # The runner will walk this tree to collect headers-.npy files. EXP_BASE_DIR = $SP_EXP +# If True, log a warning and continue when split_exp_runner output is missing +# for some exposures (e.g. those that failed job 16). If False (default), +# raise an error and stop. +WARN_MISSING_EXP = False diff --git a/example/cfis_image_sims/config_tile_Mc_psfex.ini b/example/cfis_image_sims/config_tile_Mc_psfex.ini index 795f07806..b4ca4bb0e 100644 --- a/example/cfis_image_sims/config_tile_Mc_psfex.ini +++ b/example/cfis_image_sims/config_tile_Mc_psfex.ini @@ -8,7 +8,7 @@ VERBOSE = True # Name of run (optional) default: shapepipe_run -RUN_NAME = run_sp_Mc +RUN_NAME = run_sp_tile_Mc # Add date and time to RUN_NAME, optional, default: True ; RUN_DATETIME = False diff --git a/example/cfis_image_sims/config_tile_Mh_exp.ini b/example/cfis_image_sims/config_tile_Mh_exp.ini index 3fb649997..d213c0776 100644 --- a/example/cfis_image_sims/config_tile_Mh_exp.ini +++ b/example/cfis_image_sims/config_tile_Mh_exp.ini @@ -70,3 +70,8 @@ NUMBERING_SCHEME = -000-000 # Root directory containing all per-exposure work directories. # The runner will walk this tree to collect headers-.npy files. EXP_BASE_DIR = $SP_EXP + +# If True, log a warning and continue when split_exp_runner output is missing +# for some exposures (e.g. those that failed job 16). If False (default), +# raise an error and stop. 
+WARN_MISSING_EXP = False diff --git a/scripts/sh/run_job_sp_canfar_v2.0.bash b/scripts/sh/run_job_sp_canfar_v2.0.bash index 95395879a..502161caa 100755 --- a/scripts/sh/run_job_sp_canfar_v2.0.bash +++ b/scripts/sh/run_job_sp_canfar_v2.0.bash @@ -16,7 +16,7 @@ ID=-1 psf='psfex' tile_det='uc' tile_mask=0 -N_SMP=1 +N_SMP=-1 dry_run=0 dir=`pwd` debug_out="" @@ -685,16 +685,19 @@ if [[ $do_job != 0 ]]; then if [ "$tile_det" == "uc" ]; then run_tile_job 256 "Gic Uc" "get_images_runner:2 read_ext_sexcat_runner:1" else - run_tile_job 256 "Sx" "sextractor_runner:1" + n_exp=2 + run_tile_job 256 "Sx" "sextractor_runner:$n_exp" fi fi (( do_job = job & 512 )) if [[ $do_job != 0 ]]; then # Job 512: process tiles ([PSF interp,] vignets) + # For image_sims: fake PSF runs first (requires sexcat from job 256), then vignets if [ "$type" == "data" ]; then run_tile_job 512 "${Letter}iViVi ${Letter}iViVi ${Letter}iViVi" "psfex_interp_runner:1 vignetmaker_runner_run_1:1 vignetmaker_runner_run_2:4" else + run_tile_job 64 "fpsf" "fake_psf_runner:1" run_tile_job 512 "ViVi VViVi" "vignetmaker_runner_run_1:1 vignetmaker_runner_run_2:4" fi fi diff --git a/src/shapepipe/modules/merge_headers_runner.py b/src/shapepipe/modules/merge_headers_runner.py index 9bee35159..6c3cfa38e 100644 --- a/src/shapepipe/modules/merge_headers_runner.py +++ b/src/shapepipe/modules/merge_headers_runner.py @@ -43,6 +43,9 @@ def merge_headers_runner( f"Tile-level merge: collecting headers from {exp_base_dir} " f"using {exp_numbers_file}" ) + warn_missing = config.getboolean( + module_config_sec, "WARN_MISSING_EXP", fallback=False + ) headers_file_list = get_exp_output_files( exp_base_dir, exp_numbers_file, @@ -50,6 +53,7 @@ def merge_headers_runner( "headers", ".npy", w_log=w_log, + warn_only=warn_missing, ) # Extract tile number from the exp_numbers filename, e.g. 
# "exp_numbers-284.272-1.000.txt" -> "-284.272-1.000" diff --git a/src/shapepipe/modules/ngmix_package/ngmix.py b/src/shapepipe/modules/ngmix_package/ngmix.py index a1f204252..ee142d253 100644 --- a/src/shapepipe/modules/ngmix_package/ngmix.py +++ b/src/shapepipe/modules/ngmix_package/ngmix.py @@ -601,6 +601,61 @@ def check_key(self, expccd_name_tmp, vign_cat, vignet_path): + f" file '{vignet_path}'" ) + def log_mean_ellipticity(self): + """Log mean ellipticity from NOSHEAR HDU to the run log. + + Reports , with standard errors for all objects and for + objects passing the default metacal cuts (flags==0, mcal_flags==0, + 10 < SNR < 500, T/Tpsf > 0.5). + """ + output_path = self.get_output_path(self._output_dir) + try: + with fits.open(output_path) as hdul: + d = hdul['NOSHEAR'].data + g1 = d['g1'].astype(float) + g2 = d['g2'].astype(float) + flags = d['flags'] + mcal_flags = d['mcal_flags'] + s2n = d['s2n'].astype(float) + T = d['T'].astype(float) + Tpsf = d['Tpsf'].astype(float) + except Exception as e: + self._w_log.warning(f"Could not compute mean ellipticity: {e}") + return + + n_total = len(g1) + if n_total == 0: + self._w_log.info("Mean ellipticity: no objects in output catalogue") + return + + def _log_stats(g1_sel, g2_sel, label): + n = len(g1_sel) + if n == 0: + self._w_log.info(f" {label}: 0 objects") + return + mean_g1 = g1_sel.mean() + mean_g2 = g2_sel.mean() + err_g1 = g1_sel.std() / np.sqrt(n) + err_g2 = g2_sel.std() / np.sqrt(n) + self._w_log.info( + f" {label} (N={n}):" + f" = {mean_g1:+.4e} +/- {err_g1:.4e}," + f" = {mean_g2:+.4e} +/- {err_g2:.4e}" + ) + + self._w_log.info(f"Mean ellipticity (NOSHEAR, N_total={n_total}):") + _log_stats(g1, g2, "no cuts") + + with np.errstate(invalid='ignore'): + mask = ( + (flags == 0) + & (mcal_flags == 0) + & (s2n >= 10.0) + & (s2n <= 500.0) + & (T / Tpsf >= 0.5) + ) + _log_stats(g1[mask], g2[mask], "SNR in [10, 500], T/Tpsf > 0.5") + def process(self): """Process. 
@@ -730,6 +785,9 @@ def process(self): # Save results self.save_results(res_dict) + # Log mean ellipticity statistics + self.log_mean_ellipticity() + def prepare_postage_stamps(vignet, obj_id, i_tile, tile_cat, bkg_sub=True): # define per-object lists of individual exposures to go into ngmix stamp = Postage_stamp(bkg_sub=bkg_sub) diff --git a/src/shapepipe/pipeline/exp_utils.py b/src/shapepipe/pipeline/exp_utils.py index 4fdca1834..b934f0bb3 100644 --- a/src/shapepipe/pipeline/exp_utils.py +++ b/src/shapepipe/pipeline/exp_utils.py @@ -22,6 +22,7 @@ def get_exp_output_files( file_pattern, file_ext, w_log=None, + warn_only=False, ): """Collect output files from a per-exposure runner for all tile exposures. @@ -52,6 +53,10 @@ def get_exp_output_files( File extension including the leading dot, e.g. ``.npy`` w_log : logging.Logger, optional Pipeline logger; ``None`` silences all logging + warn_only : bool, optional + If ``True``, log a warning for missing exposures and continue with + the files that were found instead of raising ``FileNotFoundError``. + Default is ``False``. 
Returns ------- @@ -116,10 +121,12 @@ def get_exp_output_files( w_log.warning(f" {exp_id}: no match for {pattern}") if missing: - raise FileNotFoundError( - f"No {runner_name} output found for " - f"{len(missing)} exposure(s): {missing}" - ) + msg = f"No {runner_name} output found for {len(missing)} exposure(s): {missing}" + if warn_only: + if w_log: + w_log.warning(msg) + else: + raise FileNotFoundError(msg) if w_log: w_log.info(f"Found {len(file_list)} exposure output files") From aa25cc28802d8c5dcc07c9cc7c0392e9c2a29a19 Mon Sep 17 00:00:00 2001 From: martinkilbinger Date: Wed, 10 Jun 2026 10:37:13 +0200 Subject: [PATCH 10/24] doc --- docs/source/pipeline_v2.0.md | 53 +++++++++++++++++++++++++----------- uv.lock | 1 - 2 files changed, 37 insertions(+), 17 deletions(-) diff --git a/docs/source/pipeline_v2.0.md b/docs/source/pipeline_v2.0.md index f089f7e87..1252c1611 100644 --- a/docs/source/pipeline_v2.0.md +++ b/docs/source/pipeline_v2.0.md @@ -15,14 +15,22 @@ v2.0/ │ │ ├── 301.278/ │ │ ├── 301.279/ │ │ └── ... +│ │ ... ├── exp/ │ ├── 21/ │ │ ├── 21163916 │ │ └── ... +│ │ ... 
├── cfis -> ├── tile_numbers -> +└── logs/ └── debug/ +Additionally, for image_sims: +├── input_tiles -> +├── input_exp -> + + ### Interactive job from the terminal for a single tile @@ -30,22 +38,35 @@ Run bit-coded jobs ```bash run_job_canfar_v2.0.sh -e ID -j ``` -with job processing tiles: -- 1: download tiles -- 2: uncompress tile weights -- 4: find exposures -then exposures: -- 8: download exposures -- 16: split exposures into single-CCD HDUs -- 32: mask exposures -- 64: process stars (selection, PSF movel) -then back to tiles: -- 128: select objects (using external catalogue) -- 256: create object postage stamps - -## CANFAR Setup - -### CANFAR Login +# with job processing tiles: + 1: retrieve tile images and weights (download/symlink) + 2: uncompress weights (no processing for image_sims) + 4: find exposures +# then exposures + 8: retrieve exposures (download/symlink) +16: split exposures, get WCS header +32: mask exposures +64: process stars on exposures, PSF model (no processing for image sims) +# back to tiles +128: merge exposure WCS headers into tile-level sqlite log +256: object selection on tiles (external cat/SExtractor) +512: postage stamp creation +1024: multi-epoch shape measurement +2048: create final catalogue + +## Candide setup + +## CANFAR setup + +### For image simulations + +#### Download docker image + +```bash +apptainer pull shapepipe_im_sims.sif docker://ghcr.io/cosmostat/shapepipe:im_sims +``` + +### CANFAR login Login to the canfar system with diff --git a/uv.lock b/uv.lock index b3292a696..297e6f756 100644 --- a/uv.lock +++ b/uv.lock @@ -2336,7 +2336,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/31/89/8fc1c268969fac43688d65fd92e67df24bd128d53cb4d2eee534cd307399/pandas-3.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9c39be2d709d01fa972a0cabc522389fceca4f3969332ba25a7d6c5802cf976a", size = 11828897, upload-time = "2026-05-11T18:54:17.146Z" }, { url = 
"https://files.pythonhosted.org/packages/56/3b/e7d20dea247a3e6dc0bd8a6953854afbedc03951def4e7371e05e7263e25/pandas-3.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4db8c527972a821cf5286b40ccc57642a39bc62e62022b42f99f8a67fca8c3a1", size = 10900855, upload-time = "2026-05-11T18:54:19.72Z" }, { url = "https://files.pythonhosted.org/packages/0f/54/68a0978d1ef8502b8492099beaa6e7a0c1b32e3b5d4f677f5810cb08711c/pandas-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:b2c95f8bfc1ee412bf482605d7bfd30c12d1d26bd59fdd91efeef1d4718decb1", size = 9466464, upload-time = "2026-05-11T18:54:22.754Z" }, ->>>>>>> upstream/develop ] [[package]] From 1224a7fcc4cf074ff6fc7758d5c65d205a22fed5 Mon Sep 17 00:00:00 2001 From: martinkilbinger Date: Wed, 10 Jun 2026 11:45:48 +0200 Subject: [PATCH 11/24] =?UTF-8?q?=C3=82=C3=82run=20script=20:=20added=20--?= =?UTF-8?q?retry?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- example/cfis/config_tile_Mc_psfex.ini | 2 +- .../cfis/config_tile_Ng_batch_psfex_uc.ini | 2 +- .../cfis_image_sims/config_tile_Mc_psfex.ini | 2 +- .../config_tile_Ng_batch_psfex_sx.ini | 2 +- scripts/sh/run_image_sims.sh | 9 +- scripts/sh/run_job_sp_canfar_v2.0.bash | 82 +++++++++++-------- 6 files changed, 59 insertions(+), 40 deletions(-) diff --git a/example/cfis/config_tile_Mc_psfex.ini b/example/cfis/config_tile_Mc_psfex.ini index 50f5c0612..77b52ee1d 100644 --- a/example/cfis/config_tile_Mc_psfex.ini +++ b/example/cfis/config_tile_Mc_psfex.ini @@ -9,7 +9,7 @@ VERBOSE = True # Name of run (optional) default: shapepipe_run -RUN_NAME = run_sp_tile_Mc +RUN_NAME = run_sp_tile_Mc_psfex # Add date and time to RUN_NAME, optional, default: True ; RUN_DATETIME = False diff --git a/example/cfis/config_tile_Ng_batch_psfex_uc.ini b/example/cfis/config_tile_Ng_batch_psfex_uc.ini index 9b57396c5..3d086bf31 100644 --- a/example/cfis/config_tile_Ng_batch_psfex_uc.ini +++ b/example/cfis/config_tile_Ng_batch_psfex_uc.ini @@ -11,7 +11,7 @@ 
VERBOSE = True RUN_NAME = run_sp_tile_Ng # Add date and time to RUN_NAME, optional, default: False -RUN_DATETIME = False +RUN_DATETIME = True ## ShapePipe execution options diff --git a/example/cfis_image_sims/config_tile_Mc_psfex.ini b/example/cfis_image_sims/config_tile_Mc_psfex.ini index b4ca4bb0e..e37b71dbe 100644 --- a/example/cfis_image_sims/config_tile_Mc_psfex.ini +++ b/example/cfis_image_sims/config_tile_Mc_psfex.ini @@ -8,7 +8,7 @@ VERBOSE = True # Name of run (optional) default: shapepipe_run -RUN_NAME = run_sp_tile_Mc +RUN_NAME = run_sp_tile_Mc_psfex # Add date and time to RUN_NAME, optional, default: True ; RUN_DATETIME = False diff --git a/example/cfis_image_sims/config_tile_Ng_batch_psfex_sx.ini b/example/cfis_image_sims/config_tile_Ng_batch_psfex_sx.ini index 0a329f441..3e5690531 100644 --- a/example/cfis_image_sims/config_tile_Ng_batch_psfex_sx.ini +++ b/example/cfis_image_sims/config_tile_Ng_batch_psfex_sx.ini @@ -11,7 +11,7 @@ VERBOSE = True RUN_NAME = run_sp_tile_Ng # Add date and time to RUN_NAME, optional, default: False -RUN_DATETIME = False +RUN_DATETIME = True ## ShapePipe execution options diff --git a/scripts/sh/run_image_sims.sh b/scripts/sh/run_image_sims.sh index 8a6d6e909..3d5fca8c8 100644 --- a/scripts/sh/run_image_sims.sh +++ b/scripts/sh/run_image_sims.sh @@ -4,10 +4,11 @@ type="grid" tile_ID="233.293" -if [ "$type" == "grid" ]; +if [ "$type" == "grid" ]; then str_type="_grid" else str_type="" +fi job=4091 @@ -18,9 +19,9 @@ dgs=("m" "z" "p") mkdir -p $base/$type cd $base/$type -for dg in dgs; do +for dg in "${dgs[@]}"; do - name="1${dg}2${dx}${str_type}_$num" + name="1${dg}2${dg}${str_type}_$num" init_run_v2.0.sh -t image_sims -s $name @@ -29,6 +30,8 @@ for dg in dgs; do apptainer exec --bind /n09data,/home \ run_job_sp_canfar_v2.0.bash -e ${tile_ID} -t image_sims -j $job + cd .. 
+ done #--env PATH=/home/mkilbing/.local/bin:/home/mkilbing/astro/repositories/github/shapepipe/bin:$PATH --env PYTHONPATH=/home/mkilbing/astro/repositories/github/shapepipe/src:$PYTHONPATH /n17data/mkilbing/shapepipe_ngmix_v2.0.sif diff --git a/scripts/sh/run_job_sp_canfar_v2.0.bash b/scripts/sh/run_job_sp_canfar_v2.0.bash index 502161caa..fd2bbb5a3 100755 --- a/scripts/sh/run_job_sp_canfar_v2.0.bash +++ b/scripts/sh/run_job_sp_canfar_v2.0.bash @@ -29,6 +29,7 @@ scratch="" test_only=0 check=0 force=0 +retry=0 VERBOSE=1 pat="-- " @@ -53,6 +54,8 @@ ${JOB_LIST_HELP} -e, --exclusive ID\timage ID\n --test\t\t\ttest mode, no processing\n --check\t\tcheck download completeness only (job 8), no processing\n --force\t\tremove existing module output dir(s) before running\n + --retry\t\tskip jobs whose existing run is complete; remove and rerun\n + \t\t\tonly those whose existing run is incomplete/failed\n " ## Help if no arguments @@ -121,6 +124,9 @@ while [ $# -gt 0 ]; do --force) force=1 ;; + --retry) + retry=1 + ;; esac shift done @@ -177,6 +183,30 @@ function init_exp_work_dir() { } +# Remove existing run_sp__* output directories. +# Args: $1 = base dir containing the "output" subdir (work_dir or exp_work_dir) +# $2 = "tile" or "exp" +# $3 = space-separated run_prefixes +# $4 = label for the removal message (e.g. "Force-removing") +function remove_run_dirs() { + local base_dir=$1 + local kind=$2 + local run_prefixes=$3 + local label=$4 + local run_prefix + for run_prefix in $run_prefixes; do + local dirs_to_remove + dirs_to_remove=$(ls -d "$base_dir/output/run_sp_${kind}_${run_prefix}"* 2>/dev/null) + if [ -n "$dirs_to_remove" ]; then + for d in $dirs_to_remove; do + message "${label} $d" "$debug_out" -1 + command "rm -rf $d" $dry_run + done + fi + done +} + + # Run a per-exposure job (e.g. job 8, 16). # Args: $1 = job number # $2 = space-separated list of run_sp_exp output dir prefixes (e.g. 
"Gie") @@ -230,21 +260,6 @@ function run_exp_job() { # Create exp_numbers-000-000.txt and cfis link if not existent init_exp_work_dir "$exp_id" "$exp_work_dir" - # force: remove all existing run directories for each prefix before running - if [ "$force" == "1" ]; then - local run_prefix - for run_prefix in $run_prefixes; do - local dirs_to_remove - dirs_to_remove=$(ls -d "$exp_work_dir/output/run_sp_exp_${run_prefix}"* 2>/dev/null) - if [ -n "$dirs_to_remove" ]; then - for d in $dirs_to_remove; do - message "Force-removing $d" "$debug_out" -1 - command "rm -rf $d" $dry_run - done - fi - done - fi - # Check completeness of existing run output (main prefix) local run_dir=$(ls -dt "$exp_work_dir/output/run_sp_exp_${main_prefix}"* 2>/dev/null | head -1) local is_complete=1 @@ -306,7 +321,15 @@ function run_exp_job() { fi done - if [ "$is_complete" == "1" ]; then + # force: always remove and rerun. + # retry: remove and rerun only if the existing run is incomplete/failed. + if [ "$force" == "1" ]; then + remove_run_dirs "$exp_work_dir" "exp" "$run_prefixes" "Force-removing" + elif [ "$retry" == "1" ] && [ "$is_complete" == "0" ]; then + remove_run_dirs "$exp_work_dir" "exp" "$run_prefixes" "Retry: removing incomplete run" + fi + + if [ "$force" != "1" ] && [ "$is_complete" == "1" ]; then message "Complete $exp_id_disp: run_sp_exp_${main_prefix} ( $check_desc)" "$debug_out" -1 (( n_complete++ )) continue @@ -363,21 +386,6 @@ function run_tile_job() { local complete_checks=$3 local main_prefix="${run_prefixes%% *}" - # force: remove all existing run directories for each prefix before running - if [ "$force" == "1" ]; then - local run_prefix - for run_prefix in $run_prefixes; do - local dirs_to_remove - dirs_to_remove=$(ls -d "$work_dir/output/run_sp_tile_${run_prefix}"* 2>/dev/null) - if [ -n "$dirs_to_remove" ]; then - for d in $dirs_to_remove; do - message "Force-removing $d" "$debug_out" -1 - command "rm -rf $d" $dry_run - done - fi - done - fi - # Locate most recent 
existing run directory for the main prefix local run_dir run_dir=$(ls -dt "$work_dir/output/run_sp_tile_${main_prefix}"* 2>/dev/null | head -1) @@ -441,7 +449,15 @@ function run_tile_job() { done fi - if [ "$is_complete" == "1" ] && [ -n "$complete_checks" ]; then + # force: always remove and rerun. + # retry: remove and rerun only if the existing run is incomplete/failed. + if [ "$force" == "1" ]; then + remove_run_dirs "$work_dir" "tile" "$run_prefixes" "Force-removing" + elif [ "$retry" == "1" ] && [ "$is_complete" == "0" ]; then + remove_run_dirs "$work_dir" "tile" "$run_prefixes" "Retry: removing incomplete run" + fi + + if [ "$force" != "1" ] && [ "$is_complete" == "1" ] && [ -n "$complete_checks" ]; then message "Complete: ( $check_desc)" "$debug_out" -1 return 0 fi @@ -698,7 +714,7 @@ if [[ $do_job != 0 ]]; then run_tile_job 512 "${Letter}iViVi ${Letter}iViVi ${Letter}iViVi" "psfex_interp_runner:1 vignetmaker_runner_run_1:1 vignetmaker_runner_run_2:4" else run_tile_job 64 "fpsf" "fake_psf_runner:1" - run_tile_job 512 "ViVi VViVi" "vignetmaker_runner_run_1:1 vignetmaker_runner_run_2:4" + run_tile_job 512 "ViVi ViVi" "vignetmaker_runner_run_1:1 vignetmaker_runner_run_2:3" fi fi From c8721041dbe45875fa6abf849f77404f9daea56c Mon Sep 17 00:00:00 2001 From: martinkilbinger Date: Wed, 10 Jun 2026 13:57:06 +0200 Subject: [PATCH 12/24] im_sims docker deployment --- .github/workflows/deploy-image.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/deploy-image.yml b/.github/workflows/deploy-image.yml index b3b0dc54e..c7a8d8246 100644 --- a/.github/workflows/deploy-image.yml +++ b/.github/workflows/deploy-image.yml @@ -13,6 +13,7 @@ on: - develop - main - master + - im_sims pull_request: branches: - develop From dfb99a4e2aee6e4eb621b67aa9b9db7521b0f664 Mon Sep 17 00:00:00 2001 From: martinkilbinger Date: Wed, 10 Jun 2026 16:53:28 +0200 Subject: [PATCH 13/24] added .py to executables --- docs/source/pipeline_v2.0.md | 10 +++++++++- pyproject.toml | 
9 ++++----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/docs/source/pipeline_v2.0.md b/docs/source/pipeline_v2.0.md index 1252c1611..6bb093b18 100644 --- a/docs/source/pipeline_v2.0.md +++ b/docs/source/pipeline_v2.0.md @@ -60,12 +60,20 @@ run_job_canfar_v2.0.sh -e ID -j ### For image simulations -#### Download docker image +Download docker image ```bash apptainer pull shapepipe_im_sims.sif docker://ghcr.io/cosmostat/shapepipe:im_sims ``` +Activate container + +```bash +apptainer shell --bind /n17data,/n09data,/home /path/to/shapepipe_im_sims.sif +``` + + + ### CANFAR login Login to the canfar system with diff --git a/pyproject.toml b/pyproject.toml index ed906bf5d..54e6c80d6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -78,11 +78,10 @@ fitsio = ["fitsio"] dev = ["shapepipe[doc,jupyter,lint,release,test,fitsio]"] [project.scripts] -shapepipe_run = "shapepipe.shapepipe_run:main" -summary_run = "shapepipe.summary_run:main" -canfar_submit_job = "shapepipe.canfar_run:run_job" -canfar_monitor = "shapepipe.canfar_run:run_log" -canfar_monitor_log = "shapepipe.canfar_run:run_monitor_log" +shapepipe_run.py = "shapepipe.shapepipe_run:main" +canfar_submit_job.py = "shapepipe.canfar_run:run_job" +canfar_monitor.py = "shapepipe.canfar_run:run_log" +canfar_monitor_log.py = "shapepipe.canfar_run:run_monitor_log" [tool.uv] # shapepipe targets Linux only; skip Windows/macOS wheel resolution From 76f81b677abf98e24b923e26f5f432eee1e520d8 Mon Sep 17 00:00:00 2001 From: martinkilbinger Date: Wed, 10 Jun 2026 17:15:40 +0200 Subject: [PATCH 14/24] fixe pyproject bug --- pyproject.toml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 54e6c80d6..034e8507f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -78,10 +78,10 @@ fitsio = ["fitsio"] dev = ["shapepipe[doc,jupyter,lint,release,test,fitsio]"] [project.scripts] -shapepipe_run.py = "shapepipe.shapepipe_run:main" -canfar_submit_job.py = 
"shapepipe.canfar_run:run_job" -canfar_monitor.py = "shapepipe.canfar_run:run_log" -canfar_monitor_log.py = "shapepipe.canfar_run:run_monitor_log" +"shapepipe_run.py" = "shapepipe.shapepipe_run:main" +"canfar_submit_job.py" = "shapepipe.canfar_run:run_job" +"canfar_monitor.py" = "shapepipe.canfar_run:run_log" +"canfar_monitor_log.py" = "shapepipe.canfar_run:run_monitor_log" [tool.uv] # shapepipe targets Linux only; skip Windows/macOS wheel resolution From 797eeff2839cd867f2a53c3c24eba4a82459735e Mon Sep 17 00:00:00 2001 From: martinkilbinger Date: Wed, 10 Jun 2026 17:23:22 +0200 Subject: [PATCH 15/24] pyproject moved back to exe scripts without extension --- pyproject.toml | 8 ++++---- scripts/sh/job_sp_canfar_v2.0.bash | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 034e8507f..5d300829e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -78,10 +78,10 @@ fitsio = ["fitsio"] dev = ["shapepipe[doc,jupyter,lint,release,test,fitsio]"] [project.scripts] -"shapepipe_run.py" = "shapepipe.shapepipe_run:main" -"canfar_submit_job.py" = "shapepipe.canfar_run:run_job" -"canfar_monitor.py" = "shapepipe.canfar_run:run_log" -"canfar_monitor_log.py" = "shapepipe.canfar_run:run_monitor_log" +shapepipe_run = "shapepipe.shapepipe_run:main" +canfar_submit_job = "shapepipe.canfar_run:run_job" +canfar_monitor = "shapepipe.canfar_run:run_log" +canfar_monitor_log = "shapepipe.canfar_run:run_monitor_log" [tool.uv] # shapepipe targets Linux only; skip Windows/macOS wheel resolution diff --git a/scripts/sh/job_sp_canfar_v2.0.bash b/scripts/sh/job_sp_canfar_v2.0.bash index 0ea0d965a..1f46081a6 100755 --- a/scripts/sh/job_sp_canfar_v2.0.bash +++ b/scripts/sh/job_sp_canfar_v2.0.bash @@ -264,7 +264,7 @@ function command_cfg_shapepipe() { fi local config="$SP_CONFIG/$config_name" - local cmd="shapepipe_run.py -c $config $exclusive_flag $batch_flag" + local cmd="shapepipe_run -c $config $exclusive_flag $batch_flag" command "$cmd" "$str" } 
From d7af0c975386950dac0505ac6f560ee634d419d3 Mon Sep 17 00:00:00 2001 From: martinkilbinger Date: Tue, 16 Jun 2026 11:47:13 +0200 Subject: [PATCH 16/24] running create_final_cat.py on image_sims --- example/cfis_image_sims/final_cat.param | 102 ++++++++++++++ scripts/python/create_final_cat.py | 169 +++++++++++++++--------- scripts/sh/run_image_sims.sh | 2 + 3 files changed, 209 insertions(+), 64 deletions(-) create mode 100644 example/cfis_image_sims/final_cat.param diff --git a/example/cfis_image_sims/final_cat.param b/example/cfis_image_sims/final_cat.param new file mode 100644 index 000000000..cb57d019b --- /dev/null +++ b/example/cfis_image_sims/final_cat.param @@ -0,0 +1,102 @@ +# coordinates +XWIN_WORLD +YWIN_WORLD + +# tile ID, for plot of tile-dependent additive bias. +# Can maybe be removed. +TILE_ID + +# flags +FLAGS +NGMIX_MCAL_FLAGS + +# PSF ellipticity +NGMIX_ELL_PSFo_NOSHEAR + +# spread class +#SPREAD_CLASS + +# spread model flag and error +#SPREAD_MODEL +#SPREADERR_MODEL + +# Number of epochs (exposures) +N_EPOCH +NGMIX_N_EPOCH + +## Shape measurement outputs +## Ngmix: model fitting + +# galaxy ellipticity +NGMIX_ELL_1M +NGMIX_ELL_1P +NGMIX_ELL_2M +NGMIX_ELL_2P +NGMIX_ELL_NOSHEAR +#NGMIX_ELL_ERR_1M +#NGMIX_ELL_ERR_1P +#NGMIX_ELL_ERR_2M +#NGMIX_ELL_ERR_2P +NGMIX_ELL_ERR_NOSHEAR + +# flags +NGMIX_FLAGS_1M +NGMIX_FLAGS_1P +NGMIX_FLAGS_2M +NGMIX_FLAGS_2P +NGMIX_FLAGS_NOSHEAR + +# size and error +NGMIX_T_1M +NGMIX_T_1P +NGMIX_T_2M +NGMIX_T_2P +NGMIX_T_NOSHEAR +NGMIX_T_ERR_1M +NGMIX_T_ERR_1P +NGMIX_T_ERR_2M +NGMIX_T_ERR_2P +NGMIX_T_ERR_NOSHEAR +NGMIX_Tpsf_1M +NGMIX_Tpsf_1P +NGMIX_Tpsf_2M +NGMIX_Tpsf_2P +NGMIX_Tpsf_NOSHEAR + +# flux and error +NGMIX_FLUX_1M +NGMIX_FLUX_1P +NGMIX_FLUX_2M +NGMIX_FLUX_2P +NGMIX_FLUX_NOSHEAR +NGMIX_FLUX_ERR_1M +NGMIX_FLUX_ERR_1P +NGMIX_FLUX_ERR_2M +NGMIX_FLUX_ERR_2P +NGMIX_FLUX_ERR_NOSHEAR + +# magnitudes +MAG_AUTO +MAGERR_AUTO +MAG_WIN +MAGERR_WIN +FLUX_AUTO +FLUXERR_AUTO +FLUX_APER +FLUXERR_APER +FLUX_RADIUS + +# SNR from 
SExtractor +SNR_WIN + +FWHM_IMAGE +FWHM_WORLD + +# PSF size measured on original image +NGMIX_T_PSFo_NOSHEAR + +# PSF size measured on reconvolved image +# NGMIX_Tpsf_NOSHEAR + +# ngmix moment failure flag +NGMIX_MOM_FAIL diff --git a/scripts/python/create_final_cat.py b/scripts/python/create_final_cat.py index a0b10e8c9..50116b48a 100755 --- a/scripts/python/create_final_cat.py +++ b/scripts/python/create_final_cat.py @@ -61,7 +61,7 @@ def params_default(): Set default parameter values. """ - _params = { + _params = { "input_root_dir": ".", "merged_cat_path": "final_cat.hdf5", "param_path": None, @@ -71,29 +71,33 @@ def params_default(): "output_summary": "n_tiles_final.txt", "ID": None, "single_op": None, + "image_sims": False, } - _short_options = { - "input_root_dir": "-i", + _short_options = { + "input_root_dir": "-i", "merged_cat_path": "-m", "param_path": "-p", "patch": "-P", "list_only": "-l", "output_summary": "-o", "single_op": "-s", - } - _types = { - "hdu_num": "int", + "image_sims": "-I", + } + _types = { + "hdu_num": "int", "list_only": "bool", - } - _help_strings = { + "image_sims": "bool", + } + _help_strings = { "input_root_dir": "input root_dir for tile catalogues, default={}", "merged_cat_path": "merged catalogue path (hdf5 file), default={}", "param_path": "parameter file path, if not given use all columns, default={}", - "patch": "patch number, default={}", + "patch": "patch number (data) or grid subdir (image_sims), default={}", "list_only": "print list of patches and IDs only, default={}", "output_summary": "output file for numbre of tiles, default={}", "ID": "ID for single-ID operation, default={}", "single_op": "single ID operation, allowed are 'check', 'add', 'remove'; default={}", + "image_sims": "image simulations mode (different dir layout and run prefix), default={}", } return _params, _short_options, _types, _help_strings @@ -234,10 +238,12 @@ def print_list(params): n_tiles = 0 with h5py.File(params["merged_cat_path"], "r") as 
hdf5_file: - for patch in hdf5_file["patches"]: - #print(patch) - for id in hdf5_file[f"patches/{patch}"]: - n_tiles += 1 + if "patches" not in hdf5_file: + print("Warning: no 'patches' group in output file (0 tiles added?)") + else: + for patch in hdf5_file["patches"]: + for id in hdf5_file[f"patches/{patch}"]: + n_tiles += 1 with open(params["output_summary"], "w") as f_out: print(n_tiles, file=f_out) @@ -339,20 +345,50 @@ def copy_data(param_list, extracted_data, dtype): return structured_data +def collect_tile_ids_image_sims(patch_path): + """Collect tile IDs from image-sims layout: tiles/// + + Parameters + ---------- + patch_path : str + path to the grid subdir (e.g. .../1p2z_grid_1) + + Returns + ------- + list of (tile_id, tile_path) tuples + """ + id_pattern = re.compile(r"^\d+\.\d+$") + tiles_root = os.path.join(patch_path, "tiles") + result = [] + if not os.path.isdir(tiles_root): + return result + for prefix in os.listdir(tiles_root): + prefix_path = os.path.join(tiles_root, prefix) + if not os.path.isdir(prefix_path): + continue + for tile_id in os.listdir(prefix_path): + if id_pattern.match(tile_id): + result.append((tile_id, os.path.join(prefix_path, tile_id))) + return result + + def process(params): - - patch = rf"P{params['patch']}" - patch_pattern = re.compile(patch) - - # Define the nested path pattern for locating `.fits` files - file_pattern = "output/run_sp_Mc_*/make_cat_runner/output/*.fits" + + if params["image_sims"]: + patch_name = params["patch"] + run_prefix = "run_sp_tile_Mc_*" + else: + patch_name = rf"P{params['patch']}" + run_prefix = "run_sp_Mc_*" + + patch_pattern = re.compile(patch_name) # Regex pattern for tile IDs id_pattern = re.compile(r"^\d+\.\d+$") n_added = 0 IDs_added = [] - + # Open the HDF5 file (create it if it doesn't exist) if params["verbose"]: print(f"Initializing file {params['merged_cat_path']}") @@ -364,21 +400,30 @@ def process(params): # Skip non-matching entries if not patch_pattern.fullmatch(patch): continue 
- + # Full path to patch patch_path = os.path.join(params["input_root_dir"], patch) if not os.path.isdir(patch_path): if params["verbose"]: print(f"Path {patch_path} not found, skipping") continue - + # Get hdf5 group for this patch patch_group = get_patch_group(hdf5_file, patch, params["verbose"]) - # Get paths to all tile IDs and loop - tile_runs_path = os.path.join(patch_path, "tile_runs") - subdirs = os.listdir(tile_runs_path) - for id in tqdm.tqdm(subdirs, total=len(subdirs)): + # Collect (tile_id, tile_path) pairs depending on layout + if params["image_sims"]: + tile_items = collect_tile_ids_image_sims(patch_path) + else: + tile_runs_path = os.path.join(patch_path, "tile_runs") + tile_items = [ + (tid, os.path.join(tile_runs_path, tid)) + for tid in os.listdir(tile_runs_path) + if id_pattern.match(tid) + and os.path.isdir(os.path.join(tile_runs_path, tid)) + ] + + for id, id_path in tqdm.tqdm(tile_items, total=len(tile_items)): # Skip if the patch/ID data already exists if id in patch_group: @@ -386,44 +431,40 @@ def process(params): print(f"Skipping {id} (already processed)") continue - if id_pattern.match(id) and os.path.isdir(os.path.join(tile_runs_path, id)): - id_path = os.path.join(tile_runs_path, id) - - base_pattern = os.path.join(id_path, "output", "run_sp_Mc_*") - all_matches = [d for d in glob.glob(base_pattern) if os.path.isdir(d)] - if not all_matches: - if params["verbose"]: - print(f"Final cat for {id} not found, continuing") - continue - newest_dir = max(all_matches, key=os.path.getmtime) - - id_dash = re.sub("\.", "-", id) - fits_file = f"{newest_dir}/make_cat_runner/output/final_cat-{id_dash}.fits" - - # Exclude unsuccessful run without output FITS file - if not os.path.exists(fits_file): - if params["verbose"]: - print(f"Run without output file found for {id}, skipping") - continue - - if True: - extracted_data, dtype = read_data(fits_file, params) - - structured_data = copy_data(params["param_list"], extracted_data, dtype) - - # Create a 
new dataset - try: - patch_group.create_dataset( - str(id), - data=structured_data, - dtype=dtype, - ) - except: - print(f"Error for {id}: Could not create dataset in group {patch}") - raise - - n_added += 1 - IDs_added.append(id) + base_pattern = os.path.join(id_path, "output", run_prefix) + all_matches = [d for d in glob.glob(base_pattern) if os.path.isdir(d)] + if not all_matches: + if params["verbose"]: + print(f"Final cat for {id} not found, continuing") + continue + newest_dir = max(all_matches, key=os.path.getmtime) + + id_dash = re.sub(r"\.", "-", id) + fits_file = f"{newest_dir}/make_cat_runner/output/final_cat-{id_dash}.fits" + + # Exclude unsuccessful run without output FITS file + if not os.path.exists(fits_file): + if params["verbose"]: + print(f"Run without output file found for {id}, skipping") + continue + + extracted_data, dtype = read_data(fits_file, params) + + structured_data = copy_data(params["param_list"], extracted_data, dtype) + + # Create a new dataset + try: + patch_group.create_dataset( + str(id), + data=structured_data, + dtype=dtype, + ) + except: + print(f"Error for {id}: Could not create dataset in group {patch}") + raise + + n_added += 1 + IDs_added.append(id) if params["verbose"]: print(f"{n_added} tiles added ({' '.join(IDs_added)})") diff --git a/scripts/sh/run_image_sims.sh b/scripts/sh/run_image_sims.sh index 3d5fca8c8..ee467b9d6 100644 --- a/scripts/sh/run_image_sims.sh +++ b/scripts/sh/run_image_sims.sh @@ -32,6 +32,8 @@ for dg in "${dgs[@]}"; do cd .. + create_final_cat.py -I -m final_cat_$name.hdf5 -i . 
-p ~/shapepipe/example/cfis/final_cat.param -P $name -o $name/n_tiles_final.txt -v + done #--env PATH=/home/mkilbing/.local/bin:/home/mkilbing/astro/repositories/github/shapepipe/bin:$PATH --env PYTHONPATH=/home/mkilbing/astro/repositories/github/shapepipe/src:$PYTHONPATH /n17data/mkilbing/shapepipe_ngmix_v2.0.sif From c777efa07793150ae7811c2305828945ca7985b5 Mon Sep 17 00:00:00 2001 From: martinkilbinger Date: Wed, 24 Jun 2026 13:15:01 +0200 Subject: [PATCH 17/24] Pipeline running on image sims --- docs/source/pipeline_v2.0.md | 11 +- example/cfis/final_cat.param | 4 + example/cfis_image_sims/final_cat.param | 5 + scripts/python/create_final_cat.py | 11 ++ scripts/sh/init_run_v2.0.sh | 88 ++++++++++-- scripts/sh/job_sp_canfar_v2.0.bash | 4 +- scripts/sh/run_image_sims.sh | 39 ------ scripts/sh/run_job_sp_canfar_v2.0.bash | 175 +++++++++++++++++++----- 8 files changed, 250 insertions(+), 87 deletions(-) delete mode 100644 scripts/sh/run_image_sims.sh diff --git a/docs/source/pipeline_v2.0.md b/docs/source/pipeline_v2.0.md index 6bb093b18..d08f2d896 100644 --- a/docs/source/pipeline_v2.0.md +++ b/docs/source/pipeline_v2.0.md @@ -63,7 +63,7 @@ run_job_canfar_v2.0.sh -e ID -j Download docker image ```bash -apptainer pull shapepipe_im_sims.sif docker://ghcr.io/cosmostat/shapepipe:im_sims +apptainer pull shapepipe_im_sims.sif docker://ghcr.io/cosmostat/shapepipe:im_sims-runtime ``` Activate container @@ -72,6 +72,15 @@ Activate container apptainer shell --bind /n17data,/n09data,/home /path/to/shapepipe_im_sims.sif ``` +Run jobs +```bash +run_job_sp_canfar_v2.0.bash -e 233.293 -t image_sims -j +``` + +Help +```bash +run_job_sp_canfar_v2.0.bash -h +``` ### CANFAR login diff --git a/example/cfis/final_cat.param b/example/cfis/final_cat.param index eae7a2c04..d4dc631e4 100644 --- a/example/cfis/final_cat.param +++ b/example/cfis/final_cat.param @@ -6,6 +6,10 @@ YWIN_WORLD # Can maybe be removed. TILE_ID +# SExtractor number. 
TODO: Create unique UNIONS number +# from TILD_ID + NUMBER +NUMBER + # flags FLAGS IMAFLAGS_ISO diff --git a/example/cfis_image_sims/final_cat.param b/example/cfis_image_sims/final_cat.param index cb57d019b..0357c4850 100644 --- a/example/cfis_image_sims/final_cat.param +++ b/example/cfis_image_sims/final_cat.param @@ -6,8 +6,13 @@ YWIN_WORLD # Can maybe be removed. TILE_ID +# SExtractor number. TODO: Create unique UNIONS number +# from TILD_ID + NUMBER +NUMBER + # flags FLAGS +#IMAFLAGS_ISO NGMIX_MCAL_FLAGS # PSF ellipticity diff --git a/scripts/python/create_final_cat.py b/scripts/python/create_final_cat.py index 50116b48a..8219e7d13 100755 --- a/scripts/python/create_final_cat.py +++ b/scripts/python/create_final_cat.py @@ -235,7 +235,13 @@ def check_ID(merged_cat_path, ID, verbose=False): def print_list(params): + verbose = params.get("verbose", False) n_tiles = 0 + + if not os.path.exists(params["merged_cat_path"]): + print(f"File {params['merged_cat_path']} not found") + return + with h5py.File(params["merged_cat_path"], "r") as hdf5_file: if "patches" not in hdf5_file: @@ -244,6 +250,11 @@ def print_list(params): for patch in hdf5_file["patches"]: for id in hdf5_file[f"patches/{patch}"]: n_tiles += 1 + if verbose: + print(f" {patch}/{id}") + + if verbose: + print(f"Total: {n_tiles} tiles") with open(params["output_summary"], "w") as f_out: print(n_tiles, file=f_out) diff --git a/scripts/sh/init_run_v2.0.sh b/scripts/sh/init_run_v2.0.sh index bf671632e..293a78515 100755 --- a/scripts/sh/init_run_v2.0.sh +++ b/scripts/sh/init_run_v2.0.sh @@ -15,6 +15,9 @@ type="data" # Subdir for image_sims subdir="1z2z_grid_1" +force=0 +sample="" + # Default base run directory (permanent storage) #base_dir="$HOME/cosmostat/v2/v${version}" base_dir=`pwd` @@ -22,6 +25,8 @@ base_dir=`pwd` # ShapePipe repository root (for config symlink and tile list) sp_root="$HOME/shapepipe" +# Source params.py to copy into the run directory 
+params_src="$HOME/astro/repositories/github/sp_validation/notebooks/params.py" ## Help string usage="Usage: $(basename "$0") [OPTIONS] @@ -30,6 +35,9 @@ usage="Usage: $(basename "$0") [OPTIONS] -t, --type TYPE input type, allowed are 'data', 'image_sims', default='$type'\n -s, --subdir SUBDIR subdir for image simulations, default='$subdir'\n -d, --dir DIR\tbase run directory, default='$base_dir'\n + -P, --params PATH\tsource params.py to copy, default='$params_src'\n + --force\t\trecreate existing symlinks and parameter files\n + -S, --sample SAMPLE\tsample version for mask config (e.g. 6)\n " ## Parse command line @@ -51,6 +59,17 @@ while [ $# -gt 0 ]; do base_dir="$2" shift ;; + -P|--params) + params_src="$2" + shift + ;; + --force) + force=1 + ;; + -S|--sample) + sample="$2" + shift + ;; *) echo "Unknown option: $1" echo -ne $usage @@ -71,7 +90,7 @@ if [ "$type" == "data" ]; then elif [ "$type" == "image_sims" ]; then - config_dir="$sp_root/example/cfis_im_sims" + config_dir="$sp_root/example/cfis_image_sims" tiles_src="$sp_root/auxdir/CFIS/im_sims_202606/numbers.txt" input_dir_base="/n09data/hervas/skills_out" @@ -94,8 +113,22 @@ mkdir -p "$base_dir/$subdir" cd "$base_dir/$subdir" if [ "$type" == "image_sims" ]; then - ln -s "$input_dir_tiles" input_tiles - ln -s "$input_dir_exp" input_exp + for link in input_tiles input_exp; do + src=$([ "$link" == "input_tiles" ] && echo "$input_dir_tiles" || echo "$input_dir_exp") + [ $force -eq 1 ] && rm -f "$link" + [ ! -e "$link" ] && ln -s "$src" "$link" + done + + if [ -n "$sample" ]; then + mask_src="$HOME/astro/repositories/github/sp_validation/config/calibration/mask_v1.X.${sample}_im_sim.yaml" + [ $force -eq 1 ] && rm -f config_mask.yaml + if [ ! -e config_mask.yaml ]; then + ln -s "$mask_src" config_mask.yaml + echo "Created symlink: config_mask.yaml -> $mask_src" + else + echo "config_mask.yaml already exists, skipping" + fi + fi fi echo "Creating tiles/ directory..." 
@@ -116,28 +149,54 @@ mkdir -p debug # Config directory (will be symlinked) -if [ -L cfis ]; then - echo "cfis symlink already exists, skipping" -elif [ -d cfis ]; then +if [ -d cfis ] && [ ! -L cfis ]; then echo "WARNING: cfis/ exists as a directory, not creating symlink" else - ln -s "$config_dir" cfis - echo "Created symlink: cfis -> $config_dir" + [ $force -eq 1 ] && rm -f cfis + if [ ! -e cfis ]; then + ln -s "$config_dir" cfis + echo "Created symlink: cfis -> $config_dir" + else + echo "cfis symlink already exists, skipping" + fi fi # --- Tile number list --- echo "Creating tile_numbers.txt symlink..." -if [ -L tile_numbers.txt ]; then - echo " tile_numbers.txt symlink already exists, skipping" -elif [ -f tile_numbers.txt ]; then +if [ -f tile_numbers.txt ] && [ ! -L tile_numbers.txt ]; then echo " WARNING: tile_numbers.txt exists as a regular file, not creating symlink" else - ln -s "$tiles_src" tile_numbers.txt - echo " Created symlink: tile_numbers.txt -> $tiles_src" + [ $force -eq 1 ] && rm -f tile_numbers.txt + if [ ! -e tile_numbers.txt ]; then + ln -s "$tiles_src" tile_numbers.txt + echo " Created symlink: tile_numbers.txt -> $tiles_src" + else + echo " tile_numbers.txt symlink already exists, skipping" + fi fi n_tiles=$(wc -l < tile_numbers.txt) echo " $n_tiles tiles" +# --- params.py --- +echo "Creating params.py..." +if [ -f params.py ] && [ $force -eq 0 ]; then + echo " params.py already exists, skipping" +else + cp "$params_src" params.py + sed -i "s/[\"']IMAFLAGS_ISO[\"'],\s*//g; s/,\s*[\"']IMAFLAGS_ISO[\"']//g" params.py + if [ "$type" == "image_sims" ]; then + sed -i \ + -e "s/^name = .*/name = '$subdir'/" \ + -e "s/^star_cat_path = .*/star_cat_path = None/" \ + -e "s/^output_format = .*/output_format = '.hdf5'/" \ + params.py + echo " Copied and adapted: params.py <- $params_src" + echo " name = '$subdir', star_cat_path = None, output_format = '.hdf5'" + else + echo " Copied: params.py <- $params_src" + fi +fi + echo "" echo "Done. 
Directory structure:" echo " $base_dir/$subdir" @@ -145,7 +204,8 @@ echo " ├── tiles/" echo " ├── exp/" echo " ├── logs/" echo " ├── cfis -> ${config_dir}" -echo " └── tile_numbers.txt -> ${tiles_src}" +echo " ├── tile_numbers.txt -> ${tiles_src}" +echo " └── params.py <- ${params_src}" if [ "$type" == "image_sims" ]; then echo " ___ input_dir_tiles -> $input_dir_tiles" echo " ___ input_dir_exp -> $input_dir_exp" diff --git a/scripts/sh/job_sp_canfar_v2.0.bash b/scripts/sh/job_sp_canfar_v2.0.bash index 1f46081a6..d8f3f21c2 100755 --- a/scripts/sh/job_sp_canfar_v2.0.bash +++ b/scripts/sh/job_sp_canfar_v2.0.bash @@ -235,10 +235,10 @@ function command () { else echo -e "${RED}error, return value = $res${NC}" if [ $STOP == 1 ]; then - echo "${RED}exiting 'canfar_sp.bash', error in command '$cmd'${NC}" + echo "${RED}exiting '$(basename "$0")', error in command '$cmd'${NC}" exit $res else - echo "${RED}continuing 'canfar_sp.bash', error in command '$cmd'${NC}" + echo "${RED}continuing '$(basename "$0")', error in command '$cmd'${NC}" fi fi fi diff --git a/scripts/sh/run_image_sims.sh b/scripts/sh/run_image_sims.sh deleted file mode 100644 index ee467b9d6..000000000 --- a/scripts/sh/run_image_sims.sh +++ /dev/null @@ -1,39 +0,0 @@ -base=$HOME/v2.0/image_sims - -type="grid" - -tile_ID="233.293" - -if [ "$type" == "grid" ]; then - str_type="_grid" -else - str_type="" -fi - -job=4091 - -num=1 - -dgs=("m" "z" "p") - -mkdir -p $base/$type -cd $base/$type - -for dg in "${dgs[@]}"; do - - name="1${dg}2${dg}${str_type}_$num" - - init_run_v2.0.sh -t image_sims -s $name - - cd $name - - apptainer exec --bind /n09data,/home \ - run_job_sp_canfar_v2.0.bash -e ${tile_ID} -t image_sims -j $job - - cd .. - - create_final_cat.py -I -m final_cat_$name.hdf5 -i . 
-p ~/shapepipe/example/cfis/final_cat.param -P $name -o $name/n_tiles_final.txt -v - -done - -#--env PATH=/home/mkilbing/.local/bin:/home/mkilbing/astro/repositories/github/shapepipe/bin:$PATH --env PYTHONPATH=/home/mkilbing/astro/repositories/github/shapepipe/src:$PYTHONPATH /n17data/mkilbing/shapepipe_ngmix_v2.0.sif diff --git a/scripts/sh/run_job_sp_canfar_v2.0.bash b/scripts/sh/run_job_sp_canfar_v2.0.bash index fd2bbb5a3..c2104394d 100755 --- a/scripts/sh/run_job_sp_canfar_v2.0.bash +++ b/scripts/sh/run_job_sp_canfar_v2.0.bash @@ -27,9 +27,11 @@ type="data" #scratch="/scratch/$USER/shapepipe/v${version}" scratch="" test_only=0 -check=0 +check_only=0 +run_and_check=1 force=0 retry=0 +quiet=0 VERBOSE=1 pat="-- " @@ -52,10 +54,11 @@ ${JOB_LIST_HELP} -e, --exclusive ID\timage ID\n -n, --dry_run\t\tDRY RUN, no actual processing; default is $dry_run\n --debug_out PATH\tdebug output file PATH, default=none\n --test\t\t\ttest mode, no processing\n - --check\t\tcheck download completeness only (job 8), no processing\n + --check_only\t\tcheck completeness only, no processing\n --force\t\tremove existing module output dir(s) before running\n --retry\t\tskip jobs whose existing run is complete; remove and rerun\n \t\t\tonly those whose existing run is incomplete/failed\n + -q, --quiet\t\tsuppress all output except Complete/Missing/Incomplete/WARNING lines\n " ## Help if no arguments @@ -118,8 +121,8 @@ while [ $# -gt 0 ]; do --test) test_only=1 ;; - --check) - check=1 + --check_only) + check_only=1 ;; --force) force=1 @@ -127,6 +130,9 @@ while [ $# -gt 0 ]; do --retry) retry=1 ;; + -q|--quiet) + quiet=1 + ;; esac shift done @@ -139,7 +145,9 @@ function message() { my_debug_out=$2 my_exit=$3 - echo $msg + if [ "$quiet" == "0" ] || [[ "$msg" =~ (Complete|[Mm]issing|Incomplete|WARNING) ]]; then + echo $msg + fi if [ -n "$my_debug_out" ]; then echo ${pat}$msg >> $my_debug_out fi @@ -229,7 +237,7 @@ function run_exp_job() { message "Exposure numbers file 
exp_numbers-${IDra}-${IDdec}.txt not found in $work_dir/output" "$debug_out" 10 fi - if [ "$check" == "1" ]; then + if [ "$check_only" == "1" ]; then message "Check mode: skipping job $exp_job" "$debug_out" -1 fi @@ -336,7 +344,7 @@ function run_exp_job() { fi # Report incomplete/missing in check mode; in run mode handle and proceed - if [ "$check" == "1" ]; then + if [ "$check_only" == "1" ]; then if [ -n "$run_dir" ]; then message " Benign incomplete: $exp_id_disp ($check_desc)" "$debug_out" -1 else @@ -354,9 +362,9 @@ function run_exp_job() { [ -n "$debug_out" ] && debug_flag="--debug_out $debug_out" echo "$(basename "$0") -j $exp_job -e $exp_id" > "$exp_log_file" - echo "pwd=`pwd`" + [ "$quiet" == "0" ] && echo "pwd=`pwd`" command "job_sp_canfar_v2.0.bash -c $config_dir -p $psf -r $retrieve --tile_det $tile_det --tile_mask $tile_mask -j $exp_job --n_smp $N_SMP --nsh_jobs $N_SMP $debug_flag" $dry_run 2>&1 | tee -a "$exp_log_file" - echo "Done with job_sp_canfar_v2.0.bash" + [ "$quiet" == "0" ] && echo "Done with job_sp_canfar_v2.0.bash" done < "$exp_numbers_file" @@ -462,11 +470,11 @@ function run_tile_job() { return 0 fi - if [ "$check" == "1" ]; then + if [ "$check_only" == "1" ]; then if [ -n "$run_dir" ]; then - message "Incomplete: ($check_desc)" "$debug_out" -1 + message "Incomplete: ( $check_desc)" "$debug_out" -1 else - message "Missing: ($check_desc)" "$debug_out" -1 + message "Missing: ( $check_desc)" "$debug_out" -1 fi return 0 fi @@ -486,9 +494,49 @@ function run_tile_job() { } +# Wrap a run_tile_job or run_exp_job call with check-log and force-cleanup. +# Usage: run_job_logged BIT FUNC [FUNC_ARGS...] 
+# - With --force: removes log_job_ID_BIT.txt before running FUNC +# - With --check_only: captures output of FUNC; writes log_job_ID_BIT.txt only if no +# Missing/Incomplete lines appear +# - Otherwise: calls FUNC; if it returns 0, writes the log (with run_and_check=1 FUNC is first re-run in check mode and the log is written only if no Missing/Incomplete lines appear) +function run_job_logged() { + local bit=$1 + shift + local log="$dir/logs/log_job_${ID}_${bit}.txt" + + [ "$force" == "1" ] && rm -f "$log" + + if [ "$check_only" == "1" ]; then + local out + out=$( "$@" 2>&1 ) + echo "$out" + echo "$out" | grep -qiE "(Missing|Incomplete)" || echo "$out" > "$log" + else + "$@" + local rc=$? + if [ $rc -eq 0 ]; then + if [ "$run_and_check" == "1" ]; then + check_only=1 + local saved_force=$force + force=0 + local out + out=$( "$@" 2>&1 ) + check_only=0 + force=$saved_force + echo "$out" + echo "$out" | grep -qiE "(Missing|Incomplete)" || echo "$out" > "$log" + else + echo "Completed job $bit $(date)" > "$log" + fi + fi + fi +} + + if [ "$type" == "data" ]; then - echo "Running on data" + [ "$quiet" == "0" ] && echo "Running on data" retrieve="vos" config_dir=$HOME/shapepipe/example/cfis export SP_DIR=$dir @@ -496,7 +544,7 @@ if [ "$type" == "data" ]; then elif [ "$type" == "image_sims" ]; then - echo "Running on image simulations" + [ "$quiet" == "0" ] && echo "Running on image simulations" retrieve="symlink" config_dir=$HOME/shapepipe/example/cfis_image_sims # SP_DIR points to the run directory where input_tiles and input_exp live; @@ -512,7 +560,7 @@ else fi -echo "config_dir=$config_dir" +[ "$quiet" == "0" ] && echo "config_dir=$config_dir" # Init message @@ -542,6 +590,17 @@ if [ "$dry_run" != "0" ] && [ "$dry_run" != "1" ]; then message "dry_run must be 0 or 1, not $dry_run" "$debug_out" 8 fi +## Check input links +for link in "$dir/input_tiles" "$dir/input_exp" "$dir/cfis"; do + if [ -L "$link" ]; then + if [ ! -e "$link" ]; then + message "Broken symlink: $link" "$debug_out" 6 + fi + elif [ !
-e "$link" ]; then + message "Missing path: $link" "$debug_out" 6 + fi +done + # Start script @@ -561,8 +620,10 @@ Letter=${letter^} cd $dir -# Derive tile path components from ID (e.g. "000.227" -> IDra="000") +# Derive tile path components from ID (e.g. "000.227" -> IDra="000", IDdec="227") IDra=${ID%%.*} +IDdec=${ID##*.} +ID_DASHED="${IDra}-${IDdec}" work_dir="$dir/tiles/$IDra/$ID" log_file="$work_dir/job_sp_canfar_v2.0.log" @@ -589,7 +650,7 @@ if [ ! -d "output" ]; then fi -echo -n "pwd: "; pwd +[ "$quiet" == "0" ] && { echo -n "pwd: "; pwd; } # Avoid Qt error with setools @@ -617,19 +678,23 @@ if [[ $do_job != 0 ]]; then else n_exp=4 fi - run_tile_job 1 "Git" "get_images_runner:${n_exp}" + run_job_logged 1 run_tile_job 1 "Git" "get_images_runner:${n_exp}" fi (( do_job = job & 2 )) if [[ $do_job != 0 ]]; then + log_2="$dir/logs/log_job_${ID}_2.txt" + [ "$force" == "1" ] && rm -f "$log_2" if [ "$type" == "image_sims" ]; then # Image sims weights are already uncompressed; fake the Uz output directory # so downstream jobs can find the weight via last:uncompress_fits_runner. weight_src="$dir/input_tiles/CFIS_simu_weight-${ID//./-}.fits" - if [ "$check" == "1" ]; then + if [ "$check_only" == "1" ]; then uz_run_dir=$(ls -dt "$work_dir/output/run_sp_tile_Uz"* 2>/dev/null | head -1) if [ -n "$uz_run_dir" ] && [ -e "$uz_run_dir/uncompress_fits_runner/output/$(basename $weight_src)" ]; then - message "Complete: Uz $(basename $weight_src)" "$debug_out" -1 + msg="Complete: ( Uz/uncompress_fits_runner[fake] 1/1 )" + message "$msg" "$debug_out" -1 + echo "$msg" > "$log_2" else message "Missing: Uz $(basename $weight_src)" "$debug_out" -1 fi @@ -639,17 +704,29 @@ if [[ $do_job != 0 ]]; then if [ -e "$weight_src" ] && [ ! 
-e "$uz_out/$(basename $weight_src)" ]; then command "ln -sf $weight_src $uz_out/$(basename $weight_src)" $dry_run fi + if [ "$run_and_check" == "1" ]; then + uz_run_dir=$(ls -dt "$work_dir/output/run_sp_tile_Uz"* 2>/dev/null | head -1) + if [ -n "$uz_run_dir" ] && [ -e "$uz_run_dir/uncompress_fits_runner/output/$(basename $weight_src)" ]; then + msg="Complete: Uz $(basename $weight_src)" + message "$msg" "$debug_out" -1 + echo "$msg" > "$log_2" + else + message "Missing: Uz $(basename $weight_src)" "$debug_out" -1 + fi + else + echo "Completed job 2 $(date)" > "$log_2" + fi fi else # Job 2: uncompress tile weights - run_tile_job 2 "Uz" "uncompress_fits_runner:1" + run_job_logged 2 run_tile_job 2 "Uz" "uncompress_fits_runner:1" fi fi (( do_job = job & 4 )) if [[ $do_job != 0 ]]; then # Job 4: find exposures - run_tile_job 4 "Fe" "find_exposures_runner:1" + run_job_logged 4 run_tile_job 4 "Fe" "find_exposures_runner:1" fi (( do_job = job & 8 )) @@ -660,19 +737,19 @@ if [[ $do_job != 0 ]]; then else n_exp=6 fi - run_exp_job 8 "Gie" "get_images_runner:${n_exp}" + run_job_logged 8 run_exp_job 8 "Gie" "get_images_runner:${n_exp}" fi (( do_job = job & 16 )) if [[ $do_job != 0 ]]; then # Job 16: split exposures, get WCS headers - run_exp_job 16 "Sp" "split_exp_runner:121" + run_job_logged 16 run_exp_job 16 "Sp" "split_exp_runner:121" fi (( do_job = job & 32 )) if [[ $do_job != 0 ]]; then # Job 32: mask exposures - run_exp_job 32 "Ma" "mask_runner:40" + run_job_logged 32 run_exp_job 32 "Ma" "mask_runner:40" fi (( do_job = job & 64 )) @@ -683,7 +760,7 @@ if [[ $do_job != 0 ]]; then if [ "$type" == "image_sims" ]; then message "Job 64 (fake PSF) is handled as part of job 512 for image_sims — skipping." 
"$debug_out" -1 elif [ "$psf" == "psfex" ]; then - run_exp_job 64 "SxSePsf${Letter}i" "sextractor_runner:80 psfex_runner:80 psfex_interp_runner:40::warn setools_runner:80:rand_split" + run_job_logged 64 run_exp_job 64 "SxSePsf${Letter}i" "sextractor_runner:80 psfex_runner:80 psfex_interp_runner:40::warn setools_runner:80:rand_split" else message "MCCD not implemented yet for v2.0" "$debug_out" 10 fi @@ -692,17 +769,17 @@ fi (( do_job = job & 128 )) if [[ $do_job != 0 ]]; then # Job 128: merge exposure WCS headers into tile-level sqlite log - run_tile_job 128 "Mh_exp" "merge_headers_runner:1" + run_job_logged 128 run_tile_job 128 "Mh_exp" "merge_headers_runner:1" fi (( do_job = job & 256 )) if [[ $do_job != 0 ]]; then # Job 256: object selection on tiles if [ "$tile_det" == "uc" ]; then - run_tile_job 256 "Gic Uc" "get_images_runner:2 read_ext_sexcat_runner:1" + run_job_logged 256 run_tile_job 256 "Gic Uc" "get_images_runner:2 read_ext_sexcat_runner:1" else n_exp=2 - run_tile_job 256 "Sx" "sextractor_runner:$n_exp" + run_job_logged 256 run_tile_job 256 "Sx" "sextractor_runner:$n_exp" fi fi @@ -710,24 +787,60 @@ fi if [[ $do_job != 0 ]]; then # Job 512: process tiles ([PSF interp,] vignets) # For image_sims: fake PSF runs first (requires sexcat from job 256), then vignets + log_512="$dir/logs/log_job_${ID}_512.txt" + [ "$force" == "1" ] && rm -f "$log_512" if [ "$type" == "data" ]; then + if [ "$check_only" == "1" ]; then + out=$(run_tile_job 512 "${Letter}iViVi ${Letter}iViVi ${Letter}iViVi" "psfex_interp_runner:1 vignetmaker_runner_run_1:1 vignetmaker_runner_run_2:4" 2>&1) + echo "$out" + echo "$out" | grep -qiE "(Missing|Incomplete)" || echo "$out" > "$log_512" + else run_tile_job 512 "${Letter}iViVi ${Letter}iViVi ${Letter}iViVi" "psfex_interp_runner:1 vignetmaker_runner_run_1:1 vignetmaker_runner_run_2:4" + if [ "$run_and_check" == "1" ]; then + check_only=1; saved_force_512=$force; force=0 + out=$(run_tile_job 512 "${Letter}iViVi ${Letter}iViVi
${Letter}iViVi" "psfex_interp_runner:1 vignetmaker_runner_run_1:1 vignetmaker_runner_run_2:4" 2>&1) + check_only=0; force=$saved_force_512 + echo "$out" + echo "$out" | grep -qiE "(Missing|Incomplete)" || echo "$out" > "$log_512" + else + echo "Completed job 512 $(date)" > "$log_512" + fi + fi else + if [ "$check_only" == "1" ]; then + out1=$(run_tile_job 64 "fpsf" "fake_psf_runner:1" 2>&1) + out2=$(run_tile_job 512 "ViVi ViVi" "vignetmaker_runner_run_1:1 vignetmaker_runner_run_2:3" 2>&1) + echo "$out1"; echo "$out2" + { echo "$out1"; echo "$out2"; } | grep -qiE "(Missing|Incomplete)" || \ + { echo "$out1"; echo "$out2"; } > "$log_512" + else run_tile_job 64 "fpsf" "fake_psf_runner:1" run_tile_job 512 "ViVi ViVi" "vignetmaker_runner_run_1:1 vignetmaker_runner_run_2:3" + if [ "$run_and_check" == "1" ]; then + check_only=1; saved_force_512im=$force; force=0 + out1=$(run_tile_job 64 "fpsf" "fake_psf_runner:1" 2>&1) + out2=$(run_tile_job 512 "ViVi ViVi" "vignetmaker_runner_run_1:1 vignetmaker_runner_run_2:3" 2>&1) + check_only=0; force=$saved_force_512im + echo "$out1"; echo "$out2" + { echo "$out1"; echo "$out2"; } | grep -qiE "(Missing|Incomplete)" || \ + { echo "$out1"; echo "$out2"; } > "$log_512" + else + echo "Completed job 512 $(date)" > "$log_512" + fi + fi fi fi (( do_job = job & 1024 )) if [[ $do_job != 0 ]]; then # Job 1024: shape measurement - run_tile_job 1024 "Ng" "ngmix_runner:1" + run_job_logged 1024 run_tile_job 1024 "Ng" "ngmix_runner:1" fi (( do_job = job & 2048 )) if [[ $do_job != 0 ]]; then # Job 2048: merge catalogues - run_tile_job 2048 "Mc_${psf}" "make_cat_runner:1" + run_job_logged 2048 run_tile_job 2048 "Mc_${psf}" "make_cat_runner:1" fi From c4b389723ae47f1fc999ef8ed4b398b7f42e92ab Mon Sep 17 00:00:00 2001 From: martinkilbinger Date: Fri, 26 Jun 2026 13:28:19 +0200 Subject: [PATCH 18/24] config files: change batch size to 1 (default) --- example/cfis_image_sims/config_exp_Ma_onthefly.ini | 2 +- example/cfis_image_sims/config_exp_Sp.ini
| 2 +- example/cfis_image_sims/config_tile_Ma_onthefly.ini | 2 +- example/cfis_image_sims/config_tile_Mc_psfex.ini | 2 +- example/cfis_image_sims/config_tile_Mh_exp.ini | 2 +- example/cfis_image_sims/config_tile_Ng_batch_psfex_sx.ini | 2 +- example/cfis_image_sims/config_tile_PiViVi_canfar_sx.ini | 2 +- example/cfis_image_sims/config_tile_Sx_nomask.ini | 2 +- scripts/sh/run_job_sp_canfar_v2.0.bash | 7 ++++++- 9 files changed, 14 insertions(+), 9 deletions(-) diff --git a/example/cfis_image_sims/config_exp_Ma_onthefly.ini b/example/cfis_image_sims/config_exp_Ma_onthefly.ini index f5699d82a..df0307a19 100644 --- a/example/cfis_image_sims/config_exp_Ma_onthefly.ini +++ b/example/cfis_image_sims/config_exp_Ma_onthefly.ini @@ -44,7 +44,7 @@ OUTPUT_DIR = $SP_RUN/output [JOB] # Batch size of parallel processing (optional), default is 1, i.e. run all jobs in serial -SMP_BATCH_SIZE = 4 +SMP_BATCH_SIZE = 1 # Timeout value (optional), default is None, i.e. no timeout limit applied TIMEOUT = 96:00:00 diff --git a/example/cfis_image_sims/config_exp_Sp.ini b/example/cfis_image_sims/config_exp_Sp.ini index f21b77aaf..ca010b643 100644 --- a/example/cfis_image_sims/config_exp_Sp.ini +++ b/example/cfis_image_sims/config_exp_Sp.ini @@ -45,7 +45,7 @@ OUTPUT_DIR = $SP_RUN/output [JOB] # Batch size of parallel processing (optional), default is 1, i.e. run all jobs in serial -SMP_BATCH_SIZE = 8 +SMP_BATCH_SIZE = 1 # Timeout value (optional), default is None, i.e. no timeout limit applied TIMEOUT = 96:00:00 diff --git a/example/cfis_image_sims/config_tile_Ma_onthefly.ini b/example/cfis_image_sims/config_tile_Ma_onthefly.ini index 2e891246a..0f49eccea 100644 --- a/example/cfis_image_sims/config_tile_Ma_onthefly.ini +++ b/example/cfis_image_sims/config_tile_Ma_onthefly.ini @@ -44,7 +44,7 @@ OUTPUT_DIR = $SP_RUN/output [JOB] # Batch size of parallel processing (optional), default is 1, i.e. 
run all jobs in serial -SMP_BATCH_SIZE = 8 +SMP_BATCH_SIZE = 1 # Timeout value (optional), default is None, i.e. no timeout limit applied TIMEOUT = 96:00:00 diff --git a/example/cfis_image_sims/config_tile_Mc_psfex.ini b/example/cfis_image_sims/config_tile_Mc_psfex.ini index e37b71dbe..728ae89e7 100644 --- a/example/cfis_image_sims/config_tile_Mc_psfex.ini +++ b/example/cfis_image_sims/config_tile_Mc_psfex.ini @@ -44,7 +44,7 @@ OUTPUT_DIR = ./output [JOB] # Batch size of parallel processing (optional), default is 1, i.e. run all jobs in serial -SMP_BATCH_SIZE = 8 +SMP_BATCH_SIZE = 1 # Timeout value (optional), default is None, i.e. no timeout limit applied TIMEOUT = 96:00:00 diff --git a/example/cfis_image_sims/config_tile_Mh_exp.ini b/example/cfis_image_sims/config_tile_Mh_exp.ini index d213c0776..94270a7e6 100644 --- a/example/cfis_image_sims/config_tile_Mh_exp.ini +++ b/example/cfis_image_sims/config_tile_Mh_exp.ini @@ -47,7 +47,7 @@ OUTPUT_DIR = $SP_RUN/output [JOB] # Batch size of parallel processing (optional), default is 1, i.e. run all jobs in serial -SMP_BATCH_SIZE = 16 +SMP_BATCH_SIZE = 1 # Timeout value (optional), default is None, i.e. no timeout limit applied TIMEOUT = 96:00:00 diff --git a/example/cfis_image_sims/config_tile_Ng_batch_psfex_sx.ini b/example/cfis_image_sims/config_tile_Ng_batch_psfex_sx.ini index 3e5690531..ae08976a2 100644 --- a/example/cfis_image_sims/config_tile_Ng_batch_psfex_sx.ini +++ b/example/cfis_image_sims/config_tile_Ng_batch_psfex_sx.ini @@ -44,7 +44,7 @@ OUTPUT_DIR = $SP_RUN/output [JOB] # Batch size of parallel processing (optional), default is 1, i.e. run all jobs in serial -SMP_BATCH_SIZE = 24 +SMP_BATCH_SIZE = 1 # Timeout value (optional), default is None, i.e. 
no timeout limit applied TIMEOUT = 96:00:00 diff --git a/example/cfis_image_sims/config_tile_PiViVi_canfar_sx.ini b/example/cfis_image_sims/config_tile_PiViVi_canfar_sx.ini index fb9b3c46a..025097f81 100644 --- a/example/cfis_image_sims/config_tile_PiViVi_canfar_sx.ini +++ b/example/cfis_image_sims/config_tile_PiViVi_canfar_sx.ini @@ -46,7 +46,7 @@ OUTPUT_DIR = $SP_RUN/output [JOB] # Batch size of parallel processing (optional), default is 1, i.e. run all jobs in serial -SMP_BATCH_SIZE = 16 +SMP_BATCH_SIZE = 1 # Timeout value (optional), default is None, i.e. no timeout limit applied TIMEOUT = 96:00:00 diff --git a/example/cfis_image_sims/config_tile_Sx_nomask.ini b/example/cfis_image_sims/config_tile_Sx_nomask.ini index 62d1bffed..810614c5d 100644 --- a/example/cfis_image_sims/config_tile_Sx_nomask.ini +++ b/example/cfis_image_sims/config_tile_Sx_nomask.ini @@ -45,7 +45,7 @@ OUTPUT_DIR = $SP_RUN/output [JOB] # Batch size of parallel processing (optional), default is 1, i.e. run all jobs in serial -SMP_BATCH_SIZE = 16 +SMP_BATCH_SIZE = 1 # Timeout value (optional), default is None, i.e. no timeout limit applied TIMEOUT = 96:00:00 diff --git a/scripts/sh/run_job_sp_canfar_v2.0.bash b/scripts/sh/run_job_sp_canfar_v2.0.bash index c2104394d..b555077c4 100755 --- a/scripts/sh/run_job_sp_canfar_v2.0.bash +++ b/scripts/sh/run_job_sp_canfar_v2.0.bash @@ -758,7 +758,12 @@ if [[ $do_job != 0 ]]; then # For image_sims: fake PSF runs as part of job 512 (requires sexcat from job 256) # For data: run full exposure-level PSF modelling pipeline if [ "$type" == "image_sims" ]; then - message "Job 64 (fake PSF) is handled as part of job 512 for image_sims — skipping." 
"$debug_out" -1 + # Fake PSF is handled inside job 512; write placeholder log so the sequence is complete + log_64="$dir/logs/log_job_${ID}_64.txt" + [ "$force" == "1" ] && rm -f "$log_64" + msg="Complete: job 64 placeholder (fake PSF runs as part of job 512)" + message "$msg" "$debug_out" -1 + echo "$msg" > "$log_64" elif [ "$psf" == "psfex" ]; then run_job_logged 64 run_exp_job 64 "SxSePsf${Letter}i" "sextractor_runner:80 psfex_runner:80 psfex_interp_runner:40::warn setools_runner:80:rand_split" else From 995eb6aac654c0ea00063a0f71c0c3ba891938da Mon Sep 17 00:00:00 2001 From: martinkilbinger Date: Fri, 26 Jun 2026 13:29:06 +0200 Subject: [PATCH 19/24] snakemake setup for image simulations --- docs/source/image_sims_calibration.md | 301 ++++++++++++++++++ scripts/image_sims_pipeline/README.md | 80 +++++ scripts/image_sims_pipeline/Snakefile | 193 +++++++++++ .../image_sims_pipeline/config.yaml.template | 34 ++ scripts/image_sims_pipeline/info.py | 230 +++++++++++++ scripts/image_sims_pipeline/monitor_mbias.py | 264 +++++++++++++++ scripts/sh/apptainer_noslurm.sh | 10 + 7 files changed, 1112 insertions(+) create mode 100644 docs/source/image_sims_calibration.md create mode 100644 scripts/image_sims_pipeline/README.md create mode 100644 scripts/image_sims_pipeline/Snakefile create mode 100644 scripts/image_sims_pipeline/config.yaml.template create mode 100644 scripts/image_sims_pipeline/info.py create mode 100644 scripts/image_sims_pipeline/monitor_mbias.py create mode 100755 scripts/sh/apptainer_noslurm.sh diff --git a/docs/source/image_sims_calibration.md b/docs/source/image_sims_calibration.md new file mode 100644 index 000000000..809924adf --- /dev/null +++ b/docs/source/image_sims_calibration.md @@ -0,0 +1,301 @@ +# Image Simulations Calibration Pipeline + +## Overview + +The image simulations calibration pipeline derives multiplicative (m) and additive (c) shear bias corrections from synthetic image simulations. 
It uses ShapePipe to measure galaxy shapes in five pre-sheared grids, then computes bias by comparing measurements across shear directions. + +**Key outputs:** +- `m_bias_results.yaml` / `m_bias_results.txt` — final m₁, m₂, c₁, c₂ bias estimates with bootstrap errors +- `mbias_cumulative.yaml` — convergence history (m/c as function of n_tiles) +- `mbias_convergence.png` — m and c vs tile count (2 panels) +- `mbias_errors.png` — error shrinkage vs tile count (2 panels) + +--- + +## Quick Start + +### Prerequisites +- ShapePipe container: `/n17data/mkilbing/shapepipe_im_sims.sif` +- SP Validation container: `/n17data/mkilbing/sp_validation_im_sims.sif` +- Pipeline scripts: `~/astro/repositories/github/shapepipe/scripts/image_sims_pipeline/` + +### Setup (one time) + +```bash +# Copy template config to run directory +cp ~/astro/repositories/github/shapepipe/scripts/image_sims_pipeline/config.yaml.template \ + /n17data/mkilbing/astro/Runs/shapepipe/CFIS/v2.0/image_sims/config.yaml + +# Edit config.yaml with your tile IDs, grid number, etc. 
+``` + +### Run from the run directory + +**All snakemake commands run from the image_sims run directory:** + +```bash +cd /n17data/mkilbing/astro/Runs/shapepipe/CFIS/v2.0/image_sims/ + +# Full pipeline to calibrated catalogues +snakemake -s ~/astro/repositories/github/shapepipe/scripts/image_sims_pipeline/Snakefile \ + --configfile config.yaml -j 5 calibrate_all + +# M-bias computation with convergence tracking +snakemake -s ~/astro/repositories/github/shapepipe/scripts/image_sims_pipeline/Snakefile \ + --configfile config.yaml -j 1 mbias + +# Monitor progress +~/astro/repositories/github/shapepipe/scripts/image_sims_pipeline/info.py -m -v + +# Incremental m-bias as tiles finish +~/astro/repositories/github/shapepipe/scripts/image_sims_pipeline/monitor_mbias.py -v +``` + +--- + +## Configuration + +### Snakemake Config: `config.yaml` + +```yaml +# Run directory +base: /n17data/mkilbing/astro/Runs/shapepipe/CFIS/v2.0/image_sims + +# Tile IDs (all tiles in the survey, or subset for testing) +tile_IDs: ["254.286", "254.287"] + +# catalogue subversion (mask config: mask_v1.X.{sample}_im_sim.yaml) +sample: 9 + +# Simulation type (grid or others) +type: grid + +# Grid number (e.g., grid_2) +num: 2 + +# SMP batch size per ShapePipe job (-1 = use container default) +n_smp: -1 +``` + +### M-bias Calibration Settings + +M-bias computation parameters are hard-coded in the Snakefile `mbias` rule: + +```python +shear_amplitude: 0.02 # Input shear amplitude (absolute value) +match_radius_deg: 0.0002 # Position matching radius for cross-matching +w_col: w_des # Weight column name +n_bootstrap: 500 # Bootstrap resamples for error estimation +catalog_name: shape_catalog_cut_ngmix.fits +``` + +To modify these parameters for a different run, edit the `mbias` rule in the Snakefile. + +--- + +## Pipeline Stages + +The pipeline runs in order, with each stage dependent on the previous: + +### 1. 
`init_all` — Initialize run directories + +Creates per-sim directories, parameter files (`params.py`), and mask configs. + +**Outputs:** `{grids}/{sim}/params.py`, `{grids}/{sim}/config_mask.yaml` + +### 2. `pipeline_all` — Run ShapePipe on all tiles + +Executes ShapePipe's full job sequence (bits 1→2→4→...→2048) for each tile in each simulation. + +**Note:** `-j 5` means 5 concurrent Snakemake jobs (tile×sim pairs run sequentially within each job). + +**Outputs:** ShapePipe catalogues, logs at `{grids}/{sim}/logs/log_job_{tile}_{bit}.txt` + +**Time:** ~hours per grid depending on tile count and cluster load. + +### 3. `merge_all` — Merge ShapePipe tile outputs + +Combines per-tile catalogs into a single HDF5 file per simulation using `create_final_cat.py`. + +**Outputs:** `{grids}/{sim}/final_cat_{sim}.hdf5` (HDF5 with tile counts as attributes) + +### 4. `extract_all` — Extract comprehensive catalogues + +Reads merged HDF5 and extracts shape information, PSF quantities, and pre-calibration columns using `extract_info.py`. + +**Outputs:** `{grids}/{sim}/shape_catalog_comprehensive_ngmix.hdf5` (with `n_tiles` attribute in header) + +### 5. `calibrate_all` — Apply cuts and calibrate + +Applies selection masks (flags, magnitude, signal-to-noise) and computes shear calibration (m/c per object) using `calibrate_comprehensive_cat.py`. + +**Outputs:** `{grids}/{sim}/shape_catalog_cut_ngmix.fits` (final catalogue, ready for m-bias) + +### 6. `mbias` — Compute m-bias with convergence tracking + +Computes m₁, m₂, c₁, c₂ from the five shear pairs. With `--cumulative`, tracks convergence as tile counts grow. 
+ +**Outputs:** +- `results/m_bias_results.yaml` — final results (YAML) +- `results/m_bias_results.txt` — final results (human-readable text) +- `results/mbias_cumulative.yaml` — convergence history +- `results/mbias_convergence.png` — m/c vs n_tiles (2 panels) +- `results/mbias_errors.png` — error convergence (2 panels) + +--- + +## Monitoring Convergence + +### Live Status: `info.py` + +Monitor pipeline progress with a status table: + +```bash +cd /path/to/run/image_sims +~/astro/repositories/github/shapepipe/scripts/image_sims_pipeline/info.py -m -v +``` + +**Status table (example output):** + +| Simulation | #Jobs | Job Bits | Merge | Extract | Calibrate | +|-----------|-------|----------|-------|---------|-----------| +| 1m2z_grid_2 | 12/12 | ✓✓✓✓✓✓✓✓✓✓✓✓ | 2 | ✓ | ✓ | +| 1p2z_grid_2 | 12/12 | ✓✓✓✓✓✓✓✓✓✓✓✓ | 2 | ✓ | ✓ | +| 1z2m_grid_2 | 12/12 | ✓✓✓✓✓✓✓✓✓✓✓✓ | 2 | ✓ | ✓ | +| 1z2p_grid_2 | 12/12 | ✓✓✓✓✓✓✓✓✓✓✓✓ | 2 | ✓ | ✓ | +| 1z2z_grid_2 | 12/12 | ✓✓✓✓✓✓✓✓✓✓✓✓ | 2 | ✓ | ✓ | + +**Column meanings:** +- **#Jobs** — ShapePipe job completion count (max 12 bits) +- **Job Bits** — per-bit status (✓ = all tiles done, · = incomplete) +- **Merge** — tile count in final HDF5 catalogue +- **Extract** — comprehensive catalog status +- **Calibrate** — calibrated catalogue status + +### Incremental M-bias: `monitor_mbias.py` + +Recompute m-bias each time new tiles finish all ShapePipe jobs. Automatically detects completed tiles and updates convergence tracking. + +```bash +cd /path/to/run/image_sims +~/astro/repositories/github/shapepipe/scripts/image_sims_pipeline/monitor_mbias.py -v +``` + +Run this repeatedly as tiles complete to watch m-bias converge. 
+ +--- + +## Results Interpretation + +### M-Bias Results Files + +**YAML format** (`m_bias_results.yaml`): +```yaml +m1: -0.7909173336321427 +m1_err: 0.27407603929732716 +c1: -0.0030413689177988708 +c1_err: 0.005899515490890933 +m2: -1.350016719584532 +m2_err: 0.20884008300375373 +c2: -0.004950048084875509 +c2_err: 0.004208875641566414 +``` + +**Text format** (`m_bias_results.txt`): +``` +Multiplicative and additive shear bias from image simulations +============================================================ + +m1 = -0.790917 ± 0.274076 +c1 = -0.003041 ± 0.005900 + +m2 = -1.350017 ± 0.208840 +c2 = -0.004950 ± 0.004209 + +Errors computed via bootstrap resampling (n=500 resamples) +``` + +### Convergence History (`mbias_cumulative.yaml`) + +Tracks m/c evolution as tile count grows: + +```yaml +'2': + c1: -0.0030413689177988708 + c1_err: 0.005899515490890933 + m1: -0.7909173336321427 + m1_err: 0.27407603929732716 + # ... (c2, m2, errors) +'4': + c1: -0.0025... + # ... (more tiles) +``` + +### Understanding Error Bars + +Errors are computed via **bootstrap resampling**: + +1. Draw N=500 random resamples (with replacement) from measured galaxy ellipticities +2. Recompute m and c for each resample +3. Error = standard deviation of the bootstrap distribution + +This captures: +- **Photometric noise** — measurement uncertainties per galaxy +- **Cosmic variance** — shape correlations across the sky +- **Calibration uncertainties** — from shear responsivity scatter + +**Error shrinkage:** Error ∝ 1/√(n_tiles). More tiles → tighter constraints. 
+ +### Convergence Plots + +**`mbias_convergence.png`** — m and c with error bars vs n_tiles +- Left panel: multiplicative bias (m₁, m₂) with error bars +- Right panel: additive bias (c₁, c₂) with error bars +- Shows systematic trends and statistical uncertainties + +**`mbias_errors.png`** — error shrinkage vs n_tiles +- Left panel: m error bars (m₁, m₂) only +- Right panel: c error bars (c₁, c₂) only +- Shows constraint tightening as data accumulates +- Useful for deciding when m-bias is "converged" (errors small enough for science) + +--- + +## File Structure + +``` +/n17data/mkilbing/astro/Runs/shapepipe/CFIS/v2.0/image_sims/ +├── config.yaml # Run configuration +├── .snakemake/ # Snakemake metadata +├── logs/ # Snakemake logs +├── grids/ +│ ├── {sim}_grid_2/ +│ │ ├── logs/ # ShapePipe job logs +│ │ ├── tiles/ # Per-tile ShapePipe outputs +│ │ ├── final_cat_{sim}.hdf5 # Merged catalogue +│ │ ├── shape_catalog_comprehensive_ngmix.hdf5 +│ │ └── shape_catalog_cut_ngmix.fits +│ └── results/ +│ ├── m_bias_results.yaml +│ ├── m_bias_results.txt +│ ├── mbias_cumulative.yaml +│ ├── mbias_convergence.png +│ └── mbias_errors.png +└── monitoring/ # Incremental m-bias workspace + └── {sim}/ + ├── final_cat_{sim}.hdf5 + ├── shape_catalog_comprehensive_ngmix.hdf5 + └── shape_catalog_cut_ngmix.fits +``` + +--- + +## References + +- **Pipeline code:** `~/astro/repositories/github/shapepipe/scripts/image_sims_pipeline/` +- **ShapePipe:** `~/astro/repositories/github/shapepipe/` +- **SP Validation:** `~/astro/repositories/github/sp_validation/` + +--- + +*Last updated: 2026-06-26* diff --git a/scripts/image_sims_pipeline/README.md b/scripts/image_sims_pipeline/README.md new file mode 100644 index 000000000..1eb1ff2ae --- /dev/null +++ b/scripts/image_sims_pipeline/README.md @@ -0,0 +1,80 @@ +# Image Simulations Calibration Pipeline + +Snakemake workflow for deriving shear m-bias from image simulations. + +## Setup + +### 1. 
Create run directory and copy config + +```bash +mkdir -p /path/to/run/image_sims +cd /path/to/run/image_sims + +# Copy template config and edit for your run +cp ~/astro/repositories/github/shapepipe/scripts/image_sims_pipeline/config.yaml.template \ + config.yaml + +# Edit config.yaml with your tile IDs, grid number, etc. +``` + +### 2. Run Snakemake from the run directory + +```bash +cd /path/to/run/image_sims + +# Full pipeline +snakemake -s ~/astro/repositories/github/shapepipe/scripts/image_sims_pipeline/Snakefile \ + --configfile config.yaml -j 5 calibrate_all + +# M-bias computation +snakemake -s ~/astro/repositories/github/shapepipe/scripts/image_sims_pipeline/Snakefile \ + --configfile config.yaml -j 1 mbias +``` + +## Scripts + +**Snakefile** — Main pipeline definition +**info.py** — Monitor pipeline progress with status table +**monitor_mbias.py** — Incrementally compute m-bias as tiles finish +**config.yaml.template** — Configuration template (copy and edit for each run) + +## Usage Examples + +### Monitor progress (from run directory) + +```bash +cd /path/to/run/image_sims +~/astro/repositories/github/shapepipe/scripts/image_sims_pipeline/info.py -m -v +``` + +### Incremental m-bias computation + +```bash +cd /path/to/run/image_sims +~/astro/repositories/github/shapepipe/scripts/image_sims_pipeline/monitor_mbias.py -v +``` + +Run this repeatedly while `pipeline_all` is in progress to watch m-bias converge. + +## Output Structure + +``` +/path/to/run/image_sims/ +├── config.yaml +├── .snakemake/ +├── logs/ # Snakemake logs +├── grids/ +│ ├── 1m2z_grid_2/ +│ ├── ... 
+│ └── results/ +│ ├── m_bias_results.yaml +│ ├── m_bias_results.txt +│ ├── mbias_cumulative.yaml +│ ├── mbias_convergence.png +│ └── mbias_errors.png +└── monitoring/ # Temp workspace for incremental m-bias +``` + +## Documentation + +Full documentation: `~/astro/repositories/github/shapepipe/docs/source/image_sims_calibration.md` diff --git a/scripts/image_sims_pipeline/Snakefile b/scripts/image_sims_pipeline/Snakefile new file mode 100644 index 000000000..94ab455a3 --- /dev/null +++ b/scripts/image_sims_pipeline/Snakefile @@ -0,0 +1,193 @@ +"""Snakemake workflow for image-simulation pipeline. + +Run from the run directory: + snakemake -s ~/astro/repositories/github/shapepipe/scripts/image_sims_pipeline/Snakefile --configfile config.yaml -j 5 + snakemake -s ~/astro/repositories/github/shapepipe/scripts/image_sims_pipeline/Snakefile --configfile config.yaml -j 1 --dry-run +""" + +import os + +# ---------- configuration ---------- +configfile: "config.yaml" + +BASE = config["base"] +TILE_IDs = config.get("tile_IDs", [config.get("tile_ID", "254.286")]) +SAMPLE = str(config["sample"]) +NUM = config["num"] +TYPE = config["type"] +FORCE = "--force" if config.get("force", 0) else "" +N_SMP = config.get("n_smp", -1) + +JOB_SEQUENCE = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048] +JOB = sum(JOB_SEQUENCE) + +PSF = "psfex" + +STR_TYPE = f"_{TYPE}" if TYPE == "grid" else "" +SIMS = [ + f"1{dg1}2{dg2}{STR_TYPE}_{NUM}" + for dg1, dg2 in zip(["m", "p", "z", "z", "z"], + ["z", "z", "m", "p", "z"]) +] +GRIDS = f"{BASE}/{TYPE}s" + +HOME = os.path.expanduser("~") +PATH_SH = f"{HOME}/astro/repositories/github/shapepipe/scripts/sh" +PATH_PY = f"{HOME}/astro/repositories/github/shapepipe/scripts/python" +PATH_NB = f"{HOME}/astro/repositories/github/sp_validation/notebooks" +PATH_SPV = f"{HOME}/astro/repositories/github/sp_validation" + +SP_SIF = "/n17data/mkilbing/shapepipe_im_sims.sif" +SPV_SIF = "/n17data/mkilbing/sp_validation_im_sims.sif" +SPV_SRC = 
f"{HOME}/astro/repositories/github/sp_validation/src" + +# ShapePipe pipeline stages (uses shapepipe_im_sims.sif — correct MPI build) +_APPTAINER = f"{PATH_SH}/apptainer_noslurm.sh" +APP_CMD = f"{_APPTAINER} --bind /n17data,/n09data,/home {SP_SIF}" + +# sp_validation stages (uses sp_validation_im_sims.sif with local src override) +SPV_CMD = ( + f"{_APPTAINER} --bind /n17data,/n09data,/home" + f" --env PYTHONPATH={SPV_SRC}" + f" {SPV_SIF}" +) +MBIAS_OUT = f"{GRIDS}/results/m_bias_results.yaml" +DIAG_OUT = f"{GRIDS}/results/footprint.png" + +# ---------- targets ---------- +rule all: + input: + expand(f"{GRIDS}/{{sim}}/shape_catalog_cut_ngmix.fits", sim=SIMS) + +# ---------- convenience targets (run in order) ---------- +# 1. init_all +# 2. pipeline_all +# 3. merge_all +# 4. extract_all +# 5. calibrate_all +# 6. mbias +# 7. diagnostics + +rule init_all: + input: + expand(f"{GRIDS}/{{sim}}/params.py", sim=SIMS) + +rule pipeline_all: + input: + expand( + f"{GRIDS}/{{sim}}/logs/log_job_{{tile}}_{JOB_SEQUENCE[-1]}.txt", + sim=SIMS, tile=TILE_IDs + ) + +rule merge_all: + input: + expand(f"{GRIDS}/{{sim}}/final_cat_{{sim}}.hdf5", sim=SIMS) + +rule extract_all: + input: + expand(f"{GRIDS}/{{sim}}/shape_catalog_comprehensive_ngmix.hdf5", sim=SIMS) + +rule calibrate_all: + input: + expand(f"{GRIDS}/{{sim}}/shape_catalog_cut_ngmix.fits", sim=SIMS) + +# ---------- rules ---------- +rule init: + output: + params = f"{GRIDS}/{{sim}}/params.py", + mask = f"{GRIDS}/{{sim}}/config_mask.yaml", + shell: + f"{APP_CMD} {PATH_SH}/init_run_v2.0.sh" + f" -t image_sims -d {GRIDS} -s {{wildcards.sim}} -S {SAMPLE}" + +def _shapepipe_job_input(wildcards): + bit = int(wildcards.bit) + tile = wildcards.tile + bit_idx = JOB_SEQUENCE.index(bit) + tile_idx = TILE_IDs.index(tile) + + if bit_idx == 0: + if tile_idx == 0: + # First tile, first bit: wait for init + return [ + f"{GRIDS}/{wildcards.sim}/params.py", + f"{GRIDS}/{wildcards.sim}/config_mask.yaml", + ] + else: + # Later tile, first bit: wait 
for last bit of previous tile + prev_tile = TILE_IDs[tile_idx - 1] + return f"{GRIDS}/{wildcards.sim}/logs/log_job_{prev_tile}_{JOB_SEQUENCE[-1]}.txt" + else: + # Same tile, previous bit + return f"{GRIDS}/{wildcards.sim}/logs/log_job_{tile}_{JOB_SEQUENCE[bit_idx - 1]}.txt" + +rule shapepipe_job: + input: _shapepipe_job_input + output: f"{GRIDS}/{{sim}}/logs/log_job_{{tile}}_{{bit}}.txt" + wildcard_constraints: + bit = "|".join(str(b) for b in JOB_SEQUENCE), + tile = "|".join(t.replace(".", r"\.") for t in TILE_IDs), + shell: + f"cd {GRIDS}/{{wildcards.sim}} &&" + f" {APP_CMD} {PATH_SH}/run_job_sp_canfar_v2.0.bash" + f" {FORCE} -N {N_SMP} -e {{wildcards.tile}} -t image_sims -j {{wildcards.bit}}" + +rule merge: + input: + expand( + f"{GRIDS}/{{{{sim}}}}/logs/log_job_{{tile}}_{JOB_SEQUENCE[-1]}.txt", + tile=TILE_IDs + ) + output: + f"{GRIDS}/{{sim}}/final_cat_{{sim}}.hdf5" + shell: + f"cd {GRIDS}/{{wildcards.sim}} &&" + f" {SPV_CMD} python {PATH_PY}/create_final_cat.py" + f" -I -m final_cat_{{wildcards.sim}}.hdf5" + f" -i .. 
-p cfis/final_cat.param -P {{wildcards.sim}}" + f" -o n_tiles_final.txt -v" + +rule extract: + input: + cat = f"{GRIDS}/{{sim}}/final_cat_{{sim}}.hdf5", + params = f"{GRIDS}/{{sim}}/params.py", + output: + f"{GRIDS}/{{sim}}/shape_catalog_comprehensive_ngmix.hdf5" + shell: + f"cd {GRIDS}/{{wildcards.sim}} &&" + f" {SPV_CMD} python {PATH_NB}/extract_info.py" + +rule calibrate: + input: + cat = f"{GRIDS}/{{sim}}/shape_catalog_comprehensive_ngmix.hdf5", + mask = f"{GRIDS}/{{sim}}/config_mask.yaml", + output: + f"{GRIDS}/{{sim}}/shape_catalog_cut_ngmix.fits" + shell: + f"cd {GRIDS}/{{wildcards.sim}} &&" + f" {SPV_CMD} python {PATH_SPV}/notebooks/calibrate_comprehensive_cat.py -s calibrate" + +rule diagnostics: + input: + expand(f"{GRIDS}/{{sim}}/shape_catalog_cut_ngmix.fits", sim=SIMS) + output: + DIAG_OUT + shell: + f"{SPV_CMD} python {PATH_SPV}/scripts/diagnostics_image_sims.py" + f" -c {MBIAS_CFG} -v" + +rule mbias: + input: + expand(f"{GRIDS}/{{sim}}/shape_catalog_cut_ngmix.fits", sim=SIMS) + output: + MBIAS_OUT, + f"{GRIDS}/results/mbias_cumulative.yaml", + shell: + f"mkdir -p {GRIDS}/results && " + f"python -c \"import yaml; " + f"cfg = {{'grids_dir': '{GRIDS}', 'num': {NUM}, 'catalog_name': 'shape_catalog_cut_ngmix.fits', " + f"'shear_amplitude': 0.02, 'match_radius_deg': 0.0002, 'w_col': 'w_des', 'n_bootstrap': 500, " + f"'output_path': '{GRIDS}/results/m_bias_results.yaml', 'results_dir': '{GRIDS}/results'}}; " + f"yaml.dump(cfg, open('/tmp/mbias_cfg.yaml', 'w'))\" && " + f"{SPV_CMD} python {PATH_SPV}/scripts/compute_m_bias_image_sims.py" + f" -c /tmp/mbias_cfg.yaml -v --cumulative" diff --git a/scripts/image_sims_pipeline/config.yaml.template b/scripts/image_sims_pipeline/config.yaml.template new file mode 100644 index 000000000..ed31223e9 --- /dev/null +++ b/scripts/image_sims_pipeline/config.yaml.template @@ -0,0 +1,34 @@ +# Basic configuration + +## Run directory +base: /n17data/mkilbing/astro/Runs/shapepipe/CFIS/v2.0/image_sims + +## Tile IDs +tile_IDs: 
["254.286", "254.287", "254.288", "254.290"] + +## catalogue subversion +sample: 9 + +## simulation type +type: grid + +## Grid number +num: 2 + +## SMP batch size per ShapePipe job (-1 = use config file default = 1) +n_smp: -1 + + +## Calibration + +## Input shear amplitude +shear_amplitude: 0.02 + +## Position matching (for grids) +match_radius_deg: 0.0002 + +## Catalogue columns +w_col: w_des + +## Bootstrap error estimation +n_bootstrap: 500 diff --git a/scripts/image_sims_pipeline/info.py b/scripts/image_sims_pipeline/info.py new file mode 100644 index 000000000..69d9573ca --- /dev/null +++ b/scripts/image_sims_pipeline/info.py @@ -0,0 +1,230 @@ +"""Info script for image-simulation Snakemake pipeline.""" + +import sys +import os +import math +import yaml +import subprocess + + +SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) +SNAKEFILE = os.path.join(SCRIPT_DIR, "Snakefile") +CONFIGFILE = "config.yaml" +BASE_CMD = f"snakemake -s {SNAKEFILE} --configfile {CONFIGFILE}" + + +def load_config(): + with open(CONFIGFILE) as f: + return yaml.safe_load(f) + + +def get_hdf5_tile_count(hdf5_path): + """Query tile count from HDF5 file using create_final_cat.py -l. + + Returns the count as a string, or "." if file doesn't exist or query fails. + """ + if not os.path.isfile(hdf5_path): + return "." + + try: + HOME = os.path.expanduser("~") + script = f"{HOME}/astro/repositories/github/shapepipe/scripts/python/create_final_cat.py" + result = subprocess.run( + [script, "-I", "-l", "-m", hdf5_path, "-v"], + capture_output=True, + text=True, + timeout=10, + ) + + if result.returncode != 0: + return "?" + + for line in result.stdout.split('\n'): + if line.startswith("Total:"): + parts = line.split() + if len(parts) >= 2: + return parts[1] + + return "?" + except Exception: + return "?" + + +def get_extract_tile_count(sim_path): + """Read tile count from shape_catalog_comprehensive_ngmix.hdf5 attributes. + + Returns the count as a string, or "." 
if file doesn't exist or count not found. + """ + try: + import h5py + except ImportError: + return "?" + + hdf5_path = os.path.join(sim_path, "shape_catalog_comprehensive_ngmix.hdf5") + if not os.path.isfile(hdf5_path): + return "." + + try: + with h5py.File(hdf5_path, 'r') as hf: + if 'n_tiles' in hf.attrs: + return str(hf.attrs['n_tiles']) + return "." + except Exception: + return "?" + + +def monitor(cfg, verbose=0): + base = cfg["base"] + num = cfg["num"] + sim_type = cfg["type"] + tile_ids = cfg.get("tile_IDs", [cfg.get("tile_ID", "?")]) + n_tiles = len(tile_ids) + job_seq = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048] + job_hex = [format(int(math.log2(j)), 'X') for j in job_seq] + + str_type = f"_{sim_type}" if sim_type == "grid" else "" + sims = [ + f"1{d1}2{d2}{str_type}_{num}" + for d1, d2 in zip(["m","p","z","z","z"], ["z","z","m","p","z"]) + ] + grids = f"{base}/{sim_type}s" + + col_w = max(len(s) for s in sims) + 2 + downstream = [ + ("merge", "final_cat_{sim}.hdf5"), + ("extract", "shape_catalog_comprehensive_ngmix.hdf5"), + ("calibrate","shape_catalog_cut_ngmix.fits"), + ] + ds_w = max(len(s) for s, _ in downstream) + + tile_label = f"({n_tiles} tile{'s' if n_tiles > 1 else ''})" + if verbose >= 1: + mid_head = " ".join(job_hex) + else: + mid_head = "".join(job_hex) + bits_w = len(mid_head) + header1 = f" {'sim':<{col_w}} {'#jobs':>12} {'2^n':<{bits_w}} " + \ + " ".join(f"{s:<{ds_w}}" for s, _ in downstream) + header2 = f" {'':^{col_w}} {'':>12} {mid_head}" + print(f" {tile_label}") + print(header1) + print(header2) + print("-" * len(header1)) + + for sim in sims: + logs_dir = os.path.join(grids, sim, "logs") + + # For each job bit, count how many tiles have the log file + tile_counts = {} + for j in job_seq: + tile_counts[j] = sum( + 1 for t in tile_ids + if os.path.isfile(os.path.join(logs_dir, f"log_job_{t}_{j}.txt")) + ) + + jobs_sum = sum(1 for j in job_seq if tile_counts[j] == n_tiles) + bits_str = "".join("o" if tile_counts[j] == 
n_tiles else "." for j in job_seq) + + ds_status = [] + for idx, (name, pattern) in enumerate(downstream): + path = os.path.join(grids, sim, pattern.format(sim=sim)) + sim_path = os.path.join(grids, sim) + if name == "merge": + ds_status.append(get_hdf5_tile_count(path)) + elif name == "extract": + ds_status.append(get_extract_tile_count(sim_path)) + else: + ds_status.append("o" if os.path.isfile(path) else "") + + if verbose >= 1: + counts_str = " ".join(f"{tile_counts[j]}" for j in job_seq) # aligns with header 0 1 2 ... + row = f" {sim:<{col_w}} {jobs_sum:>12} {counts_str} " + \ + " ".join(f"{s:<{ds_w}}" for s in ds_status) + else: + row = f" {sim:<{col_w}} {jobs_sum:>12} {bits_str} " + \ + " ".join(f"{s:<{ds_w}}" for s in ds_status) + print(row) + + +def main(): + monitor_mode = "-m" in sys.argv or "--monitor" in sys.argv + verbose = sys.argv.count("-v") + sys.argv.count("--verbose") + \ + (1 if "-vv" in sys.argv else 0) * 2 + + cfg = load_config() + + if monitor_mode: + monitor(cfg, verbose=verbose) + return + + base = cfg["base"] + tile_ids = cfg.get("tile_IDs", [cfg.get("tile_ID", "?")]) + sample = cfg["sample"] + num = cfg["num"] + sim_type = cfg["type"] + job_seq = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048] + + str_type = f"_{sim_type}" if sim_type == "grid" else "" + sims = [ + f"1{d1}2{d2}{str_type}_{num}" + for d1, d2 in zip(["m","p","z","z","z"], ["z","z","m","p","z"]) + ] + grids = f"{base}/{sim_type}s" + + print("=" * 70) + print(" Image simulations pipeline") + print("=" * 70) + print() + print(f" Config : {CONFIGFILE}") + print(f" Base : {base}") + print(f" Tile IDs : {tile_ids}") + print(f" Sample : {sample} (mask config mask_v1.X.{sample}_im_sim.yaml)") + print(f" Run num : {num}") + print(f" Sims : {', '.join(sims)}") + print(f" Job seq : {job_seq}") + print() + + print("# Basic command") + print() + print(f" {BASE_CMD} -j N_CPU ") + print() + + print("# Target order") + print() + stages = [ + ("init_all", "initialise run directories"), 
+ ("pipeline_all", f"run full job sequence {job_seq} in order"), + ("merge_all", "merge ShapePipe output into final_cat_.hdf5"), + ("extract_all", "extract comprehensive shape catalogue (HDF5)"), + ("calibrate_all", "apply masks and calibrate (= all)"), + ("diagnostics", "compute and plot diagnostics"), + ("mbias", "compute multiplicative biases") + ] + for target, desc in stages: + print(f" {target:<16} {desc}") + print() + + print("# Useful options") + print() + print(f" -p print shell commands") + print(f" --config 'force=1' pass --force to run_job_sp_canfar_v2.0.bash") + print(f" --forcerun pipeline_all force Snakemake to rerun all pipeline jobs") + print(f" --config 'job=J' target a specific job bit J") + print(f" --rerun-incomplete if errors occur due to previous interrupted run") + print() + + print("# Monitor progress") + print() + print(f" python info_image_sims.py -m") + print() + + print("# Run a single shapepipe job for one sim") + print() + print(f" {BASE_CMD} -j 1 \\") + print(f" {grids}//logs/log_job_J.txt") + print() + + + +if __name__ == "__main__": + main() diff --git a/scripts/image_sims_pipeline/monitor_mbias.py b/scripts/image_sims_pipeline/monitor_mbias.py new file mode 100644 index 000000000..000cf8067 --- /dev/null +++ b/scripts/image_sims_pipeline/monitor_mbias.py @@ -0,0 +1,264 @@ +#!/usr/bin/env python +"""Monitor m-bias convergence as pipeline_all tiles complete. + +For tiles that have finished all ShapePipe jobs (bit 2048) in every sim, +runs merge/extract/calibrate in a monitoring subdirectory, then computes +m1, m2, c1, c2 and reports them alongside the tile count. + +Run repeatedly while pipeline_all is in progress: merge is incremental +(adds new tiles, skips tiles already in the HDF5), so each run picks up +newly finished tiles automatically. 
+ +Usage (from run directory): + python ~/astro/repositories/github/shapepipe/scripts/image_sims_pipeline/monitor_mbias.py [-c config.yaml] [-v] +""" + +import sys +import os +import subprocess +import shutil +import yaml +import argparse + + +CONFIGFILE = "config_image_sims.yaml" +JOB_LAST = 2048 + +HOME = os.path.expanduser("~") +PATH_SH = f"{HOME}/astro/repositories/github/shapepipe/scripts/sh" +PATH_PY = f"{HOME}/astro/repositories/github/shapepipe/scripts/python" +PATH_NB = f"{HOME}/astro/repositories/github/sp_validation/notebooks" +PATH_SPV = f"{HOME}/astro/repositories/github/sp_validation" +SPV_SRC = f"{PATH_SPV}/src" +SPV_SIF = "/n17data/mkilbing/sp_validation_im_sims.sif" +APPTAINER = f"{PATH_SH}/apptainer_noslurm.sh" + +SPV_PREFIX = [ + APPTAINER, + "--bind", "/n17data,/n09data,/home", + "--env", f"PYTHONPATH={SPV_SRC}", + SPV_SIF, + "python", +] + + +def parse_args(): + p = argparse.ArgumentParser(description=__doc__) + p.add_argument( + "-c", "--config", default=CONFIGFILE, + help=f"Snakemake config YAML (default: {CONFIGFILE})" + ) + p.add_argument("-v", "--verbose", action="store_true") + return p.parse_args() + + +def load_config(path): + with open(path) as f: + return yaml.safe_load(f) + + +def sims_from_config(cfg): + num = cfg["num"] + str_type = f"_{cfg['type']}" if cfg["type"] == "grid" else "" + return [ + f"1{d1}2{d2}{str_type}_{num}" + for d1, d2 in zip(["m", "p", "z", "z", "z"], + ["z", "z", "m", "p", "z"]) + ] + + +def done_tiles(grids, sims, tile_ids): + """Return tiles that have log_job_{tile}_2048.txt in every sim.""" + result = [] + for tile in tile_ids: + if all( + os.path.isfile(f"{grids}/{sim}/logs/log_job_{tile}_{JOB_LAST}.txt") + for sim in sims + ): + result.append(tile) + return result + + +def setup_sim_dir(mon_sim_dir, real_sim_dir, verbose): + """Prepare monitoring sim dir with symlinks and copied params.py.""" + os.makedirs(mon_sim_dir, exist_ok=True) + + for name in ("cfis", "config_mask.yaml"): + link = 
os.path.join(mon_sim_dir, name) + if not os.path.exists(link): + target = os.path.join(real_sim_dir, name) + if os.path.exists(target): + os.symlink(target, link) + elif verbose: + print(f" Warning: {target} not found, skipping symlink") + + params_src = os.path.join(real_sim_dir, "params.py") + params_dst = os.path.join(mon_sim_dir, "params.py") + if os.path.exists(params_src): + shutil.copy2(params_src, params_dst) + elif verbose: + print(f" Warning: {params_src} not found") + + +def run_spv(script_args, cwd, label, verbose): + """Run a script inside the SPV container; return True on success.""" + cmd = SPV_PREFIX + script_args + if verbose: + print(f" cmd: {' '.join(cmd)}") + print(f" cwd: {cwd}") + result = subprocess.run( + cmd, cwd=cwd, + capture_output=(not verbose), + text=True, + ) + if result.returncode != 0: + print(f" {label} FAILED (exit {result.returncode})") + if not verbose and result.stderr: + print(result.stderr[-2000:]) + return False + return True + + +def run_merge(mon_sim_dir, grids, sim, verbose): + return run_spv( + [ + f"{PATH_PY}/create_final_cat.py", + "-I", + "-m", f"final_cat_{sim}.hdf5", + "-i", grids, + "-P", sim, + "-p", f"{grids}/{sim}/cfis/final_cat.param", + "-o", "n_tiles_monitor.txt", + ] + (["-v"] if verbose else []), + cwd=mon_sim_dir, + label="merge", + verbose=verbose, + ) + + +def run_extract(mon_sim_dir, verbose): + return run_spv( + [f"{PATH_NB}/extract_info.py"], + cwd=mon_sim_dir, + label="extract", + verbose=verbose, + ) + + +def run_calibrate(mon_sim_dir, verbose): + return run_spv( + [f"{PATH_NB}/calibrate_comprehensive_cat.py", "-s", "calibrate"], + cwd=mon_sim_dir, + label="calibrate", + verbose=verbose, + ) + + +def run_mbias(mon_dir, num, verbose): + cfg_path = os.path.join(mon_dir, "image_sims_m_bias_monitor.yaml") + out_path = os.path.join(mon_dir, "m_bias_monitor.yaml") + mbias_cfg = { + "grids_dir": mon_dir, + "num": num, + "catalog_name": "shape_catalog_cut_ngmix.fits", + "shear_amplitude": 0.02, + 
"match_radius_deg": 0.0002, + "w_col": "w_des", + "n_bootstrap": 500, + "output_path": out_path, + } + with open(cfg_path, "w") as f: + yaml.dump(mbias_cfg, f, default_flow_style=False) + + ok = run_spv( + [f"{PATH_SPV}/scripts/compute_m_bias_image_sims.py", "-c", cfg_path] + + (["-v"] if verbose else []), + cwd=mon_dir, + label="mbias", + verbose=verbose, + ) + return ok, out_path + + +def n_tiles_from_file(path): + """Read tile count written by create_final_cat.py -o.""" + try: + with open(path) as f: + return int(f.read().strip()) + except Exception: + return None + + +def main(): + args = parse_args() + cfg = load_config(args.config) + + base = cfg["base"] + num = cfg["num"] + tile_ids = cfg.get("tile_IDs", [cfg.get("tile_ID", "254.286")]) + grids = f"{base}/{cfg['type']}s" + sims = sims_from_config(cfg) + mon_dir = os.path.join(grids, "monitoring") + + # -- Detect done tiles ----------------------------------------------- + done = done_tiles(grids, sims, tile_ids) + n_done = len(done) + n_total = len(tile_ids) + + print(f"Tiles finished in all sims: {n_done} / {n_total}") + if done: + print(f" {done}") + + if n_done == 0: + print("No tiles complete yet. 
Nothing to compute.") + return 0 + + # -- Set up monitoring directory ------------------------------------ + print(f"\nMonitoring dir: {mon_dir}") + os.makedirs(mon_dir, exist_ok=True) + + # -- Per-sim: merge → extract → calibrate -------------------------- + for sim in sims: + real_sim_dir = os.path.join(grids, sim) + mon_sim_dir = os.path.join(mon_dir, sim) + print(f"\n{sim}") + + setup_sim_dir(mon_sim_dir, real_sim_dir, args.verbose) + + print(" merge ...", end="", flush=True) + if not run_merge(mon_sim_dir, grids, sim, args.verbose): + return 1 + n_found = n_tiles_from_file(os.path.join(mon_sim_dir, "n_tiles_monitor.txt")) + print(f" ok ({n_found} tiles in HDF5)") + + print(" extract ...", end="", flush=True) + if not run_extract(mon_sim_dir, args.verbose): + return 1 + print(" ok") + + print(" calibrate...", end="", flush=True) + if not run_calibrate(mon_sim_dir, args.verbose): + return 1 + print(" ok") + + # -- m-bias -------------------------------------------------------- + print("\nComputing m-bias...") + ok, results_path = run_mbias(mon_dir, num, args.verbose) + if not ok: + return 1 + + if os.path.isfile(results_path): + with open(results_path) as f: + res = yaml.safe_load(f) + print() + print(f" n_tiles = {n_done} / {n_total}") + print(f" m1 = {res['m1']:+.4f} ± {res['m1_err']:.4f}") + print(f" c1 = {res['c1']:+.4f} ± {res['c1_err']:.4f}") + print(f" m2 = {res['m2']:+.4f} ± {res['m2_err']:.4f}") + print(f" c2 = {res['c2']:+.4f} ± {res['c2_err']:.4f}") + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/sh/apptainer_noslurm.sh b/scripts/sh/apptainer_noslurm.sh new file mode 100755 index 000000000..4d28dfb64 --- /dev/null +++ b/scripts/sh/apptainer_noslurm.sh @@ -0,0 +1,10 @@ +#!/bin/bash +# Run apptainer exec after stripping all SLURM/PMI/PMIX/OMPI environment +# variables that cause OpenMPI to crash when running inside a SLURM job. +# Usage: apptainer_noslurm.sh [apptainer_args...] -- [cmd_args...] 
+ +for v in $(env | grep -Eo "^(SLURM|PMI|PMIX|OMPI)[^=]*"); do + unset "$v" +done + +exec apptainer exec "$@" From 2a6bb94706f8819f544af796d68104e4828d6f97 Mon Sep 17 00:00:00 2001 From: martinkilbinger Date: Sat, 27 Jun 2026 09:04:46 +0200 Subject: [PATCH 20/24] fixed snakefile --- scripts/image_sims_pipeline/Snakefile | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/scripts/image_sims_pipeline/Snakefile b/scripts/image_sims_pipeline/Snakefile index 94ab455a3..a9bc7657d 100644 --- a/scripts/image_sims_pipeline/Snakefile +++ b/scripts/image_sims_pipeline/Snakefile @@ -52,7 +52,6 @@ SPV_CMD = ( f" {SPV_SIF}" ) MBIAS_OUT = f"{GRIDS}/results/m_bias_results.yaml" -DIAG_OUT = f"{GRIDS}/results/footprint.png" # ---------- targets ---------- rule all: @@ -167,15 +166,6 @@ rule calibrate: f"cd {GRIDS}/{{wildcards.sim}} &&" f" {SPV_CMD} python {PATH_SPV}/notebooks/calibrate_comprehensive_cat.py -s calibrate" -rule diagnostics: - input: - expand(f"{GRIDS}/{{sim}}/shape_catalog_cut_ngmix.fits", sim=SIMS) - output: - DIAG_OUT - shell: - f"{SPV_CMD} python {PATH_SPV}/scripts/diagnostics_image_sims.py" - f" -c {MBIAS_CFG} -v" - rule mbias: input: expand(f"{GRIDS}/{{sim}}/shape_catalog_cut_ngmix.fits", sim=SIMS) @@ -191,3 +181,19 @@ rule mbias: f"yaml.dump(cfg, open('/tmp/mbias_cfg.yaml', 'w'))\" && " f"{SPV_CMD} python {PATH_SPV}/scripts/compute_m_bias_image_sims.py" f" -c /tmp/mbias_cfg.yaml -v --cumulative" + +rule diagnostics: + input: + expand(f"{GRIDS}/{{sim}}/shape_catalog_cut_ngmix.fits", sim=SIMS) + output: + f"{GRIDS}/results/footprint.png", + shell: + f"mkdir -p {GRIDS}/results && " + f"python -c \"import yaml; " + f"cfg = {{'grids_dir': '{GRIDS}', 'num': {NUM}, 'catalog_name': 'shape_catalog_cut_ngmix.fits', " + f"'shear_amplitude': 0.02, 'match_radius_deg': 0.0002, 'w_col': 'w_des', 'n_bootstrap': 500, " + f"'output_path': '{GRIDS}/results/m_bias_results.yaml', 'results_dir': '{GRIDS}/results', " + f"'diagnostics_dir': 
'{GRIDS}/results'}}; " + f"yaml.dump(cfg, open('/tmp/diag_cfg.yaml', 'w'))\" && " + f"{SPV_CMD} python {PATH_SPV}/scripts/diagnostics_image_sims.py" + f" -c /tmp/diag_cfg.yaml -v" From 4f7635d18ad6cc1c32f39ea8be80f3b034b51488 Mon Sep 17 00:00:00 2001 From: martinkilbinger Date: Sun, 28 Jun 2026 14:15:47 +0200 Subject: [PATCH 21/24] test for zero size FITS output files --- scripts/image_sims_pipeline/README.md | 80 ----------------- scripts/image_sims_pipeline/Snakefile | 87 +++++++++++++++---- .../image_sims_pipeline/config.yaml.template | 9 +- scripts/image_sims_pipeline/info.py | 47 ++++++---- scripts/python/create_final_cat.py | 4 + scripts/sh/init_run_v2.0.sh | 52 ++++++++--- scripts/sh/run_job_sp_canfar_v2.0.bash | 2 +- .../vignetmaker_package/vignetmaker.py | 79 +++++++++++++---- 8 files changed, 217 insertions(+), 143 deletions(-) delete mode 100644 scripts/image_sims_pipeline/README.md diff --git a/scripts/image_sims_pipeline/README.md b/scripts/image_sims_pipeline/README.md deleted file mode 100644 index 1eb1ff2ae..000000000 --- a/scripts/image_sims_pipeline/README.md +++ /dev/null @@ -1,80 +0,0 @@ -# Image Simulations Calibration Pipeline - -Snakemake workflow for deriving shear m-bias from image simulations. - -## Setup - -### 1. Create run directory and copy config - -```bash -mkdir -p /path/to/run/image_sims -cd /path/to/run/image_sims - -# Copy template config and edit for your run -cp ~/astro/repositories/github/shapepipe/scripts/image_sims_pipeline/config.yaml.template \ - config.yaml - -# Edit config.yaml with your tile IDs, grid number, etc. -``` - -### 2. 
Run Snakemake from the run directory - -```bash -cd /path/to/run/image_sims - -# Full pipeline -snakemake -s ~/astro/repositories/github/shapepipe/scripts/image_sims_pipeline/Snakefile \ - --configfile config.yaml -j 5 calibrate_all - -# M-bias computation -snakemake -s ~/astro/repositories/github/shapepipe/scripts/image_sims_pipeline/Snakefile \ - --configfile config.yaml -j 1 mbias -``` - -## Scripts - -**Snakefile** — Main pipeline definition -**info.py** — Monitor pipeline progress with status table -**monitor_mbias.py** — Incrementally compute m-bias as tiles finish -**config.yaml.template** — Configuration template (copy and edit for each run) - -## Usage Examples - -### Monitor progress (from run directory) - -```bash -cd /path/to/run/image_sims -~/astro/repositories/github/shapepipe/scripts/image_sims_pipeline/info.py -m -v -``` - -### Incremental m-bias computation - -```bash -cd /path/to/run/image_sims -~/astro/repositories/github/shapepipe/scripts/image_sims_pipeline/monitor_mbias.py -v -``` - -Run this repeatedly while `pipeline_all` is in progress to watch m-bias converge. - -## Output Structure - -``` -/path/to/run/image_sims/ -├── config.yaml -├── .snakemake/ -├── logs/ # Snakemake logs -├── grids/ -│ ├── 1m2z_grid_2/ -│ ├── ... 
-│ └── results/ -│ ├── m_bias_results.yaml -│ ├── m_bias_results.txt -│ ├── mbias_cumulative.yaml -│ ├── mbias_convergence.png -│ └── mbias_errors.png -└── monitoring/ # Temp workspace for incremental m-bias -``` - -## Documentation - -Full documentation: `~/astro/repositories/github/shapepipe/docs/source/image_sims_calibration.md` diff --git a/scripts/image_sims_pipeline/Snakefile b/scripts/image_sims_pipeline/Snakefile index a9bc7657d..8c60d6f55 100644 --- a/scripts/image_sims_pipeline/Snakefile +++ b/scripts/image_sims_pipeline/Snakefile @@ -11,7 +11,17 @@ import os configfile: "config.yaml" BASE = config["base"] -TILE_IDs = config.get("tile_IDs", [config.get("tile_ID", "254.286")]) + +# Load tile IDs from file (always a file path) +_tile_ids_file = config.get("tile_IDs", "tile_numbers.txt") +with open(_tile_ids_file) as f: + TILE_IDs = [line.strip() for line in f if line.strip()] + +# Apply exclusive tile list if provided (for testing/limiting tiles) +_exclusive = config.get("tile_IDs_exclusive") +if _exclusive and isinstance(_exclusive, list) and _exclusive: # Non-empty list + TILE_IDs = _exclusive + SAMPLE = str(config["sample"]) NUM = config["num"] TYPE = config["type"] @@ -19,6 +29,10 @@ FORCE = "--force" if config.get("force", 0) else "" N_SMP = config.get("n_smp", -1) JOB_SEQUENCE = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048] +# Filter to single job if specified via --config "job=N" +if config.get("job"): + target_job = int(config["job"]) + JOB_SEQUENCE = [j for j in JOB_SEQUENCE if j == target_job] JOB = sum(JOB_SEQUENCE) PSF = "psfex" @@ -53,6 +67,23 @@ SPV_CMD = ( ) MBIAS_OUT = f"{GRIDS}/results/m_bias_results.yaml" +# ---------- helper functions ---------- +def validate_output_files(output_files): + """Validate that output files exist and are non-empty. + + Raises an error if any output file is missing or zero-sized, + preventing empty/corrupt files from being treated as valid outputs. 
+ """ + if isinstance(output_files, str): + output_files = [output_files] + + for f in output_files: + if not os.path.isfile(f): + raise FileNotFoundError(f"Output file missing: {f}") + size = os.path.getsize(f) + if size == 0: + raise ValueError(f"Output file is empty (0 bytes): {f}") + # ---------- targets ---------- rule all: input: @@ -67,6 +98,17 @@ rule all: # 6. mbias # 7. diagnostics +rule generate_tile_list: + """Generate tile_numbers.txt from config's tile_IDs.""" + output: + f"{GRIDS}/tile_numbers.txt" + run: + os.makedirs(f"{GRIDS}", exist_ok=True) + with open(output[0], "w") as f: + for tile in TILE_IDs: + f.write(f"{tile}\n") + print(f"Generated {output[0]} with {len(TILE_IDs)} tiles") + rule init_all: input: expand(f"{GRIDS}/{{sim}}/params.py", sim=SIMS) @@ -92,12 +134,15 @@ rule calibrate_all: # ---------- rules ---------- rule init: + input: + tiles = f"{GRIDS}/tile_numbers.txt" output: params = f"{GRIDS}/{{sim}}/params.py", mask = f"{GRIDS}/{{sim}}/config_mask.yaml", shell: f"{APP_CMD} {PATH_SH}/init_run_v2.0.sh" f" -t image_sims -d {GRIDS} -s {{wildcards.sim}} -S {SAMPLE}" + f" -T {input.tiles}" def _shapepipe_job_input(wildcards): bit = int(wildcards.bit) @@ -129,7 +174,9 @@ rule shapepipe_job: shell: f"cd {GRIDS}/{{wildcards.sim}} &&" f" {APP_CMD} {PATH_SH}/run_job_sp_canfar_v2.0.bash" - f" {FORCE} -N {N_SMP} -e {{wildcards.tile}} -t image_sims -j {{wildcards.bit}}" + f" {FORCE} -N {N_SMP} -e {{wildcards.tile}} -t image_sims -j {{wildcards.bit}} &&" + f" python3 -c \"import os; f='{output}'; " + f"os.path.getsize(f) > 0 or exit(1)\"" rule merge: input: @@ -173,12 +220,17 @@ rule mbias: MBIAS_OUT, f"{GRIDS}/results/mbias_cumulative.yaml", shell: - f"mkdir -p {GRIDS}/results && " - f"python -c \"import yaml; " - f"cfg = {{'grids_dir': '{GRIDS}', 'num': {NUM}, 'catalog_name': 'shape_catalog_cut_ngmix.fits', " - f"'shear_amplitude': 0.02, 'match_radius_deg': 0.0002, 'w_col': 'w_des', 'n_bootstrap': 500, " - f"'output_path': 
'{GRIDS}/results/m_bias_results.yaml', 'results_dir': '{GRIDS}/results'}}; " - f"yaml.dump(cfg, open('/tmp/mbias_cfg.yaml', 'w'))\" && " + f"mkdir -p {GRIDS}/results && cat > /tmp/mbias_cfg.yaml << 'EOF'\n" + f"grids_dir: {GRIDS}\n" + f"num: {NUM}\n" + f"catalog_name: shape_catalog_cut_ngmix.fits\n" + f"shear_amplitude: 0.02\n" + f"match_radius_deg: 0.0002\n" + f"w_col: w_des\n" + f"n_bootstrap: 500\n" + f"output_path: {GRIDS}/results/m_bias_results.yaml\n" + f"results_dir: {GRIDS}/results\n" + f"EOF\n" f"{SPV_CMD} python {PATH_SPV}/scripts/compute_m_bias_image_sims.py" f" -c /tmp/mbias_cfg.yaml -v --cumulative" @@ -188,12 +240,17 @@ rule diagnostics: output: f"{GRIDS}/results/footprint.png", shell: - f"mkdir -p {GRIDS}/results && " - f"python -c \"import yaml; " - f"cfg = {{'grids_dir': '{GRIDS}', 'num': {NUM}, 'catalog_name': 'shape_catalog_cut_ngmix.fits', " - f"'shear_amplitude': 0.02, 'match_radius_deg': 0.0002, 'w_col': 'w_des', 'n_bootstrap': 500, " - f"'output_path': '{GRIDS}/results/m_bias_results.yaml', 'results_dir': '{GRIDS}/results', " - f"'diagnostics_dir': '{GRIDS}/results'}}; " - f"yaml.dump(cfg, open('/tmp/diag_cfg.yaml', 'w'))\" && " + f"mkdir -p {GRIDS}/results && cat > /tmp/diag_cfg.yaml << 'EOF'\n" + f"grids_dir: {GRIDS}\n" + f"num: {NUM}\n" + f"catalog_name: shape_catalog_cut_ngmix.fits\n" + f"shear_amplitude: 0.02\n" + f"match_radius_deg: 0.0002\n" + f"w_col: w_des\n" + f"n_bootstrap: 500\n" + f"output_path: {GRIDS}/results/m_bias_results.yaml\n" + f"results_dir: {GRIDS}/results\n" + f"diagnostics_dir: {GRIDS}/results\n" + f"EOF\n" f"{SPV_CMD} python {PATH_SPV}/scripts/diagnostics_image_sims.py" f" -c /tmp/diag_cfg.yaml -v" diff --git a/scripts/image_sims_pipeline/config.yaml.template b/scripts/image_sims_pipeline/config.yaml.template index ed31223e9..8faee1e92 100644 --- a/scripts/image_sims_pipeline/config.yaml.template +++ b/scripts/image_sims_pipeline/config.yaml.template @@ -3,8 +3,13 @@ ## Run directory base: 
/n17data/mkilbing/astro/Runs/shapepipe/CFIS/v2.0/image_sims -## Tile IDs -tile_IDs: ["254.286", "254.287", "254.288", "254.290"] +## Tile IDs: path to file containing tile IDs (one per line) +tile_IDs: /home/mkilbing/shapepipe/auxdir/CFIS/im_sims_202606/tile_numbers_test.txt + +## Optional: exclusive list of tiles to process (for testing) +## If provided, only these tiles will be used instead of all tiles from tile_IDs +## Leave empty/commented out to use all tiles from tile_IDs +# tile_IDs_exclusive: ["251.288", "251.289", "252.287"] ## catalogue subversion sample: 9 diff --git a/scripts/image_sims_pipeline/info.py b/scripts/image_sims_pipeline/info.py index 69d9573ca..33e1f5b4d 100644 --- a/scripts/image_sims_pipeline/info.py +++ b/scripts/image_sims_pipeline/info.py @@ -18,6 +18,20 @@ def load_config(): return yaml.safe_load(f) +def load_tile_ids(cfg): + """Load tile IDs from config, handling both lists and file paths.""" + tile_ids = cfg.get("tile_IDs", [cfg.get("tile_ID", ["?"])]) + + if isinstance(tile_ids, str): + if os.path.isfile(tile_ids): + with open(tile_ids) as f: + tile_ids = [line.strip() for line in f if line.strip()] + else: + tile_ids = [tile_ids] + + return tile_ids if isinstance(tile_ids, list) else [tile_ids] + + def get_hdf5_tile_count(hdf5_path): """Query tile count from HDF5 file using create_final_cat.py -l. 
@@ -77,7 +91,7 @@ def monitor(cfg, verbose=0): base = cfg["base"] num = cfg["num"] sim_type = cfg["type"] - tile_ids = cfg.get("tile_IDs", [cfg.get("tile_ID", "?")]) + tile_ids = load_tile_ids(cfg) n_tiles = len(tile_ids) job_seq = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048] job_hex = [format(int(math.log2(j)), 'X') for j in job_seq] @@ -91,15 +105,17 @@ def monitor(cfg, verbose=0): col_w = max(len(s) for s in sims) + 2 downstream = [ - ("merge", "final_cat_{sim}.hdf5"), - ("extract", "shape_catalog_comprehensive_ngmix.hdf5"), - ("calibrate","shape_catalog_cut_ngmix.fits"), + ("merge", "final_cat_{sim}.hdf5"), + ("extr", "shape_catalog_comprehensive_ngmix.hdf5"), + ("calib", "shape_catalog_cut_ngmix.fits"), + ("diag", "results/diagnostics.log"), + ("bias", "results/m_bias_results.yaml"), ] ds_w = max(len(s) for s, _ in downstream) tile_label = f"({n_tiles} tile{'s' if n_tiles > 1 else ''})" if verbose >= 1: - mid_head = " ".join(job_hex) + mid_head = " ".join(f"{h:>2}" for h in job_hex) else: mid_head = "".join(job_hex) bits_w = len(mid_head) @@ -131,18 +147,18 @@ def monitor(cfg, verbose=0): sim_path = os.path.join(grids, sim) if name == "merge": ds_status.append(get_hdf5_tile_count(path)) - elif name == "extract": + elif name == "extr": ds_status.append(get_extract_tile_count(sim_path)) else: ds_status.append("o" if os.path.isfile(path) else "") if verbose >= 1: - counts_str = " ".join(f"{tile_counts[j]}" for j in job_seq) # aligns with header 0 1 2 ... 
+ counts_str = " ".join(f"{tile_counts[j]:>2}" for j in job_seq) row = f" {sim:<{col_w}} {jobs_sum:>12} {counts_str} " + \ - " ".join(f"{s:<{ds_w}}" for s in ds_status) + " ".join(f"{s:>{ds_w}}" for s in ds_status) else: row = f" {sim:<{col_w}} {jobs_sum:>12} {bits_str} " + \ - " ".join(f"{s:<{ds_w}}" for s in ds_status) + " ".join(f"{s:>{ds_w}}" for s in ds_status) print(row) @@ -158,7 +174,7 @@ def main(): return base = cfg["base"] - tile_ids = cfg.get("tile_IDs", [cfg.get("tile_ID", "?")]) + tile_ids = load_tile_ids(cfg) sample = cfg["sample"] num = cfg["num"] sim_type = cfg["type"] @@ -206,11 +222,12 @@ def main(): print("# Useful options") print() - print(f" -p print shell commands") - print(f" --config 'force=1' pass --force to run_job_sp_canfar_v2.0.bash") - print(f" --forcerun pipeline_all force Snakemake to rerun all pipeline jobs") - print(f" --config 'job=J' target a specific job bit J") - print(f" --rerun-incomplete if errors occur due to previous interrupted run") + print(f" -p print shell commands") + print(f" --config 'force=1' pass --force to run_job_sp_canfar_v2.0.bash") + print(f" --forcerun pipeline_all force Snakemake to rerun all pipeline jobs") + print(f" --config 'job=J' target a specific job bit J") + print(f" --rerun-incomplete if errors occur due to previous interrupted run") + print(f" pipeline --config 'job=J' --rerun-incomplete check and fix runs") print() print("# Monitor progress") diff --git a/scripts/python/create_final_cat.py b/scripts/python/create_final_cat.py index 8219e7d13..75e3a67c0 100755 --- a/scripts/python/create_final_cat.py +++ b/scripts/python/create_final_cat.py @@ -259,6 +259,10 @@ def print_list(params): with open(params["output_summary"], "w") as f_out: print(n_tiles, file=f_out) + # Write n_tiles to HDF5 file header + with h5py.File(params["merged_cat_path"], "a") as hdf5_file: + hdf5_file.attrs["n_tiles"] = n_tiles + def get_patch_group(hdf5_file, patch, verbose=False): """Get Patch group. 
diff --git a/scripts/sh/init_run_v2.0.sh b/scripts/sh/init_run_v2.0.sh index 293a78515..c349d0178 100755 --- a/scripts/sh/init_run_v2.0.sh +++ b/scripts/sh/init_run_v2.0.sh @@ -36,11 +36,15 @@ usage="Usage: $(basename "$0") [OPTIONS] -s, --subdir SUBDIR subdir for image simulations, default='$subdir'\n -d, --dir DIR\tbase run directory, default='$base_dir'\n -P, --params PATH\tsource params.py to copy, default='$params_src'\n + -T, --tiles PATH\ttile numbers file (image_sims only), overrides default\n + -c, --config CONFIG config file for input directories (image_sims only)\n --force\t\trecreate existing symlinks and parameter files\n -S, --sample SAMPLE\tsample version for mask config (e.g. 6)\n " ## Parse command line +tiles_src_override="" +config_file="" while [ $# -gt 0 ]; do case "$1" in -h) @@ -63,6 +67,14 @@ while [ $# -gt 0 ]; do params_src="$2" shift ;; + -T|--tiles) + tiles_src_override="$2" + shift + ;; + -c|--config) + config_file="$2" + shift + ;; --force) force=1 ;; @@ -91,11 +103,23 @@ if [ "$type" == "data" ]; then elif [ "$type" == "image_sims" ]; then config_dir="$sp_root/example/cfis_image_sims" - tiles_src="$sp_root/auxdir/CFIS/im_sims_202606/numbers.txt" + # Use override if provided, otherwise use default + if [ -n "$tiles_src_override" ]; then + tiles_src="$tiles_src_override" + else + tiles_src="$sp_root/auxdir/CFIS/im_sims_202606/numbers.txt" + fi - input_dir_base="/n09data/hervas/skills_out" - input_dir_tiles="$input_dir_base/$subdir/images/SP_tiles" - input_dir_exp="$input_dir_base/$subdir/images/SP_exp" + # Read input directories from config file if provided + input_dir_tiles="" + input_dir_exp="" + if [ -n "$config_file" ]; then + if [ ! 
-f "$config_file" ]; then + echo "ERROR: Config file not found: $config_file" + exit 2 + fi + source "$config_file" + fi else @@ -113,11 +137,13 @@ mkdir -p "$base_dir/$subdir" cd "$base_dir/$subdir" if [ "$type" == "image_sims" ]; then - for link in input_tiles input_exp; do - src=$([ "$link" == "input_tiles" ] && echo "$input_dir_tiles" || echo "$input_dir_exp") - [ $force -eq 1 ] && rm -f "$link" - [ ! -e "$link" ] && ln -s "$src" "$link" - done + if [ -n "$input_dir_tiles" ] && [ -n "$input_dir_exp" ]; then + for link in input_tiles input_exp; do + src=$([ "$link" == "input_tiles" ] && echo "$input_dir_tiles" || echo "$input_dir_exp") + [ $force -eq 1 ] && rm -f "$link" + [ ! -e "$link" ] && ln -s "$src" "$link" + done + fi if [ -n "$sample" ]; then mask_src="$HOME/astro/repositories/github/sp_validation/config/calibration/mask_v1.X.${sample}_im_sim.yaml" @@ -207,6 +233,10 @@ echo " ├── cfis -> ${config_dir}" echo " ├── tile_numbers.txt -> ${tiles_src}" echo " └── params.py <- ${params_src}" if [ "$type" == "image_sims" ]; then - echo " ___ input_dir_tiles -> $input_dir_tiles" - echo " ___ input_dir_exp -> $input_dir_exp" + if [ -n "$input_dir_tiles" ] && [ -n "$input_dir_exp" ]; then + echo " ├── input_tiles -> $input_dir_tiles" + echo " └── input_exp -> $input_dir_exp" + else + echo " (no input symlinks: config file not provided)" + fi fi diff --git a/scripts/sh/run_job_sp_canfar_v2.0.bash b/scripts/sh/run_job_sp_canfar_v2.0.bash index b555077c4..da464ff23 100755 --- a/scripts/sh/run_job_sp_canfar_v2.0.bash +++ b/scripts/sh/run_job_sp_canfar_v2.0.bash @@ -822,7 +822,7 @@ if [[ $do_job != 0 ]]; then run_tile_job 64 "fpsf" "fake_psf_runner:1" run_tile_job 512 "ViVi ViVi" "vignetmaker_runner_run_1:1 vignetmaker_runner_run_2:3" if [ "$run_and_check" == "1" ]; then - check_only=1; local saved_force_512im=$force; force=0 + check_only=1; saved_force_512im=$force; force=0 out1=$(run_tile_job 64 "fpsf" "fake_psf_runner:1" 2>&1) out2=$(run_tile_job 512 "ViVi ViVi" 
"vignetmaker_runner_run_1:1 vignetmaker_runner_run_2:3" 2>&1) check_only=0; force=$saved_force_512im diff --git a/src/shapepipe/modules/vignetmaker_package/vignetmaker.py b/src/shapepipe/modules/vignetmaker_package/vignetmaker.py index 74e26915b..2ffe33157 100644 --- a/src/shapepipe/modules/vignetmaker_package/vignetmaker.py +++ b/src/shapepipe/modules/vignetmaker_package/vignetmaker.py @@ -119,14 +119,36 @@ def get_pos(self, pos_params): Array of the positions """ - file = file_io.FITSCatalogue(self._galcat_path, SEx_catalogue=True) - file.open() + try: + file = file_io.FITSCatalogue(self._galcat_path, SEx_catalogue=True) + except Exception as e: + self._w_log.error(f"Error creating FITSCatalogue for {self._galcat_path}: {e}") + raise + + try: + file.open() + except Exception as e: + self._w_log.error(f"Error opening catalogue file {self._galcat_path}: {e}") + raise - pos = np.array( - [file.get_data()[pos_params[1]], file.get_data()[pos_params[0]]] - ).T + try: + data = file.get_data() + if data is None or len(data) == 0: + raise ValueError(f"Catalogue file is empty or corrupt: {self._galcat_path}") - file.close() + self._w_log.info(f"Successfully read catalogue {self._galcat_path} with {len(data)} entries") + + pos = np.array( + [data[pos_params[1]], data[pos_params[0]]] + ).T + except KeyError as e: + self._w_log.error(f"Column not found in {self._galcat_path}: {e}") + raise + except Exception as e: + self._w_log.error(f"Error reading data from catalogue {self._galcat_path}: {e}") + raise + finally: + file.close() return pos @@ -147,25 +169,44 @@ def convert_pos(self, image_path): New positions in pixel coordinates """ - # Get image header - file = file_io.FITSCatalogue(image_path) - file.open() - head = file.get_header(0) - file.close() + try: + file = file_io.FITSCatalogue(image_path) + except Exception as e: + self._w_log.error(f"Error creating FITSCatalogue for image {image_path}: {e}") + raise - # Get WCS transformation matrix - wcs = WCS(head) + try: + 
file.open() + except Exception as e: + self._w_log.error(f"Error opening image file {image_path}: {e}") + raise - # Create copy of input positions - pos_tmp = np.copy(self._pos) + try: + head = file.get_header(0) + if head is None: + raise ValueError(f"Image file header is empty or corrupt: {image_path}") + self._w_log.info(f"Successfully read header from image {image_path}") + except Exception as e: + self._w_log.error(f"Error reading header from image {image_path}: {e}") + raise + finally: + file.close() - # Exchange x and y + try: + wcs = WCS(head) + except Exception as e: + self._w_log.error(f"Error creating WCS from header of {image_path}: {e}") + raise + + pos_tmp = np.copy(self._pos) pos_tmp[:, [0, 1]] = pos_tmp[:, [1, 0]] - # Transform from world to pixel coordinates - new_pos = wcs.all_world2pix(pos_tmp, 1) + try: + new_pos = wcs.all_world2pix(pos_tmp, 1) + except Exception as e: + self._w_log.error(f"Error converting world to pixel coordinates for {image_path}: {e}") + raise - # Exchange x and y back to original new_pos[:, [0, 1]] = new_pos[:, [1, 0]] return new_pos From 16cb304dc25132bbb37a348135204190804e7b5e Mon Sep 17 00:00:00 2001 From: martinkilbinger Date: Sun, 28 Jun 2026 14:16:25 +0200 Subject: [PATCH 22/24] zero-size output fixes --- .../CFIS/im_sims_202606/tile_numbers_test.txt | 43 +++++++++++++++++++ example/cfis_image_sims/input_dirs.conf | 12 ++++++ 2 files changed, 55 insertions(+) create mode 100644 auxdir/CFIS/im_sims_202606/tile_numbers_test.txt create mode 100644 example/cfis_image_sims/input_dirs.conf diff --git a/auxdir/CFIS/im_sims_202606/tile_numbers_test.txt b/auxdir/CFIS/im_sims_202606/tile_numbers_test.txt new file mode 100644 index 000000000..449bd74d6 --- /dev/null +++ b/auxdir/CFIS/im_sims_202606/tile_numbers_test.txt @@ -0,0 +1,43 @@ +251.288 +251.289 +251.291 +252.287 +252.288 +252.289 +252.291 +253.287 +253.288 +253.291 +254.286 +254.287 +254.288 +255.286 +255.288 +255.290 +256.286 +256.287 +257.285 +257.286 +257.287 
+257.288 +258.287 +258.289 +259.285 +259.286 +259.287 +259.288 +259.289 +260.284 +260.285 +260.286 +260.287 +260.288 +260.289 +261.284 +261.285 +261.286 +261.287 +261.288 +262.285 +262.286 +262.287 diff --git a/example/cfis_image_sims/input_dirs.conf b/example/cfis_image_sims/input_dirs.conf new file mode 100644 index 000000000..cd3661983 --- /dev/null +++ b/example/cfis_image_sims/input_dirs.conf @@ -0,0 +1,12 @@ +# Configuration template for init_run_v2.0.sh +# Specifies input directories for image simulations +# Copy and edit this file, then pass with: -c /path/to/config + +# Base directory for input data +input_dir_base="/n09data/hervas/skills_out" + +# Tile images subdirectory +input_dir_tiles="${input_dir_base}/${subdir}/images/SP_tiles" + +# Exposure images subdirectory +input_dir_exp="${input_dir_base}/${subdir}/images/SP_exp" From 28084ca288f656a8805b2d26d5cb5a72acb62d88 Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Fri, 3 Jul 2026 21:52:45 +0200 Subject: [PATCH 23/24] retarget(im_sims): revert incidental stale-base edits to shared modules Keep only genuine sims-enablement; revert shared-module changes that were side-effects of the stale ngmix_v2.0 base and collide with in-flight work: - make_cat: restore HSM_G*_PSF grammar (fix/ngmix-psf-columns) + spread_model (714847c2, split-761); drops the input_file_List NameError - vignetmaker: drop broad try/except churn (_w_log AttributeError -> test fail) - mask: restore CDSCLIENT_PATH knob (live in 4 example configs) - restore pipeline_tutorial.md (toc.rst ref -> Sphinx build) + run_scratch_local.sh Verification branch only; not for push. Grounds the #766 review. 
--- docs/source/pipeline_tutorial.md | 341 ++++++++++++++++++ scripts/sh/run_scratch_local.sh | 178 +++++++++ .../modules/make_cat_package/make_cat.py | 83 ++++- src/shapepipe/modules/make_cat_runner.py | 49 ++- .../modules/mask_package/__init__.py | 2 + src/shapepipe/modules/mask_package/mask.py | 22 +- .../vignetmaker_package/vignetmaker.py | 79 +--- 7 files changed, 677 insertions(+), 77 deletions(-) create mode 100644 docs/source/pipeline_tutorial.md create mode 100755 scripts/sh/run_scratch_local.sh diff --git a/docs/source/pipeline_tutorial.md b/docs/source/pipeline_tutorial.md new file mode 100644 index 000000000..95a0aa94a --- /dev/null +++ b/docs/source/pipeline_tutorial.md @@ -0,0 +1,341 @@ +# ShapePipe Tutorial + +## Quick start + +Run the entire pipeline on a single example CFIS image with tile ID 246.290: +1. [Install](installation.md) `ShapePipe` — the recommended path is the container image, which bundles everything needed to run the pipeline. +3. Run the job script +```bash +job_sp 246.290 -j 127 +``` + +## Introduction + +The `ShapePipe` pipeline processes single-exposure images and stacked images. Input images have to be calibrated beforehand for astrometry and photometry. This tutorial of an entire `ShapePipe` run covers specifically images from CFIS, the Canada-France Imaging Survey. CFIS stacks are so-called tiles, which are the co-adds of on average three exposures in the r-band. + +### File types and names + +The `ShapePipe` pipeline handles different image and file types, some of which +are created by the pipeline during the analysis. These file types are listed below. + +All files follow a (configurable) naming and numbering convention, to facilitate bookkeeping for +tracking relevant image information. In general, the convention is **_.fits**. +`ID` can be a combination of numbers and special characters such as `-`. 
+Naming and numbering of the input files can closely follow the original image names and (ID) numbers provided by the telescope and pre-processing software, with some required modifications as described below. + +- Single-exposure mosaic image. + Multi-HDU FITS file containing a mosaic from multiple CCDs of a single exposure (an exposure is also called epoch). + Each CCD is stored in a different HDU. + These files are used on input by `ShapePipe`. The pixel data can contain the observed image, a weight map, or a flag map. + These images are typically created by a telescope analysis software (e.g.~`pitcairn`). Examples from CFIS are + `2228303p.fits.fz`, `2214439p.flag.fits.fz`. These names need to be modified to be correctly identified by `ShapePipe`: + The `p` needs to be removed, the image type needs to precede the ID, and the file name can only contain a single dot (`.`) delimiting the file extension. We create the extension `fitsfz` for compressed FITS file. + Default convention: **-.fitsfz**. + Examples: `image-2228303.fitsfz`, `flag-2214439.fitsfz` + +- Single-exposure single-CCD image. + FITS file containing a single CCD from an individual exposure. The pixel data can contain the observed image, a weight + map, or a flag map. + Default convention: **--.fits** + Examples: `image-2079614-9.fits`, `weight-2079614-3.fits` + +- Stacked images + FITS file containing a stack by co-adding different single exposures, created by software such as `swarp`. + A stacked image is also called *tile*. These files are used on input by `ShapePipe`. + The pixel data can contain the observed image, a weight map, or a flag map. Tile images and weights are created in the + case of CFIS by Stephen Gwyn using a combination of `swarp` and his own software. Examples of file names are + `CFIS.316.246.r.fits`, `CFIS.205.267.r.weight.fits.fz`, the latter is a compressed FITS file, see below. Tile flag files + are created the mask module of `ShapePipe` (see [Mask images](#mask-images)). 
The tile ID needs to be modified such that the `.` between the two tile numbers (RA and DEC indicator) is not mistaken for a file extension delimiter. For the same reason, the extension `.fits.fz` is changed to `.fitsfz`. In addition, for + clarity, we include the string `image` for a tile image type. + Default convention: **-.fits** + Examples: `CFIS_image-277-282.fits`, `CFIS_weight-274-282.fitsfz`, `pipeline_flag-239-293.fits` + +- Database catalogue files + For very large files that combine information from multiple tiles or single exposures, `ShapePipe` creates `sqlite` + database catalogues. + Examples: `log_exp_headers.sqlite`, exposure header information + +- Numpy array binary files + Some large files are stored as numpy arrays. These contain FITS header information. + Example: `headers-2366993.npy` + +- PSF files + `PSFEx` and `SExtractor` produce FITS files with file extensions other than `.fits`: `.psf` for files containing PSF + model information for a single CCD, and `.cat` for a PSF catalogue. + +- _final_ shape catalogue + The end product of `ShapePipe` is a _final_ catalogue containing a large amount of information for each galaxy, including its + shape parameters, the ellipticity components :math:`e_1` and :math:`e_2`. This catalogue also contains shapes of artificially + sheared images. This information is used in post-processing to compute calibrated shear estimates via metacalibration. + +- Summary statistic files + The `SETools` module that creates samples of objects according to some user-defined selection criteria (see [Select stars](#select-stars)) also outputs ASCII + files with user-defined summary statistics for each CCD, for example the number of selected stars, or mean and standard deviation of their FWHM. + Example: `star_stat-2366993-18.txt` + +- Tile ID list + ASCII file with a tile number on each line. Used for the `get_image_runner` module to download CFIS images (see [Download tiles](#download-tiles)).
+ +- Single-exposure name list + ASCII file with a single-exposure name on each line. Produced by the `find_exposure_runner` module to identify single exposures that were used to create + a given tile. See [Find exposures](#find-exposures). + +- Plots + The `SETools` module can also produce plots of the properties of the objects that were selected for a given CCD. + The type of plot (histogram, scatter plot, ...) and quantities to plot as well as plot decorations can be specified in the + selection criteria config file (see [Select stars](#select-stars)). + Example: `hist_mag_stars-2104133-5.png` + +- Log files + The pipeline core and all called modules write ASCII log files to disk. + Examples: `process-2366993-6.log`, `log_sp_exp.log`. + +### CFIS processing + +`ShapePipe` splits the processing of CFIS images into several parts: +These are the retrieval and preparation of input images, processing of single exposures, +processing of tile images, creation and upload (optional) of _final_ shape catalogues. + +The following flowchart visualises the processing parts and steps. + +![ShapePipe_FlowChart](img/ShapePipe_v0.0.1.png) + +Below, the individual processing steps are described in detail. + +### Input and output paths + +All required paths are automatically set in the job script `job_sp`. + +If an example config file is run outside this script, +the following path variables might need to be defined. +- `$SP_RUN`: Run directory of `ShapePipe`. In general this is just `pwd`, and can be set via + ```bash + export SP_RUN=`pwd` + ``` + but on a cluster this directory might be different. +- `$SP_CONFIG`: Path to configuration files. In our example this is `$SP_BASE/example/cfis`. + +In addition, the output path `$SP_RUN/output` needs to be created by the user before running `ShapePipe`. + +### Job and pipeline scripts + +The job script to run the pipeline in its entirety or in parts is `job_sp[.bash]`. Type +```bash +job_sp -h +``` +for all options.
+ +This script creates the subdirectory `$SP_RUN/output` to store all pipeline outputs +(log files, diagnostics, statistics, output images, catalogues, single-exposure headers with WCS information). + +Optionally, the subdir `output_star_cat` is created by the user to store the external star catalogues for masking. This is only necessary if the pipeline is run on a cluster without internet connection to access star catalogues. In that case, the star catalogues need to be retrieved outside the pipeline, for example on a login node, and copied to `output_star_cat`. + +The job script automatically performs a number of subsequent calls to the `ShapePipe` executable `shapepipe_run`, as +```bash +shapepipe_run -c $SP_CONFIG/.ini +``` +The config file `.ini` contains the configuration for one or more modules. +See the main `ShapePipe` readme for more details. + +The user specifies which steps are run with the command line option `-j JOB`. The integer value `JOB` +is bit-coded such that arbitrary combinations of steps can be run with a single call to `job_sp`. For +example, to run steps #1 and #2, type `job_sp -j 3`. + +### Select tiles + +To run the job script, one or more CFIS tiles need to be chosen. If the tile IDs are known, they are provided to `job_sp` on the command line. + +If the tile IDs are not known a priori, they can be selected via sky coordinates, with the script `cfis_field_select`. +For example, to find the tile number for a Planck cluster at R.A.=213.68 deg, dec=57.79 deg, run: +```bash +cfis_field_select -i /path/to/shapepipe/auxdir/CFIS/tiles_202007/tiles_all_order.txt --coord 213.68deg_54.79deg -t tile --input_format ID_only --out_name_only --out_ID_only -s +``` +The input text file (provided via the flag `-i`) contains a list of CFIS tiles, this can also be a directory containing the tile FITS files. + + +The following sections describe the different steps that are performed with `job_sp`.
+ +## Run the pipeline + +### Retrieve input images + +The command +```bash +job_sp TILE_ID -j 1 +``` +retrieves the image and weight corresponding to TILE_ID using the module `get_images`. +It then identifies the exposures that were used to create the tile image via the `find_exposures` runner. +Finally, another call to `get_images` retrieves the exposure images, weights, and flag files. + +For the retrieval method the user can choose between +- download from VOspace (`-r vos`); +- create symbolic link to existing file on disk (`-r symlink`). + +Note that internet access is required for this step if the download method is `vos`. + +An output directory `run_sp_GitFeGie` (in `output`) is created containing the results of `get_images` for tiles (`Git`), +`find_exposures` (`Fe`), and `get_images` for exposures (`Gie`). + +## Prepare input images + +With +```bash +job_sp TILE_ID -j 2 +``` +the compressed tile weight image is uncompressed via the `uncompress_fits` module. Then, the single-exposure images, weight, and flags are split into single-exposure single-CCD files +(one FITS file per CCD) with `split_exp`. +Finally, the headers of all single-exposure single-CCD files are merged into a single `sqlite` file, to store the WCS information of the input exposures. + +Two output directories are created, `run_sp_Uz` for `uncompress_fits`, and `run_sp_exp_SpMh` for the output of the modules +`split_exp` (`Sp`) and `merge_headers` (`Mh`). + +## Mask images + +Run +```bash +job_sp TILE_ID -j 4 +``` +to mask tile and single-exposure single-CCD images. Both tasks are performed by two calls to the `mask` runner. + +Note that internet access is required for this step, since a reference star catalogue is downloaded. + +The outputs of both masking runs are stored in the output directory `run_sp_MaMa`, with run 1 (2) of +`mask` corresponding to tiles (exposures).
+ +**Diagnostics:** Open a single-exposure single-CCD image and the corresponding pipeline flag +in `ds9`, and display both frames next to each other. Example +```bash +ds9 image-2113737-10.fits pipeline_flag-2113737-10.fits +``` +Choose `zoom fit` for both frames, click `scale zscale` for the image, and `color aips0` for the flag, to display something like this: + + + +By eye the correspondence between the different flag types and the image can be +seen. Note that the two frames might not match perfectly, since (a) WCS +information is not available in the flag file FITS headers; (b) the image can +have a zero-padded pixel border, which is not accounted for by `ds9`. + +## Detect objects on tiles and process stars on single exposures + +The call +```bash +job_sp TILE_ID -j 8 +``` +performs a number of steps. First, objects on the tiles are detected with the `sextractor` runner. +Next, the following tasks are run on the single-exposure single-CCD images: +- Objects are detected with `sextractor`. +- Star candidates are selected via `setools`. +- The PSF model is created, either with `psfex` for PSFEx, or + with `mccd_preprocessing` and `mccd_fit_val` for MCCD. +- The PSF model is interpolated to star positions for validation. For the PSFEx model, this is done + via a call to `psfex_interp`. For MCCD, the modules `merge_starcat`, `mccd_plots`, and + `mccd_interp` are called. + +The output directory for both the `mccd` and `psfex` options is `run_sp_tile_Sx_exp_SxSePsf`. +This stores the output of SExtractor on the tiles (`tile_Sx`), on the exposures (`exp_Sx`), +`setools` (`Se`), and the PSF model (`Psf`). + +The following plots show an example of a single CCD, in the center of the focal plane.
+ +| Size-magnitude plot | Star magnitude histogram | Stars in CCD (mag) | Stars in CCD (size) | +| --- | --- | --- | --- | +| | | | | +| The stellar locus is well-defined | Magnitude distribution looks reasonable | Stars are relatively homogeneously distributed over the CCD | The uniform and small seeing of CFHT is evident | + +To contrast the last plot, here is the case of the CCD in the lower right corner, which shows a known (but yet unexplained) lack of stars +in the lower parts: + + + +The statistics output file for the center CCD #10: +```bash +cat star_stat-2113737-10.txt +# Statistics +Nb objects full cat = 1267 +Nb stars = 160 +stars/deg^2 = 6345.70450519073 +Mean star fwhm selected (arcsec) = 0.7441293125152588 +Standard deviation fwhm star selected (arcsec) = 0.014217643037438393 +Mode fwhm used (arcsec) = 0.7345179691314697 +Min fwhm cut (arcesec) = 0.7159179691314698 +Max fwhm cut (arcsec) = 0.7531179691314697 +``` + +### Global star sample statistics + +The statistics on stars from all CCD can be combined to create histograms, with the non-pipeline script `stats_global.py`. +Run +```bash +stats_global -o stats -v -c $SP_CONFIG/config_stats.ini +``` +to create histograms (as `.txt` tables and `.png` plots) in the directory `stats`. Here are some example plots : + +| Non-masked objects per CCD | Stars per CCD | FWHM mode | +| --- | --- | --- | +| | | | +| No CCD with a very large masked area | No CCD with insufficient stars | Rather broad seeing distribution | + +Note that `stats_global` read all `SETool` output stats files found in a given input directory tree. It can thus produce histogram combining +several runs. + + +## Galaxy selection + +The focus of the next step, +```bash +job_sp TILE_ID -j 16 +``` +is the selection of galaxies as extended objects compared to the PSF. +First, the PSF model is interpolated to galaxy positions, according to the PSF model +with `psfex_interp` or `mccd_interp`. 
Next, postage stamps around galaxies +of the weight maps are created via `vignetmaker`. Then, the spread model +is computed by the `spread_model` module. Finally, postage stamps +around galaxies of single-exposure data are extracted with another call +to `vignetmaker`. + +The output directory is +- `run_sp_MiViSmVi` if the PSF model is `mccd`; +- `run_sp_tile_PsViSmVi` for the `PSFEx` PSF model. + +This corresponds to the MCCD/PSFEx interpolation (`Mi`/`Pi`), `vignetmaker` (`Vi`), `spread_model` (`Sm`), and the +second call to `vignetmaker` (`Vi`). + + +## Shape measurement + +The call +```bash +job_sp TILE_ID -j 32 +``` +computes galaxy shapes using the multi-epoch model-fitting method `ngmix`. At the same time, +shapes of artificially sheared galaxies are obtained for metacalibration. + +Shape measurement is performed in parallel for each tile, the number of processes can be specified +by the user with the option `--nsh_jobs NJOB`. This creates `NJOB` output directories `run_sp_tile_ngmix_Ngu`. +with `X` = 1 ... `NJOB` containing the result of `ngmix`. + + +## Paste catalogues + +The last real processing step is +```bash +job_sp TILE_ID -j 64 +``` +This task first merges the `NJOB` parallel `ngmix` output files from the previous step into +one output file. Then, previously obtained information is pasted into a _final_ shape catalogue via `make_cat`. +Included are galaxy detection and basic measurement parameters, the PSF model at +galaxy positions, the spread-model classification, and the shape measurement. + +Two output directories are created. +The first one is `run_sp_Ms` for the `merge_sep` run. +The second is `run_sp_Mc` for the `make_cat` task; the name is the same for both the `MCCD` and `PSFEx` PSF models.
+ + +## Upload results + +Optionally, after the pipeline is finished, results can be uploaded to VOspace via +```bash +job_sp TILE_ID -j 128 +``` + diff --git a/scripts/sh/run_scratch_local.sh b/scripts/sh/run_scratch_local.sh new file mode 100755 index 000000000..1223e6683 --- /dev/null +++ b/scripts/sh/run_scratch_local.sh @@ -0,0 +1,178 @@ +#!/bin/bash + +# Command line arguments +## Default values +job=-1 +ID=-1 +N_SMP=1 +dry_run=0 +dir=`pwd` +debug_out=-1 +scratch=/n17data/`whoami`/scratch +exec_path=$HOME/shapepipe/scripts/sh +slurm=1 + +# mh_local is 0 (1) if merge_header_runner is run on all exposures, +# which is standard so far (run on exposures of given tile only; new) +mh_local=0 + +# sp_local is 0 (1) is split_headers_runner and mask_runner is run +# on all exposures (locally). Not 100% automatic yet. +sp_local=1 +VERBOSE=1 + +pat="-- " + +# Help string +usage="Usage: $(basename "$0") -j JOB -e ID -k KIND [OPTIONS] +\n\nOptions:\n + -h\tthis message\n + -j, --job JOB\tRUnning JOB, bit-coded\n + -e, --exclusive ID + \timage ID\n + -p, --psf MODEL\n + \tPSF model, one in ['psfex'|'mccd'], default='$psf'\n + -m, --mh_local MH\n + \tmerged header file local (MH=0) or global (MH=1); default is $mh_local\n + -N, --N_SMP N_SMOp\n + \tnumber of jobs (SMP mode only), default from original config files\n + -d, --directory\n + \trun directory, default is pwd ($dir)\n + -S, --scratch\n + \tprocessing scratch directory, default is $scratch\n + -n, --dry_run LEVEL\n + \tdry run, no actuall processing\n + --debug_out PATH\n + \tdebug output file PATH, default not used\n +" + +## Help if no arguments +if [ -z $1 ]; then + echo -ne $usage + exit 1 +fi + +## Parse command line +while [ $# -gt 0 ]; do + case "$1" in + -h) + echo -ne $usage + exit 0 + ;; + -j|--job) + job="$2" + shift + ;; + -e|--exclusive) + ID="$2" + shift + ;; + -p|--psf) + psf="$2" + shift + ;; + -m|--mh_local) + mh_local="$2" + shift + ;; + -N|--N_SMP) + N_SMP="$2" + shift + ;; + -d|--directory) + 
dir="$2" + shift + ;; + -S|--scratch) + scratch="$2" + shift + ;; + -n|--dry_run) + dry_run="$2" + shift + ;; + --debug_out) + debug_out="$2" + shift + ;; + esac + shift +done + +## Check options +if [ "$job" == "-1" ]; then + echo "No job indicated, use option -j" + exit 2 +fi + +if [ "$exclusive" == "-1" ]; then + echo "No image ID indicated, use option -e" + exit 3 +fi + +if [ "$psf" != "psfex" ] && [ "$psf" != "mccd" ]; then + echo "PSF (option -p) needs to be 'psfex' or 'mccd'" + exit 4 +fi + + +source $HOME/shapepipe/scripts/sh/functions.sh + + +kind=$(get_kind_from_job $job) + + +# Load common functions +source $HOME/shapepipe/scripts/sh/functions.sh + + +# Start script + +if [ "$scratch" != "-1" ]; then + + command "mkdir -p $scratch/${kind}_runs" $dry_run + command "cp -R ${kind}_runs/$ID $scratch/${kind}_runs" $dry_run + command "cd $scratch" $dry_run + +fi + + if [ "$slurm" == "0" ]; then + command "init_run_exclusive_canfar.sh -j $job -p $psf -m $mh_local -N $N_SMP -e $ID" $dry_run + else + STATUS=$(sbatch --output=./sbatch-$ID.out --partition=comp --job-name="j${job}_${ID}" --ntasks-per-node=$N_SMP --time=32:00:00 --mem=64G $exec_path/init_run_exclusive_canfar.sh -j $job -p $psf -m $mh_local -N $N_SMP -e $ID) + + JOB_ID=$(echo $STATUS | cut -d ' ' -f 4) + echo "JOB_ID=$JOB_ID" + + # Wait for the job to finish + while true; do + STATUS=$(squeue -j "$JOB_ID" -h -o "%T") + if [[ -z "$STATUS" ]]; then + echo "job $JOB_ID no longer in the queue" + break + fi + + echo "Waiting for job $JOB_ID in state '$STATUS' to complete..." + sleep 10 + done + + echo "Job $JOB_ID has completed. Proceeding with the script..." 
+ fi + +if [ "$scratch" != "-1" ]; then + + if [ "$job" == "32" ]; then + command "mv ${kind}_runs/$ID/output/run_sp_exp_SxSe* $dir/${kind}_runs/$ID/output" $dry_run + elif [ "$job" == "64" ]; then + command "mv ${kind}_runs/$ID/output/run_sp_tile_PsViSm* $dir/${kind}_runs/$ID/output" $dry_run + elif [ "$job" == "128" ]; then + command "mv ${kind}_runs/$ID/output/run_sp_tile_ngmix_* $dir/${kind}_runs/$ID/output" $dry_run + fi + + command "rm -rf ${kind}_runs/$ID" $dry_run + command "cd $dir/${kind}_runs/$ID" $dry_run + # Gave Input/Output python error + #command "update_runs_log_file.py" $dry_run + command "cd $dir" $dry_run + +fi diff --git a/src/shapepipe/modules/make_cat_package/make_cat.py b/src/shapepipe/modules/make_cat_package/make_cat.py index e28ac6438..a6b4ab968 100644 --- a/src/shapepipe/modules/make_cat_package/make_cat.py +++ b/src/shapepipe/modules/make_cat_package/make_cat.py @@ -139,6 +139,77 @@ def save_sextractor_data(final_cat_file, sexcat_path, remove_vignet=True): return cat_size +def save_sm_data( + final_cat_file, + sexcat_sm_path, + do_classif=True, + star_thresh=0.003, + gal_thresh=0.01, + n_obj=-1, +): + r"""Save Spread-Model Data. + + Save the spread-model data into the final catalogue. + + Parameters + ---------- + final_cat_file : file_io.FITSCatalogue + Final catalogue + sexcat_sm_path : str + Path to spread-model catalogue to save. 
If ``None``, spread_model is + set to 99 + do_classif : bool + If ``True`` objects will be classified into stars, galaxies, and other, + using the classifier + :math:`{\rm class} = {\rm sm} + 2 * {\rm sm}_{\rm err}` + star_thresh : float + Threshold for star selection; object is classified as star if + :math:`|{\rm class}| <` ``star_thresh`` + gal_thresh : float + Threshold for galaxy selection; object is classified as galaxy if + :math:`{\rm class} >` ``gal_thresh`` + n_obj : int, optional + Number of objects, only used if ``sexcat_sm_path`` is ``None`` + + Returns + ------- + int + Number of objects saved + """ + final_cat_file.open() + + if sexcat_sm_path is not None: + sexcat_sm_file = file_io.FITSCatalogue( + sexcat_sm_path, + SEx_catalogue=True, + ) + sexcat_sm_file.open() + + sm = np.copy(sexcat_sm_file.get_data()["SPREAD_MODEL"]) + sm_err = np.copy(sexcat_sm_file.get_data()["SPREADERR_MODEL"]) + + sexcat_sm_file.close() + + else: + sm = np.ones(n_obj) * 99 + sm_err = np.ones(n_obj) * 99 + + final_cat_file.add_col("SPREAD_MODEL", sm) + final_cat_file.add_col("SPREADERR_MODEL", sm_err) + + if do_classif: + obj_flag = np.ones_like(sm, dtype="int16") * 2 + classif = sm + 2.0 * sm_err + obj_flag[np.where(np.abs(classif) < star_thresh)] = 0 + obj_flag[np.where(classif > gal_thresh)] = 1 + + final_cat_file.add_col("SPREAD_CLASS", obj_flag) + + final_cat_file.close() + + return n_obj + + class SaveCatalogue: """Save Catalogue. 
@@ -621,13 +692,13 @@ def _save_psf_data(self, galaxy_psf_path): max_epoch = np.max(self._final_cat_file.get_data()["N_EPOCH"]) + 1 self._output_dict = { - f"HSM_E1_PSF_{idx + 1}": np.ones(len(self._obj_id)) * -10.0 + f"HSM_G1_PSF_{idx + 1}": np.ones(len(self._obj_id)) * -10.0 for idx in range(max_epoch) } self._output_dict = { **self._output_dict, **{ - f"HSM_E2_PSF_{idx + 1}": np.ones(len(self._obj_id)) * -10.0 + f"HSM_G2_PSF_{idx + 1}": np.ones(len(self._obj_id)) * -10.0 for idx in range(max_epoch) }, } @@ -661,12 +732,12 @@ def _save_psf_data(self, galaxy_psf_path): continue self._add2dict( - f"HSM_E1_PSF_{epoch + 1}", - gpc_data["SHAPES"]["HSM_E1_PSF"], idx + f"HSM_G1_PSF_{epoch + 1}", + gpc_data["SHAPES"]["HSM_G1_PSF"], idx ) self._add2dict( - f"HSM_E2_PSF_{epoch + 1}", - gpc_data["SHAPES"]["HSM_E2_PSF"], idx + f"HSM_G2_PSF_{epoch + 1}", + gpc_data["SHAPES"]["HSM_G2_PSF"], idx ) # HSM_T_PSF already holds T (sigma_to_T applied at the diff --git a/src/shapepipe/modules/make_cat_runner.py b/src/shapepipe/modules/make_cat_runner.py index 6c2fe0015..fa1fc43ff 100644 --- a/src/shapepipe/modules/make_cat_runner.py +++ b/src/shapepipe/modules/make_cat_runner.py @@ -22,7 +22,7 @@ ], file_pattern=[ "tile_sexcat", - "sexcat", + "sexcat_sm", "galaxy_psf", "ngmix", ], @@ -46,10 +46,30 @@ def make_cat_runner( galaxy_psf_path, shape1_cat_path, ) = input_file_list + sexcat_sm_path = None else: - raise IndexError( - f"Invalid number of input files {len(input_file_List)}, expected 3." 
- ) + # With spread model input + ( + tile_sexcat_path, + sexcat_sm_path, + galaxy_psf_path, + shape1_cat_path, + ) = input_file_list[0:4] + if len(input_file_list) == 5: + # With second shape catalogue input + shape2_cat_path = input_file_list[4] + + # Fetch classification options + do_classif = config.getboolean( + module_config_sec, + "SM_DO_CLASSIFICATION", + ) + if do_classif: + star_thresh = config.getfloat(module_config_sec, "SM_STAR_THRESH") + gal_thresh = config.getfloat(module_config_sec, "SM_GAL_THRESH") + else: + star_thresh = None + gal_thresh = None # Fetch shape measurement type shape_type_list = config.getlist( @@ -79,6 +99,27 @@ def make_cat_runner( n_obj = make_cat.save_sextractor_data(final_cat_file, tile_sexcat_path) cat_size_sextractor = n_obj + # Save spread-model data + if sexcat_sm_path is None: + w_log.info("No sm cat input, setting spread model to 99") + else: + w_log.info("Save spread-model data") + cat_size_sm = make_cat.save_sm_data( + final_cat_file, + sexcat_sm_path, + do_classif, + star_thresh, + gal_thresh, + n_obj=n_obj + ) + + if cat_size_sextractor != cat_size_sm: + w_log( + f"Warnign: SExtractor catalogue {tile_sexcat_path} has different size" + + f" ({cat_size_sextractor} than spread_model catalogue" + + f" {sexcat_sm_path} ({cat_size_sm})" + ) + # Save shape data sc_inst = make_cat.SaveCatalogue(final_cat_file, cat_size_sextractor, w_log) w_log.info("Save shape measurement data") diff --git a/src/shapepipe/modules/mask_package/__init__.py b/src/shapepipe/modules/mask_package/__init__.py index 8f203eafd..bdfece65b 100644 --- a/src/shapepipe/modules/mask_package/__init__.py +++ b/src/shapepipe/modules/mask_package/__init__.py @@ -83,6 +83,8 @@ environment will be used WW_CONFIG_FILE : str Path to the WeightWatcher configuration file +CDSCLIENT_PATH : str, optional + Path to CDS client executable; required if ``USE_EXT_STAR = False`` [BORDER_PARAMETERS] ------------------- diff --git a/src/shapepipe/modules/mask_package/mask.py 
b/src/shapepipe/modules/mask_package/mask.py index 740b1be2c..17e052a6e 100644 --- a/src/shapepipe/modules/mask_package/mask.py +++ b/src/shapepipe/modules/mask_package/mask.py @@ -106,8 +106,9 @@ def __init__( # Search path for existing mask files self._check_existing_dir = check_existing_dir - # External star catalogue path, if given (None otherwise) - self._star_cat_path = star_cat_path + # Set external star catalogue path if given + if star_cat_path is not None: + self._star_cat_path = star_cat_path self._hdu = hdu @@ -171,8 +172,17 @@ def _get_config(self): self._config["PATH"]["WW_configfile"] = conf.getexpanded( "PROGRAM_PATH", "WW_CONFIG_FILE" ) - if self._star_cat_path is not None: + if conf.has_option("PROGRAM_PATH", "CDSCLIENT_PATH"): + self._config["PATH"]["CDSclient"] = conf.getexpanded( + "PROGRAM_PATH", "CDSCLIENT_PATH" + ) + elif self._star_cat_path is not None: self._config["PATH"]["star_cat"] = self._star_cat_path + else: + raise ValueError( + "Either [PROGRAM_PATH]:CDSCLIENT_PATH in the mask config file " + + " or a star catalogue as module input needs to be present" + ) self._config["PATH"]["temp_dir"] = self._get_temp_dir_path( conf.getexpanded("OTHER", "TEMP_DIRECTORY") @@ -419,11 +429,9 @@ def make_mask(self): ) # Handle stdout / stderr - general_stdout = "" + general_stdout = f"\nCDSClient\n{self._CDS_stdout}" general_stderr = "" - if hasattr(self, "_CDS_stdout"): - general_stdout += f"\nCDSClient\n{self._CDS_stdout}" - if hasattr(self, "_CDS_stderr") and self._CDS_stderr != "": + if self._CDS_stderr != "": general_stderr += f"\nCDSClient\n{self._CDS_stderr}" if hasattr(self, "_WW_stdout") or hasattr(self, "_WW_stdout"): general_stdout += f"\n\nWeightWatcher\n{self._WW_stdout}" diff --git a/src/shapepipe/modules/vignetmaker_package/vignetmaker.py b/src/shapepipe/modules/vignetmaker_package/vignetmaker.py index 2ffe33157..74e26915b 100644 --- a/src/shapepipe/modules/vignetmaker_package/vignetmaker.py +++ 
b/src/shapepipe/modules/vignetmaker_package/vignetmaker.py @@ -119,36 +119,14 @@ def get_pos(self, pos_params): Array of the positions """ - try: - file = file_io.FITSCatalogue(self._galcat_path, SEx_catalogue=True) - except Exception as e: - self._w_log.error(f"Error creating FITSCatalogue for {self._galcat_path}: {e}") - raise - - try: - file.open() - except Exception as e: - self._w_log.error(f"Error opening catalogue file {self._galcat_path}: {e}") - raise + file = file_io.FITSCatalogue(self._galcat_path, SEx_catalogue=True) + file.open() - try: - data = file.get_data() - if data is None or len(data) == 0: - raise ValueError(f"Catalogue file is empty or corrupt: {self._galcat_path}") + pos = np.array( + [file.get_data()[pos_params[1]], file.get_data()[pos_params[0]]] + ).T - self._w_log.info(f"Successfully read catalogue {self._galcat_path} with {len(data)} entries") - - pos = np.array( - [data[pos_params[1]], data[pos_params[0]]] - ).T - except KeyError as e: - self._w_log.error(f"Column not found in {self._galcat_path}: {e}") - raise - except Exception as e: - self._w_log.error(f"Error reading data from catalogue {self._galcat_path}: {e}") - raise - finally: - file.close() + file.close() return pos @@ -169,44 +147,25 @@ def convert_pos(self, image_path): New positions in pixel coordinates """ - try: - file = file_io.FITSCatalogue(image_path) - except Exception as e: - self._w_log.error(f"Error creating FITSCatalogue for image {image_path}: {e}") - raise + # Get image header + file = file_io.FITSCatalogue(image_path) + file.open() + head = file.get_header(0) + file.close() - try: - file.open() - except Exception as e: - self._w_log.error(f"Error opening image file {image_path}: {e}") - raise - - try: - head = file.get_header(0) - if head is None: - raise ValueError(f"Image file header is empty or corrupt: {image_path}") - self._w_log.info(f"Successfully read header from image {image_path}") - except Exception as e: - self._w_log.error(f"Error reading header 
from image {image_path}: {e}") - raise - finally: - file.close() - - try: - wcs = WCS(head) - except Exception as e: - self._w_log.error(f"Error creating WCS from header of {image_path}: {e}") - raise + # Get WCS transformation matrix + wcs = WCS(head) + # Create copy of input positions pos_tmp = np.copy(self._pos) + + # Exchange x and y pos_tmp[:, [0, 1]] = pos_tmp[:, [1, 0]] - try: - new_pos = wcs.all_world2pix(pos_tmp, 1) - except Exception as e: - self._w_log.error(f"Error converting world to pixel coordinates for {image_path}: {e}") - raise + # Transform from world to pixel coordinates + new_pos = wcs.all_world2pix(pos_tmp, 1) + # Exchange x and y back to original new_pos[:, [0, 1]] = new_pos[:, [1, 0]] return new_pos From b0804ce12984bc212efd015ecf8d5b5423649b8c Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Fri, 3 Jul 2026 22:41:13 +0200 Subject: [PATCH 24/24] retarget(im_sims): slim to ShapePipe module enablement; orchestration -> sp_validation The image-sims workflow splits by concern along the repo dependency arrow (shapepipe produces catalogues; sp_validation consumes + validates them): - ShapePipe (this PR): only the pipeline-module changes needed to run on background-free sims -- the fake_psf module, ngmix BKG_SUB path, numeric sim-exposure-ID handling (exp_utils/sextractor/merge_headers), the cfis_image_sims module configs, create_final_cat's image_sims mode, and make_data_pujol. - sp_validation (separate PR): the snakemake orchestration + the merge/calibrate/m-bias half, rebuilt on sp_validation's native container workflow. The orchestration files removed here are preserved in git history (origin/im_sims) and move there. Incidental stale-base edits to shared modules/standard configs/tests reverted to ngmix_v2.0 (which already carries the fixes: 662fad42 HSM g-cols, 714847c2 spread_model). Retargeted onto ngmix_v2.0; uv.lock regenerated. 
--- auxdir/CFIS/im_sims_202606/numbers.txt | 200 --------- auxdir/CFIS/im_sims_202606/numbers_run.txt | 200 --------- .../CFIS/im_sims_202606/tile_numbers_test.txt | 43 -- docs/source/image_sims_calibration.md | 301 -------------- docs/source/pipeline_v2.0.md | 330 --------------- example/cfis/config_exp_Ma_onthefly.ini | 3 + ...fex.ini => config_make_cat_psfex_nosm.ini} | 2 +- example/cfis/config_tile_Mh_exp.ini | 4 - .../cfis/config_tile_Ng_batch_psfex_uc.ini | 2 +- example/cfis/final_cat.param | 4 - pyproject.toml | 10 +- scripts/image_sims_pipeline/Snakefile | 256 ------------ .../image_sims_pipeline/config.yaml.template | 39 -- scripts/image_sims_pipeline/info.py | 247 ----------- scripts/image_sims_pipeline/monitor_mbias.py | 264 ------------ scripts/sh/apptainer_noslurm.sh | 10 - scripts/sh/init_run_v2.0.sh | 177 +------- scripts/sh/job_list_help.bash | 2 +- scripts/sh/job_sp_canfar_v2.0.bash | 39 +- scripts/sh/run_job_sp_canfar_v2.0.bash | 387 ++++-------------- tests/module/test_psf_grammar_properties.py | 15 +- tests/module/test_psfex_interp.py | 16 +- 22 files changed, 139 insertions(+), 2412 deletions(-) delete mode 100644 auxdir/CFIS/im_sims_202606/numbers.txt delete mode 100644 auxdir/CFIS/im_sims_202606/numbers_run.txt delete mode 100644 auxdir/CFIS/im_sims_202606/tile_numbers_test.txt delete mode 100644 docs/source/image_sims_calibration.md delete mode 100644 docs/source/pipeline_v2.0.md rename example/cfis/{config_tile_Mc_psfex.ini => config_make_cat_psfex_nosm.ini} (98%) delete mode 100644 scripts/image_sims_pipeline/Snakefile delete mode 100644 scripts/image_sims_pipeline/config.yaml.template delete mode 100644 scripts/image_sims_pipeline/info.py delete mode 100644 scripts/image_sims_pipeline/monitor_mbias.py delete mode 100755 scripts/sh/apptainer_noslurm.sh diff --git a/auxdir/CFIS/im_sims_202606/numbers.txt b/auxdir/CFIS/im_sims_202606/numbers.txt deleted file mode 100644 index f5964b285..000000000 --- a/auxdir/CFIS/im_sims_202606/numbers.txt 
+++ /dev/null @@ -1,200 +0,0 @@ -233.293 -235.293 -236.293 -237.292 -237.293 -238.292 -238.293 -239.291 -239.292 -239.293 -240.291 -240.292 -240.293 -241.293 -242.290 -242.291 -242.292 -242.293 -243.290 -243.291 -243.292 -243.293 -244.293 -245.289 -245.290 -245.291 -245.292 -246.289 -246.290 -246.291 -247.289 -247.290 -247.291 -247.292 -247.293 -248.288 -248.289 -248.290 -248.291 -248.292 -249.288 -249.289 -249.290 -249.291 -249.292 -250.289 -250.290 -250.291 -250.292 -251.287 -251.288 -251.289 -251.290 -251.291 -252.287 -252.288 -252.289 -252.290 -252.291 -253.287 -253.288 -253.291 -254.286 -254.287 -254.288 -254.290 -255.286 -255.288 -255.289 -255.290 -256.286 -256.287 -256.289 -257.285 -257.286 -257.287 -257.288 -257.289 -258.287 -258.288 -258.289 -259.285 -259.286 -259.287 -259.288 -259.289 -260.284 -260.285 -260.286 -260.287 -260.288 -260.289 -261.284 -261.285 -261.286 -261.287 -261.288 -262.285 -262.286 -262.287 -262.288 -263.283 -263.284 -263.285 -263.286 -264.283 -264.284 -264.285 -264.286 -264.287 -265.283 -265.284 -265.285 -265.286 -265.287 -266.282 -266.283 -266.285 -266.286 -266.287 -267.283 -267.284 -267.285 -267.286 -268.282 -268.283 -268.284 -268.285 -269.281 -269.282 -269.283 -269.284 -269.286 -270.281 -270.282 -270.283 -271.281 -271.282 -271.283 -271.284 -271.285 -272.281 -272.282 -272.283 -272.284 -272.285 -273.280 -273.282 -273.283 -273.284 -274.279 -274.280 -274.282 -274.283 -274.284 -275.279 -275.280 -275.281 -275.282 -275.283 -275.284 -276.279 -276.280 -276.281 -276.282 -276.283 -277.279 -277.280 -277.281 -277.282 -278.279 -278.280 -278.281 -278.282 -278.283 -279.280 -279.281 -279.282 -280.279 -280.280 -280.281 -280.282 -281.279 -281.280 -281.281 -281.282 -282.280 -282.281 -283.279 -283.280 -284.279 -284.281 -285.279 -285.280 -286.279 -286.280 -287.279 -287.280 -288.279 -289.279 diff --git a/auxdir/CFIS/im_sims_202606/numbers_run.txt b/auxdir/CFIS/im_sims_202606/numbers_run.txt deleted file mode 100644 index 634931641..000000000 --- 
a/auxdir/CFIS/im_sims_202606/numbers_run.txt +++ /dev/null @@ -1,200 +0,0 @@ -233-293 -235-293 -236-293 -237-292 -237-293 -238-292 -238-293 -239-291 -239-292 -239-293 -240-291 -240-292 -240-293 -241-293 -242-290 -242-291 -242-292 -242-293 -243-290 -243-291 -243-292 -243-293 -244-293 -245-289 -245-290 -245-291 -245-292 -246-289 -246-290 -246-291 -247-289 -247-290 -247-291 -247-292 -247-293 -248-288 -248-289 -248-290 -248-291 -248-292 -249-288 -249-289 -249-290 -249-291 -249-292 -250-289 -250-290 -250-291 -250-292 -251-287 -251-288 -251-289 -251-290 -251-291 -252-287 -252-288 -252-289 -252-290 -252-291 -253-287 -253-288 -253-291 -254-286 -254-287 -254-288 -254-290 -255-286 -255-288 -255-289 -255-290 -256-286 -256-287 -256-289 -257-285 -257-286 -257-287 -257-288 -257-289 -258-287 -258-288 -258-289 -259-285 -259-286 -259-287 -259-288 -259-289 -260-284 -260-285 -260-286 -260-287 -260-288 -260-289 -261-284 -261-285 -261-286 -261-287 -261-288 -262-285 -262-286 -262-287 -262-288 -263-283 -263-284 -263-285 -263-286 -264-283 -264-284 -264-285 -264-286 -264-287 -265-283 -265-284 -265-285 -265-286 -265-287 -266-282 -266-283 -266-285 -266-286 -266-287 -267-283 -267-284 -267-285 -267-286 -268-282 -268-283 -268-284 -268-285 -269-281 -269-282 -269-283 -269-284 -269-286 -270-281 -270-282 -270-283 -271-281 -271-282 -271-283 -271-284 -271-285 -272-281 -272-282 -272-283 -272-284 -272-285 -273-280 -273-282 -273-283 -273-284 -274-279 -274-280 -274-282 -274-283 -274-284 -275-279 -275-280 -275-281 -275-282 -275-283 -275-284 -276-279 -276-280 -276-281 -276-282 -276-283 -277-279 -277-280 -277-281 -277-282 -278-279 -278-280 -278-281 -278-282 -278-283 -279-280 -279-281 -279-282 -280-279 -280-280 -280-281 -280-282 -281-279 -281-280 -281-281 -281-282 -282-280 -282-281 -283-279 -283-280 -284-279 -284-281 -285-279 -285-280 -286-279 -286-280 -287-279 -287-280 -288-279 -289-279 diff --git a/auxdir/CFIS/im_sims_202606/tile_numbers_test.txt b/auxdir/CFIS/im_sims_202606/tile_numbers_test.txt deleted 
file mode 100644 index 449bd74d6..000000000 --- a/auxdir/CFIS/im_sims_202606/tile_numbers_test.txt +++ /dev/null @@ -1,43 +0,0 @@ -251.288 -251.289 -251.291 -252.287 -252.288 -252.289 -252.291 -253.287 -253.288 -253.291 -254.286 -254.287 -254.288 -255.286 -255.288 -255.290 -256.286 -256.287 -257.285 -257.286 -257.287 -257.288 -258.287 -258.289 -259.285 -259.286 -259.287 -259.288 -259.289 -260.284 -260.285 -260.286 -260.287 -260.288 -260.289 -261.284 -261.285 -261.286 -261.287 -261.288 -262.285 -262.286 -262.287 diff --git a/docs/source/image_sims_calibration.md b/docs/source/image_sims_calibration.md deleted file mode 100644 index 809924adf..000000000 --- a/docs/source/image_sims_calibration.md +++ /dev/null @@ -1,301 +0,0 @@ -# Image Simulations Calibration Pipeline - -## Overview - -The image simulations calibration pipeline derives multiplicative (m) and additive (c) shear bias corrections from synthetic image simulations. It uses ShapePipe to measure galaxy shapes in five pre-sheared grids, then computes bias by comparing measurements across shear directions. - -**Key outputs:** -- `m_bias_results.yaml` / `m_bias_results.txt` — final m₁, m₂, c₁, c₂ bias estimates with bootstrap errors -- `mbias_cumulative.yaml` — convergence history (m/c as function of n_tiles) -- `mbias_convergence.png` — m and c vs tile count (2 panels) -- `mbias_errors.png` — error shrinkage vs tile count (2 panels) - ---- - -## Quick Start - -### Prerequisites -- ShapePipe container: `/n17data/mkilbing/shapepipe_im_sims.sif` -- SP Validation container: `/n17data/mkilbing/sp_validation_im_sims.sif` -- Pipeline scripts: `~/astro/repositories/github/shapepipe/scripts/image_sims_pipeline/` - -### Setup (one time) - -```bash -# Copy template config to run directory -cp ~/astro/repositories/github/shapepipe/scripts/image_sims_pipeline/config.yaml.template \ - /n17data/mkilbing/astro/Runs/shapepipe/CFIS/v2.0/image_sims/config.yaml - -# Edit config.yaml with your tile IDs, grid number, etc. 
-``` - -### Run from the run directory - -**All snakemake commands run from the image_sims run directory:** - -```bash -cd /n17data/mkilbing/astro/Runs/shapepipe/CFIS/v2.0/image_sims/ - -# Full pipeline to calibrated catalogues -snakemake -s ~/astro/repositories/github/shapepipe/scripts/image_sims_pipeline/Snakefile \ - --configfile config.yaml -j 5 calibrate_all - -# M-bias computation with convergence tracking -snakemake -s ~/astro/repositories/github/shapepipe/scripts/image_sims_pipeline/Snakefile \ - --configfile config.yaml -j 1 mbias - -# Monitor progress -~/astro/repositories/github/shapepipe/scripts/image_sims_pipeline/info.py -m -v - -# Incremental m-bias as tiles finish -~/astro/repositories/github/shapepipe/scripts/image_sims_pipeline/monitor_mbias.py -v -``` - ---- - -## Configuration - -### Snakemake Config: `config.yaml` - -```yaml -# Run directory -base: /n17data/mkilbing/astro/Runs/shapepipe/CFIS/v2.0/image_sims - -# Tile IDs (all tiles in the survey, or subset for testing) -tile_IDs: ["254.286", "254.287"] - -# catalogue subversion (mask config: mask_v1.X.{sample}_im_sim.yaml) -sample: 9 - -# Simulation type (grid or others) -type: grid - -# Grid number (e.g., grid_2) -num: 2 - -# SMP batch size per ShapePipe job (-1 = use container default) -n_smp: -1 -``` - -### M-bias Calibration Settings - -M-bias computation parameters are hard-coded in the Snakefile `mbias` rule: - -```python -shear_amplitude: 0.02 # Input shear amplitude (absolute value) -match_radius_deg: 0.0002 # Position matching radius for cross-matching -w_col: w_des # Weight column name -n_bootstrap: 500 # Bootstrap resamples for error estimation -catalog_name: shape_catalog_cut_ngmix.fits -``` - -To modify these parameters for a different run, edit the `mbias` rule in the Snakefile. - ---- - -## Pipeline Stages - -The pipeline runs in order, with each stage dependent on the previous: - -### 1. 
`init_all` — Initialize run directories - -Creates per-sim directories, parameter files (`params.py`), and mask configs. - -**Outputs:** `{grids}/{sim}/params.py`, `{grids}/{sim}/config_mask.yaml` - -### 2. `pipeline_all` — Run ShapePipe on all tiles - -Executes ShapePipe's full job sequence (bits 1→2→4→...→2048) for each tile in each simulation. - -**Note:** `-j 5` means 5 concurrent Snakemake jobs (tile×sim pairs run sequentially within each job). - -**Outputs:** ShapePipe catalogues, logs at `{grids}/{sim}/logs/log_job_{tile}_{bit}.txt` - -**Time:** ~hours per grid depending on tile count and cluster load. - -### 3. `merge_all` — Merge ShapePipe tile outputs - -Combines per-tile catalogs into a single HDF5 file per simulation using `create_final_cat.py`. - -**Outputs:** `{grids}/{sim}/final_cat_{sim}.hdf5` (HDF5 with tile counts as attributes) - -### 4. `extract_all` — Extract comprehensive catalogues - -Reads merged HDF5 and extracts shape information, PSF quantities, and pre-calibration columns using `extract_info.py`. - -**Outputs:** `{grids}/{sim}/shape_catalog_comprehensive_ngmix.hdf5` (with `n_tiles` attribute in header) - -### 5. `calibrate_all` — Apply cuts and calibrate - -Applies selection masks (flags, magnitude, signal-to-noise) and computes shear calibration (m/c per object) using `calibrate_comprehensive_cat.py`. - -**Outputs:** `{grids}/{sim}/shape_catalog_cut_ngmix.fits` (final catalogue, ready for m-bias) - -### 6. `mbias` — Compute m-bias with convergence tracking - -Computes m₁, m₂, c₁, c₂ from the five shear pairs. With `--cumulative`, tracks convergence as tile counts grow. 
- -**Outputs:** -- `results/m_bias_results.yaml` — final results (YAML) -- `results/m_bias_results.txt` — final results (human-readable text) -- `results/mbias_cumulative.yaml` — convergence history -- `results/mbias_convergence.png` — m/c vs n_tiles (2 panels) -- `results/mbias_errors.png` — error convergence (2 panels) - ---- - -## Monitoring Convergence - -### Live Status: `info.py` - -Monitor pipeline progress with a status table: - -```bash -cd /path/to/run/image_sims -~/astro/repositories/github/shapepipe/scripts/image_sims_pipeline/info.py -m -v -``` - -**Status table (example output):** - -| Simulation | #Jobs | Job Bits | Merge | Extract | Calibrate | -|-----------|-------|----------|-------|---------|-----------| -| 1m2z_grid_2 | 12/12 | ✓✓✓✓✓✓✓✓✓✓✓✓ | 2 | ✓ | ✓ | -| 1p2z_grid_2 | 12/12 | ✓✓✓✓✓✓✓✓✓✓✓✓ | 2 | ✓ | ✓ | -| 1z2m_grid_2 | 12/12 | ✓✓✓✓✓✓✓✓✓✓✓✓ | 2 | ✓ | ✓ | -| 1z2p_grid_2 | 12/12 | ✓✓✓✓✓✓✓✓✓✓✓✓ | 2 | ✓ | ✓ | -| 1z2z_grid_2 | 12/12 | ✓✓✓✓✓✓✓✓✓✓✓✓ | 2 | ✓ | ✓ | - -**Column meanings:** -- **#Jobs** — ShapePipe job completion count (max 12 bits) -- **Job Bits** — per-bit status (✓ = all tiles done, · = incomplete) -- **Merge** — tile count in final HDF5 catalogue -- **Extract** — comprehensive catalog status -- **Calibrate** — calibrated catalogue status - -### Incremental M-bias: `monitor_mbias.py` - -Recompute m-bias each time new tiles finish all ShapePipe jobs. Automatically detects completed tiles and updates convergence tracking. - -```bash -cd /path/to/run/image_sims -~/astro/repositories/github/shapepipe/scripts/image_sims_pipeline/monitor_mbias.py -v -``` - -Run this repeatedly as tiles complete to watch m-bias converge. 
- ---- - -## Results Interpretation - -### M-Bias Results Files - -**YAML format** (`m_bias_results.yaml`): -```yaml -m1: -0.7909173336321427 -m1_err: 0.27407603929732716 -c1: -0.0030413689177988708 -c1_err: 0.005899515490890933 -m2: -1.350016719584532 -m2_err: 0.20884008300375373 -c2: -0.004950048084875509 -c2_err: 0.004208875641566414 -``` - -**Text format** (`m_bias_results.txt`): -``` -Multiplicative and additive shear bias from image simulations -============================================================ - -m1 = -0.790917 ± 0.274076 -c1 = -0.003041 ± 0.005900 - -m2 = -1.350017 ± 0.208840 -c2 = -0.004950 ± 0.004209 - -Errors computed via bootstrap resampling (n=500 resamples) -``` - -### Convergence History (`mbias_cumulative.yaml`) - -Tracks m/c evolution as tile count grows: - -```yaml -'2': - c1: -0.0030413689177988708 - c1_err: 0.005899515490890933 - m1: -0.7909173336321427 - m1_err: 0.27407603929732716 - # ... (c2, m2, errors) -'4': - c1: -0.0025... - # ... (more tiles) -``` - -### Understanding Error Bars - -Errors are computed via **bootstrap resampling**: - -1. Draw N=500 random resamples (with replacement) from measured galaxy ellipticities -2. Recompute m and c for each resample -3. Error = standard deviation of the bootstrap distribution - -This captures: -- **Photometric noise** — measurement uncertainties per galaxy -- **Cosmic variance** — shape correlations across the sky -- **Calibration uncertainties** — from shear responsivity scatter - -**Error shrinkage:** Error ∝ 1/√(n_tiles). More tiles → tighter constraints. 
- -### Convergence Plots - -**`mbias_convergence.png`** — m and c with error bars vs n_tiles -- Left panel: multiplicative bias (m₁, m₂) with error bars -- Right panel: additive bias (c₁, c₂) with error bars -- Shows systematic trends and statistical uncertainties - -**`mbias_errors.png`** — error shrinkage vs n_tiles -- Left panel: m error bars (m₁, m₂) only -- Right panel: c error bars (c₁, c₂) only -- Shows constraint tightening as data accumulates -- Useful for deciding when m-bias is "converged" (errors small enough for science) - ---- - -## File Structure - -``` -/n17data/mkilbing/astro/Runs/shapepipe/CFIS/v2.0/image_sims/ -├── config.yaml # Run configuration -├── .snakemake/ # Snakemake metadata -├── logs/ # Snakemake logs -├── grids/ -│ ├── {sim}_grid_2/ -│ │ ├── logs/ # ShapePipe job logs -│ │ ├── tiles/ # Per-tile ShapePipe outputs -│ │ ├── final_cat_{sim}.hdf5 # Merged catalogue -│ │ ├── shape_catalog_comprehensive_ngmix.hdf5 -│ │ └── shape_catalog_cut_ngmix.fits -│ └── results/ -│ ├── m_bias_results.yaml -│ ├── m_bias_results.txt -│ ├── mbias_cumulative.yaml -│ ├── mbias_convergence.png -│ └── mbias_errors.png -└── monitoring/ # Incremental m-bias workspace - └── {sim}/ - ├── final_cat_{sim}.hdf5 - ├── shape_catalog_comprehensive_ngmix.hdf5 - └── shape_catalog_cut_ngmix.fits -``` - ---- - -## References - -- **Pipeline code:** `~/astro/repositories/github/shapepipe/scripts/image_sims_pipeline/` -- **ShapePipe:** `~/astro/repositories/github/shapepipe/` -- **SP Validation:** `~/astro/repositories/github/sp_validation/` - ---- - -*Last updated: 2026-06-26* diff --git a/docs/source/pipeline_v2.0.md b/docs/source/pipeline_v2.0.md deleted file mode 100644 index d08f2d896..000000000 --- a/docs/source/pipeline_v2.0.md +++ /dev/null @@ -1,330 +0,0 @@ -# Running `ShapePipe` processing and post-processing pipelines on CANFAR - -Documentation to create ShapePipe output products for catalogues v2.x. 
- -## Initialise directory structure - -```bash -init_run_v2.0.sh [-t data|image_sims] -``` -sets up the directory structure. This will be - -v2.0/ -├── tiles/ -│ ├── 301/ -│ │ ├── 301.278/ -│ │ ├── 301.279/ -│ │ └── ... -│ │ ... -├── exp/ -│ ├── 21/ -│ │ ├── 21163916 -│ │ └── ... -│ │ ... -├── cfis -> -├── tile_numbers -> -└── logs/ -└── debug/ - -Additionally, for image_sims: -├── input_tiles -> -├── input_exp -> - - - -### Interactive job from the terminal for a single tile - -Run bit-coded jobs -```bash -run_job_canfar_v2.0.sh -e ID -j -``` -# with job processing tiles: - 1: retrieve tile images and weights (download/symlink) - 2: uncompress weights (no processing for image_sims) - 4: find exposures -# then exposures - 8: retrieve exposures (download/symlink) -16: split exposures, get WCS header -32: mask exposures -64: process stars on exposures, PSF model (no processing for image sims) -# back to tiles -128: merge exposure WCS headers into tile-level sqlite log -256: object selection on tiles (external cat/SExtractor) -512: postage stamp creation -1024: multi-epoch shape measurement -2048: create final catalogue - -## Candide setup - -## CANFAR setup - -### For image simulations - -Download docker image - -```bash -apptainer pull shapepipe_im_sims.sif docker://ghcr.io/cosmostat/shapepipe:im_sims-runtime -``` - -Activate container - -```bash -apptainer shell --bind /n17data,/n09data,/home /path/to/shapepipe_im_sims.sif -``` - -Run jobs -```bash -run_job_sp_canfar_v2.0.bash -e 233.293 -t image_sims -j -``` - -Help -```bash -run_job_sp_canfar_v2.0.bash -h -``` - - -### CANFAR login - -Login to the canfar system with - -```bash -canfar auth login -``` - -This can be done from at notebook or terminal within the canfar science portal, -or any remote terminal that has the canfar library installed. 
- -Check authentication status with - -```bash -canfar auth list -``` - -If not on "default", run - -```bash -canfar auth switch default -``` - - -## From previous setup - -### Merge all final catalogues - -The last step of `ShapePipe` processing is, per patch, to merget all final catalogues. This is done via a python script, as follows. -First, change to parent directory `/path/to/version` and run the following command for all patches - -```bash -patchnum=`tr $patch P ''` -create_final_cat.py -m final_cat_$patch.hdf5 -i . -p $patch/cfis/final_cat.param \ - -P $patchnum -o $patch/n_tiles_final.txt -v -``` - -## Additional `ShapePipe` processing - - -### Create star Catalogue - -We can additionaly create a combined star catalogue, with star shapes projecte from detector to world coordinates. -This is useful for validation and galaxy-PSF/star correlation diagnostics. - -#### Combine all PSF runs - -In each patch directory /path/to/version/$patch, run - -```bash -combine_runs.bash -p $psf -c psf -``` - -to create a single output directory of PSF files (symbolic links). - -Optionally, to create and plot results for this patch only: - -```bash -shapepipe_run -c $SP_CONFIG/config_Ms_$psf.ini -shapepipe_run -c $SP_CONFIG/config_Pl_$psf.ini -``` - -#### Convert star catalogue to wCS - -Convert all input validation PSF files and create directories per patch `P?`. -Create files `validation_psf_conv--.fits` (for the v1.4 setup only one file): - -```bash -cd /path/to/version -mkdir stat_car -cd star_cat -``` - -For each patch run - -```bash -convert_psf_pix2world.py -i .. -P $patchnum -v -``` - -Combine previously created files as links within one ShapePipe run directory (for the v1.4 setup only one link). -First (and optiohnal), create a subdir for a run and link to the input patches: - -```bash -cd /path/to/version/star_cat -mkdir v1.6 -ln -s ../P1 -ln -s ../P2 -... 
-``` - -Next, create links to all `validation_conv` runs: - -```bash -combine_runs.bash -p psfex -c psf_conv -``` - -Merge all converted star catalogues and create `final-starcat.fits`: - -```bash -export SP_RUN=`pwd` -shapepipe_run -c ~/shapepipe/example/cfis/config_Ms_psfex_conv.ini -``` - -Rename to general PSF and star catalogue used for all ("a") sub-versions: - - -```bash -cp output/run_sp_Ms/merge_starcat_runner/output/full_starcat-0000000.fits \ - unions_shapepipe_psf_2024_v1.6.a.fits -``` - -The FITS file `CATTYPE` (newer version) should be `validation_psf_conf`. - -## Post-processing - -The following post-processing steps are performed with the library `sp_validation`. - -### Extract Information - -First, we extract all information from the final catalogue, per patch. We copy -the parameter file and set links to the catalogues and `ShapePipe` config directory. - -```bash -cd /path/to/version/$patch -cp ~/astro/repositories/github/sp_validation/notebooks/params.py . -ln -s /path/to/final_cat_$patchnum.hdf5 # not relative path ../final_cat_P$patchnum.hdf5 ! -ln -s output/run_sp_MsPl/mccd_merge_starcat_runner/output/full_starcat-0000000.fits -ln -s ~/astro/repositories/github/shapepipe/example/cfis -``` - -Then edit `params.py`: Set patch name; set `wrap_ra` for P2. - -Now we can run the script, recommended via job submission on candide. For large patches, -this requies a job with a large memory, e.g. with `mem=380000` - - -```bash -[squeue] python ~/astro/repositories/github/sp_validation/notebooks/extract_info.py -``` - -This creates a patch-wise comprehensive catalogue. - -### Create global comprehensive catalogues - -```bash -cd /patch/to/version -[squeue] python ~/astro/repositories/github/sp_validation/scripts/create_joint_comprehensive_cat.py \ - -v v1.6.c -v -p P1+P2+P3+P4+P5+P6+P7+P8+P9 -``` - -This creates the file `unions_shapepipe_comprehensive_2024_v1.6.c.hdf5`. 
- - -### Apply structural masks - -First, edit the Python script `~/astro/repositories/github/sp_validation/notebooks/demo_apply_hsp_masks.py` -to match catalogue name. Check the coverage mask input file (see below). -Run the script to apply the healsparse structural masks: - -```bash -[squeue] python ~/astro/repositories/github/sp_validation/notebooks/demo_apply_hsp_masks.py -``` - -This creates the file `unions_shapepipe_comprehensive_struct_2024_v1.6.c.hdf5`. - - -### Define sample, calibrate catalogue - -We are close to finally perform the last post-processing step, which is the calibration. First, the final galaxy sample -in question needs to be defined, with masks and cuts to apply from a `yaml` config file. A number of pre-defined files -can be found in `~/astro/repositories/github/sp_validation/calibration`. - -For example, to create `v1.6.6`, the steps are: - -```bash -cd /path/to/version -mkdir -p v1.6.6 -cd v1.6.6 -ln -s ~/astro/repositories/github/sp_validation/calibration/mask_v1.X.6.yaml config_mask.yaml -ln -s ..//unions_shapepipe_comprehensive_struct_2024_v1.6.c.hdf5 unions_shapepipe_comprehensive_struct_2024_v1.X.c.hdf5 -[squeue] python ~/astro/repositories/github/sp_validation/calibrate_comprehensive_cat.py -``` - -calibrate_comprehensive - - -### Create matched star catalogue - -For diagnostics, a catalogue with multi-epoch shapes measured by ngmix matched with the validation star catalogue is used. -This is created as follows: - -```bash -cd /path/to/version -merge_psf_cat.py [-V v1.6|-P P1+P2+...] -v -``` - -This creates the joint catalogue unions_shapepipe_star_2024_v1.6.a.fits . - -### Create coverage mask - -First, on canfar, move to the directory that has the patch subdirectories. 
- -```bash -cd /path/to/version -``` - -#### Get exposure numbers - -If the file `$patch/exp_numbers.txt` does not exist for a given patch, create it with the summary program - -```bash -summary_run $patch 1 -``` - -Now, create the list of CCDs that have PSF information with - -```bash -get_ccds_with_psf -v -V v1.6 -``` - -Next, download exposures headers - -```bash -download_headers -i ccds_with_psfs_v1.6.txt -o headers_v1.6 -v -``` - -From the headers, the CCD corner coordinates are extracted with -```bash -extract_field_corners -i headers_v1.6 -v -``` - -Then, build the healsparse coverage mask file as -```bash -build_coverage_map -``` - -Use `plot_coverage_map` to create plots of the coverage mask. - -## Extra Utilities - -### Run in Terminal in Parallel - -```bash -cat IDs.txt | xargs -I {} -P 16 bash -c 'init_run_exclusive_canfar.sh -j 512 -e {}' -``` diff --git a/example/cfis/config_exp_Ma_onthefly.ini b/example/cfis/config_exp_Ma_onthefly.ini index de58b56ae..2ab1c041c 100644 --- a/example/cfis/config_exp_Ma_onthefly.ini +++ b/example/cfis/config_exp_Ma_onthefly.ini @@ -74,3 +74,6 @@ USE_EXT_STAR = False # File name suffix for the output flag files (optional) PREFIX = pipeline + +# Path to check for existing output mask files +CHECK_EXISTING_DIR = $SP_RUN/output/run_sp_Ma_exp/mask_runner/output diff --git a/example/cfis/config_tile_Mc_psfex.ini b/example/cfis/config_make_cat_psfex_nosm.ini similarity index 98% rename from example/cfis/config_tile_Mc_psfex.ini rename to example/cfis/config_make_cat_psfex_nosm.ini index 77b52ee1d..1983f91c7 100644 --- a/example/cfis/config_tile_Mc_psfex.ini +++ b/example/cfis/config_make_cat_psfex_nosm.ini @@ -9,7 +9,7 @@ VERBOSE = True # Name of run (optional) default: shapepipe_run -RUN_NAME = run_sp_tile_Mc_psfex +RUN_NAME = run_sp_Mc # Add date and time to RUN_NAME, optional, default: True ; RUN_DATETIME = False diff --git a/example/cfis/config_tile_Mh_exp.ini b/example/cfis/config_tile_Mh_exp.ini index 
6b0abb7b3..0d3f9f8b3 100644 --- a/example/cfis/config_tile_Mh_exp.ini +++ b/example/cfis/config_tile_Mh_exp.ini @@ -70,7 +70,3 @@ NUMBERING_SCHEME = -000-000 # Root directory containing all per-exposure work directories. # The runner will walk this tree to collect headers-.npy files. EXP_BASE_DIR = $SP_EXP -# If True, log a warning and continue when split_exp_runner output is missing -# for some exposures (e.g. those that failed job 16). If False (default), -# raise an error and stop. -WARN_MISSING_EXP = False diff --git a/example/cfis/config_tile_Ng_batch_psfex_uc.ini b/example/cfis/config_tile_Ng_batch_psfex_uc.ini index 4a49c31f1..b223e05a1 100644 --- a/example/cfis/config_tile_Ng_batch_psfex_uc.ini +++ b/example/cfis/config_tile_Ng_batch_psfex_uc.ini @@ -11,7 +11,7 @@ VERBOSE = True RUN_NAME = run_sp_tile_Ng # Add date and time to RUN_NAME, optional, default: False -RUN_DATETIME = True +RUN_DATETIME = False ## ShapePipe execution options diff --git a/example/cfis/final_cat.param b/example/cfis/final_cat.param index f97d5a94c..8ddfebd37 100644 --- a/example/cfis/final_cat.param +++ b/example/cfis/final_cat.param @@ -6,10 +6,6 @@ YWIN_WORLD # Can maybe be removed. TILE_ID -# SExtractor number. 
TODO: Create unique UNIONS number -# from TILD_ID + NUMBER -NUMBER - # flags FLAGS IMAFLAGS_ISO diff --git a/pyproject.toml b/pyproject.toml index cca93fa7e..69c73f067 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -78,11 +78,11 @@ fitsio = ["fitsio"] dev = ["shapepipe[doc,jupyter,lint,release,test,fitsio]"] [project.scripts] -shapepipe_run = "shapepipe.shapepipe_run:main" -summary_run = "shapepipe.summary_run:main" -canfar_submit_job = "shapepipe.canfar_run:run_job" -canfar_monitor = "shapepipe.canfar_run:run_log" -canfar_monitor_log = "shapepipe.canfar_run:run_monitor_log" +shapepipe_run = "shapepipe.shapepipe_run:main" +summary_run = "shapepipe.summary_run:main" +canfar_submit_job = "shapepipe.canfar_run:run_job" +canfar_monitor = "shapepipe.canfar_run:run_log" +canfar_monitor_log = "shapepipe.canfar_run:run_monitor_log" [tool.uv] # shapepipe targets Linux only; skip Windows/macOS wheel resolution diff --git a/scripts/image_sims_pipeline/Snakefile b/scripts/image_sims_pipeline/Snakefile deleted file mode 100644 index 8c60d6f55..000000000 --- a/scripts/image_sims_pipeline/Snakefile +++ /dev/null @@ -1,256 +0,0 @@ -"""Snakemake workflow for image-simulation pipeline. 
- -Run from the run directory: - snakemake -s ~/astro/repositories/github/shapepipe/scripts/image_sims_pipeline/Snakefile --configfile config.yaml -j 5 - snakemake -s ~/astro/repositories/github/shapepipe/scripts/image_sims_pipeline/Snakefile --configfile config.yaml -j 1 --dry-run -""" - -import os - -# ---------- configuration ---------- -configfile: "config.yaml" - -BASE = config["base"] - -# Load tile IDs from file (always a file path) -_tile_ids_file = config.get("tile_IDs", "tile_numbers.txt") -with open(_tile_ids_file) as f: - TILE_IDs = [line.strip() for line in f if line.strip()] - -# Apply exclusive tile list if provided (for testing/limiting tiles) -_exclusive = config.get("tile_IDs_exclusive") -if _exclusive and isinstance(_exclusive, list) and _exclusive: # Non-empty list - TILE_IDs = _exclusive - -SAMPLE = str(config["sample"]) -NUM = config["num"] -TYPE = config["type"] -FORCE = "--force" if config.get("force", 0) else "" -N_SMP = config.get("n_smp", -1) - -JOB_SEQUENCE = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048] -# Filter to single job if specified via --config "job=N" -if config.get("job"): - target_job = int(config["job"]) - JOB_SEQUENCE = [j for j in JOB_SEQUENCE if j == target_job] -JOB = sum(JOB_SEQUENCE) - -PSF = "psfex" - -STR_TYPE = f"_{TYPE}" if TYPE == "grid" else "" -SIMS = [ - f"1{dg1}2{dg2}{STR_TYPE}_{NUM}" - for dg1, dg2 in zip(["m", "p", "z", "z", "z"], - ["z", "z", "m", "p", "z"]) -] -GRIDS = f"{BASE}/{TYPE}s" - -HOME = os.path.expanduser("~") -PATH_SH = f"{HOME}/astro/repositories/github/shapepipe/scripts/sh" -PATH_PY = f"{HOME}/astro/repositories/github/shapepipe/scripts/python" -PATH_NB = f"{HOME}/astro/repositories/github/sp_validation/notebooks" -PATH_SPV = f"{HOME}/astro/repositories/github/sp_validation" - -SP_SIF = "/n17data/mkilbing/shapepipe_im_sims.sif" -SPV_SIF = "/n17data/mkilbing/sp_validation_im_sims.sif" -SPV_SRC = f"{HOME}/astro/repositories/github/sp_validation/src" - -# ShapePipe pipeline stages (uses 
shapepipe_im_sims.sif — correct MPI build) -_APPTAINER = f"{PATH_SH}/apptainer_noslurm.sh" -APP_CMD = f"{_APPTAINER} --bind /n17data,/n09data,/home {SP_SIF}" - -# sp_validation stages (uses sp_validation_im_sims.sif with local src override) -SPV_CMD = ( - f"{_APPTAINER} --bind /n17data,/n09data,/home" - f" --env PYTHONPATH={SPV_SRC}" - f" {SPV_SIF}" -) -MBIAS_OUT = f"{GRIDS}/results/m_bias_results.yaml" - -# ---------- helper functions ---------- -def validate_output_files(output_files): - """Validate that output files exist and are non-empty. - - Raises an error if any output file is missing or zero-sized, - preventing empty/corrupt files from being treated as valid outputs. - """ - if isinstance(output_files, str): - output_files = [output_files] - - for f in output_files: - if not os.path.isfile(f): - raise FileNotFoundError(f"Output file missing: {f}") - size = os.path.getsize(f) - if size == 0: - raise ValueError(f"Output file is empty (0 bytes): {f}") - -# ---------- targets ---------- -rule all: - input: - expand(f"{GRIDS}/{{sim}}/shape_catalog_cut_ngmix.fits", sim=SIMS) - -# ---------- convenience targets (run in order) ---------- -# 1. init_all -# 2. pipeline_all -# 3. merge_all -# 4. extract_all -# 5. calibrate_all -# 6. mbias -# 7. 
diagnostics - -rule generate_tile_list: - """Generate tile_numbers.txt from config's tile_IDs.""" - output: - f"{GRIDS}/tile_numbers.txt" - run: - os.makedirs(f"{GRIDS}", exist_ok=True) - with open(output[0], "w") as f: - for tile in TILE_IDs: - f.write(f"{tile}\n") - print(f"Generated {output[0]} with {len(TILE_IDs)} tiles") - -rule init_all: - input: - expand(f"{GRIDS}/{{sim}}/params.py", sim=SIMS) - -rule pipeline_all: - input: - expand( - f"{GRIDS}/{{sim}}/logs/log_job_{{tile}}_{JOB_SEQUENCE[-1]}.txt", - sim=SIMS, tile=TILE_IDs - ) - -rule merge_all: - input: - expand(f"{GRIDS}/{{sim}}/final_cat_{{sim}}.hdf5", sim=SIMS) - -rule extract_all: - input: - expand(f"{GRIDS}/{{sim}}/shape_catalog_comprehensive_ngmix.hdf5", sim=SIMS) - -rule calibrate_all: - input: - expand(f"{GRIDS}/{{sim}}/shape_catalog_cut_ngmix.fits", sim=SIMS) - -# ---------- rules ---------- -rule init: - input: - tiles = f"{GRIDS}/tile_numbers.txt" - output: - params = f"{GRIDS}/{{sim}}/params.py", - mask = f"{GRIDS}/{{sim}}/config_mask.yaml", - shell: - f"{APP_CMD} {PATH_SH}/init_run_v2.0.sh" - f" -t image_sims -d {GRIDS} -s {{wildcards.sim}} -S {SAMPLE}" - f" -T {input.tiles}" - -def _shapepipe_job_input(wildcards): - bit = int(wildcards.bit) - tile = wildcards.tile - bit_idx = JOB_SEQUENCE.index(bit) - tile_idx = TILE_IDs.index(tile) - - if bit_idx == 0: - if tile_idx == 0: - # First tile, first bit: wait for init - return [ - f"{GRIDS}/{wildcards.sim}/params.py", - f"{GRIDS}/{wildcards.sim}/config_mask.yaml", - ] - else: - # Later tile, first bit: wait for last bit of previous tile - prev_tile = TILE_IDs[tile_idx - 1] - return f"{GRIDS}/{wildcards.sim}/logs/log_job_{prev_tile}_{JOB_SEQUENCE[-1]}.txt" - else: - # Same tile, previous bit - return f"{GRIDS}/{wildcards.sim}/logs/log_job_{tile}_{JOB_SEQUENCE[bit_idx - 1]}.txt" - -rule shapepipe_job: - input: _shapepipe_job_input - output: f"{GRIDS}/{{sim}}/logs/log_job_{{tile}}_{{bit}}.txt" - wildcard_constraints: - bit = "|".join(str(b) for b in 
JOB_SEQUENCE), - tile = "|".join(t.replace(".", r"\.") for t in TILE_IDs), - shell: - f"cd {GRIDS}/{{wildcards.sim}} &&" - f" {APP_CMD} {PATH_SH}/run_job_sp_canfar_v2.0.bash" - f" {FORCE} -N {N_SMP} -e {{wildcards.tile}} -t image_sims -j {{wildcards.bit}} &&" - f" python3 -c \"import os; f='{output}'; " - f"os.path.getsize(f) > 0 or exit(1)\"" - -rule merge: - input: - expand( - f"{GRIDS}/{{{{sim}}}}/logs/log_job_{{tile}}_{JOB_SEQUENCE[-1]}.txt", - tile=TILE_IDs - ) - output: - f"{GRIDS}/{{sim}}/final_cat_{{sim}}.hdf5" - shell: - f"cd {GRIDS}/{{wildcards.sim}} &&" - f" {SPV_CMD} python {PATH_PY}/create_final_cat.py" - f" -I -m final_cat_{{wildcards.sim}}.hdf5" - f" -i .. -p cfis/final_cat.param -P {{wildcards.sim}}" - f" -o n_tiles_final.txt -v" - -rule extract: - input: - cat = f"{GRIDS}/{{sim}}/final_cat_{{sim}}.hdf5", - params = f"{GRIDS}/{{sim}}/params.py", - output: - f"{GRIDS}/{{sim}}/shape_catalog_comprehensive_ngmix.hdf5" - shell: - f"cd {GRIDS}/{{wildcards.sim}} &&" - f" {SPV_CMD} python {PATH_NB}/extract_info.py" - -rule calibrate: - input: - cat = f"{GRIDS}/{{sim}}/shape_catalog_comprehensive_ngmix.hdf5", - mask = f"{GRIDS}/{{sim}}/config_mask.yaml", - output: - f"{GRIDS}/{{sim}}/shape_catalog_cut_ngmix.fits" - shell: - f"cd {GRIDS}/{{wildcards.sim}} &&" - f" {SPV_CMD} python {PATH_SPV}/notebooks/calibrate_comprehensive_cat.py -s calibrate" - -rule mbias: - input: - expand(f"{GRIDS}/{{sim}}/shape_catalog_cut_ngmix.fits", sim=SIMS) - output: - MBIAS_OUT, - f"{GRIDS}/results/mbias_cumulative.yaml", - shell: - f"mkdir -p {GRIDS}/results && cat > /tmp/mbias_cfg.yaml << 'EOF'\n" - f"grids_dir: {GRIDS}\n" - f"num: {NUM}\n" - f"catalog_name: shape_catalog_cut_ngmix.fits\n" - f"shear_amplitude: 0.02\n" - f"match_radius_deg: 0.0002\n" - f"w_col: w_des\n" - f"n_bootstrap: 500\n" - f"output_path: {GRIDS}/results/m_bias_results.yaml\n" - f"results_dir: {GRIDS}/results\n" - f"EOF\n" - f"{SPV_CMD} python {PATH_SPV}/scripts/compute_m_bias_image_sims.py" - f" -c 
/tmp/mbias_cfg.yaml -v --cumulative" - -rule diagnostics: - input: - expand(f"{GRIDS}/{{sim}}/shape_catalog_cut_ngmix.fits", sim=SIMS) - output: - f"{GRIDS}/results/footprint.png", - shell: - f"mkdir -p {GRIDS}/results && cat > /tmp/diag_cfg.yaml << 'EOF'\n" - f"grids_dir: {GRIDS}\n" - f"num: {NUM}\n" - f"catalog_name: shape_catalog_cut_ngmix.fits\n" - f"shear_amplitude: 0.02\n" - f"match_radius_deg: 0.0002\n" - f"w_col: w_des\n" - f"n_bootstrap: 500\n" - f"output_path: {GRIDS}/results/m_bias_results.yaml\n" - f"results_dir: {GRIDS}/results\n" - f"diagnostics_dir: {GRIDS}/results\n" - f"EOF\n" - f"{SPV_CMD} python {PATH_SPV}/scripts/diagnostics_image_sims.py" - f" -c /tmp/diag_cfg.yaml -v" diff --git a/scripts/image_sims_pipeline/config.yaml.template b/scripts/image_sims_pipeline/config.yaml.template deleted file mode 100644 index 8faee1e92..000000000 --- a/scripts/image_sims_pipeline/config.yaml.template +++ /dev/null @@ -1,39 +0,0 @@ -# Basic configuration - -## Run directory -base: /n17data/mkilbing/astro/Runs/shapepipe/CFIS/v2.0/image_sims - -## Tile IDs: path to file containing tile IDs (one per line) -tile_IDs: /home/mkilbing/shapepipe/auxdir/CFIS/im_sims_202606/tile_numbers_test.txt - -## Optional: exclusive list of tiles to process (for testing) -## If provided, only these tiles will be used instead of all tiles from tile_IDs -## Leave empty/commented out to use all tiles from tile_IDs -# tile_IDs_exclusive: ["251.288", "251.289", "252.287"] - -## catalogue subversion -sample: 9 - -## simulation type -type: grid - -## Grid number -num: 2 - -## SMP batch size per ShapePipe job (-1 = use config file default = 1) -n_smp: -1 - - -## Calibration - -## Input shear amplitude -shear_amplitude: 0.02 - -## Position matching (for grids) -match_radius_deg: 0.0002 - -## Catalogue columns -w_col: w_des - -## Bootstrap error estimation -n_bootstrap: 500 diff --git a/scripts/image_sims_pipeline/info.py b/scripts/image_sims_pipeline/info.py deleted file mode 100644 index 
33e1f5b4d..000000000 --- a/scripts/image_sims_pipeline/info.py +++ /dev/null @@ -1,247 +0,0 @@ -"""Info script for image-simulation Snakemake pipeline.""" - -import sys -import os -import math -import yaml -import subprocess - - -SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) -SNAKEFILE = os.path.join(SCRIPT_DIR, "Snakefile") -CONFIGFILE = "config.yaml" -BASE_CMD = f"snakemake -s {SNAKEFILE} --configfile {CONFIGFILE}" - - -def load_config(): - with open(CONFIGFILE) as f: - return yaml.safe_load(f) - - -def load_tile_ids(cfg): - """Load tile IDs from config, handling both lists and file paths.""" - tile_ids = cfg.get("tile_IDs", [cfg.get("tile_ID", ["?"])]) - - if isinstance(tile_ids, str): - if os.path.isfile(tile_ids): - with open(tile_ids) as f: - tile_ids = [line.strip() for line in f if line.strip()] - else: - tile_ids = [tile_ids] - - return tile_ids if isinstance(tile_ids, list) else [tile_ids] - - -def get_hdf5_tile_count(hdf5_path): - """Query tile count from HDF5 file using create_final_cat.py -l. - - Returns the count as a string, or "." if file doesn't exist or query fails. - """ - if not os.path.isfile(hdf5_path): - return "." - - try: - HOME = os.path.expanduser("~") - script = f"{HOME}/astro/repositories/github/shapepipe/scripts/python/create_final_cat.py" - result = subprocess.run( - [script, "-I", "-l", "-m", hdf5_path, "-v"], - capture_output=True, - text=True, - timeout=10, - ) - - if result.returncode != 0: - return "?" - - for line in result.stdout.split('\n'): - if line.startswith("Total:"): - parts = line.split() - if len(parts) >= 2: - return parts[1] - - return "?" - except Exception: - return "?" - - -def get_extract_tile_count(sim_path): - """Read tile count from shape_catalog_comprehensive_ngmix.hdf5 attributes. - - Returns the count as a string, or "." if file doesn't exist or count not found. - """ - try: - import h5py - except ImportError: - return "?" 
- - hdf5_path = os.path.join(sim_path, "shape_catalog_comprehensive_ngmix.hdf5") - if not os.path.isfile(hdf5_path): - return "." - - try: - with h5py.File(hdf5_path, 'r') as hf: - if 'n_tiles' in hf.attrs: - return str(hf.attrs['n_tiles']) - return "." - except Exception: - return "?" - - -def monitor(cfg, verbose=0): - base = cfg["base"] - num = cfg["num"] - sim_type = cfg["type"] - tile_ids = load_tile_ids(cfg) - n_tiles = len(tile_ids) - job_seq = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048] - job_hex = [format(int(math.log2(j)), 'X') for j in job_seq] - - str_type = f"_{sim_type}" if sim_type == "grid" else "" - sims = [ - f"1{d1}2{d2}{str_type}_{num}" - for d1, d2 in zip(["m","p","z","z","z"], ["z","z","m","p","z"]) - ] - grids = f"{base}/{sim_type}s" - - col_w = max(len(s) for s in sims) + 2 - downstream = [ - ("merge", "final_cat_{sim}.hdf5"), - ("extr", "shape_catalog_comprehensive_ngmix.hdf5"), - ("calib", "shape_catalog_cut_ngmix.fits"), - ("diag", "results/diagnostics.log"), - ("bias", "results/m_bias_results.yaml"), - ] - ds_w = max(len(s) for s, _ in downstream) - - tile_label = f"({n_tiles} tile{'s' if n_tiles > 1 else ''})" - if verbose >= 1: - mid_head = " ".join(f"{h:>2}" for h in job_hex) - else: - mid_head = "".join(job_hex) - bits_w = len(mid_head) - header1 = f" {'sim':<{col_w}} {'#jobs':>12} {'2^n':<{bits_w}} " + \ - " ".join(f"{s:<{ds_w}}" for s, _ in downstream) - header2 = f" {'':^{col_w}} {'':>12} {mid_head}" - print(f" {tile_label}") - print(header1) - print(header2) - print("-" * len(header1)) - - for sim in sims: - logs_dir = os.path.join(grids, sim, "logs") - - # For each job bit, count how many tiles have the log file - tile_counts = {} - for j in job_seq: - tile_counts[j] = sum( - 1 for t in tile_ids - if os.path.isfile(os.path.join(logs_dir, f"log_job_{t}_{j}.txt")) - ) - - jobs_sum = sum(1 for j in job_seq if tile_counts[j] == n_tiles) - bits_str = "".join("o" if tile_counts[j] == n_tiles else "." 
for j in job_seq) - - ds_status = [] - for idx, (name, pattern) in enumerate(downstream): - path = os.path.join(grids, sim, pattern.format(sim=sim)) - sim_path = os.path.join(grids, sim) - if name == "merge": - ds_status.append(get_hdf5_tile_count(path)) - elif name == "extr": - ds_status.append(get_extract_tile_count(sim_path)) - else: - ds_status.append("o" if os.path.isfile(path) else "") - - if verbose >= 1: - counts_str = " ".join(f"{tile_counts[j]:>2}" for j in job_seq) - row = f" {sim:<{col_w}} {jobs_sum:>12} {counts_str} " + \ - " ".join(f"{s:>{ds_w}}" for s in ds_status) - else: - row = f" {sim:<{col_w}} {jobs_sum:>12} {bits_str} " + \ - " ".join(f"{s:>{ds_w}}" for s in ds_status) - print(row) - - -def main(): - monitor_mode = "-m" in sys.argv or "--monitor" in sys.argv - verbose = sys.argv.count("-v") + sys.argv.count("--verbose") + \ - (1 if "-vv" in sys.argv else 0) * 2 - - cfg = load_config() - - if monitor_mode: - monitor(cfg, verbose=verbose) - return - - base = cfg["base"] - tile_ids = load_tile_ids(cfg) - sample = cfg["sample"] - num = cfg["num"] - sim_type = cfg["type"] - job_seq = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048] - - str_type = f"_{sim_type}" if sim_type == "grid" else "" - sims = [ - f"1{d1}2{d2}{str_type}_{num}" - for d1, d2 in zip(["m","p","z","z","z"], ["z","z","m","p","z"]) - ] - grids = f"{base}/{sim_type}s" - - print("=" * 70) - print(" Image simulations pipeline") - print("=" * 70) - print() - print(f" Config : {CONFIGFILE}") - print(f" Base : {base}") - print(f" Tile IDs : {tile_ids}") - print(f" Sample : {sample} (mask config mask_v1.X.{sample}_im_sim.yaml)") - print(f" Run num : {num}") - print(f" Sims : {', '.join(sims)}") - print(f" Job seq : {job_seq}") - print() - - print("# Basic command") - print() - print(f" {BASE_CMD} -j N_CPU ") - print() - - print("# Target order") - print() - stages = [ - ("init_all", "initialise run directories"), - ("pipeline_all", f"run full job sequence {job_seq} in order"), - 
("merge_all", "merge ShapePipe output into final_cat_.hdf5"), - ("extract_all", "extract comprehensive shape catalogue (HDF5)"), - ("calibrate_all", "apply masks and calibrate (= all)"), - ("diagnostics", "compute and plot diagnostics"), - ("mbias", "compute multiplicative biases") - ] - for target, desc in stages: - print(f" {target:<16} {desc}") - print() - - print("# Useful options") - print() - print(f" -p print shell commands") - print(f" --config 'force=1' pass --force to run_job_sp_canfar_v2.0.bash") - print(f" --forcerun pipeline_all force Snakemake to rerun all pipeline jobs") - print(f" --config 'job=J' target a specific job bit J") - print(f" --rerun-incomplete if errors occur due to previous interrupted run") - print(f" pipeline --config 'job=J' --rerun-incomplete check and fix runs") - print() - - print("# Monitor progress") - print() - print(f" python info_image_sims.py -m") - print() - - print("# Run a single shapepipe job for one sim") - print() - print(f" {BASE_CMD} -j 1 \\") - print(f" {grids}//logs/log_job_J.txt") - print() - - - -if __name__ == "__main__": - main() diff --git a/scripts/image_sims_pipeline/monitor_mbias.py b/scripts/image_sims_pipeline/monitor_mbias.py deleted file mode 100644 index 000cf8067..000000000 --- a/scripts/image_sims_pipeline/monitor_mbias.py +++ /dev/null @@ -1,264 +0,0 @@ -#!/usr/bin/env python -"""Monitor m-bias convergence as pipeline_all tiles complete. - -For tiles that have finished all ShapePipe jobs (bit 2048) in every sim, -runs merge/extract/calibrate in a monitoring subdirectory, then computes -m1, m2, c1, c2 and reports them alongside the tile count. - -Run repeatedly while pipeline_all is in progress: merge is incremental -(adds new tiles, skips tiles already in the HDF5), so each run picks up -newly finished tiles automatically. 
- -Usage (from run directory): - python ~/astro/repositories/github/shapepipe/scripts/image_sims_pipeline/monitor_mbias.py [-c config.yaml] [-v] -""" - -import sys -import os -import subprocess -import shutil -import yaml -import argparse - - -CONFIGFILE = "config_image_sims.yaml" -JOB_LAST = 2048 - -HOME = os.path.expanduser("~") -PATH_SH = f"{HOME}/astro/repositories/github/shapepipe/scripts/sh" -PATH_PY = f"{HOME}/astro/repositories/github/shapepipe/scripts/python" -PATH_NB = f"{HOME}/astro/repositories/github/sp_validation/notebooks" -PATH_SPV = f"{HOME}/astro/repositories/github/sp_validation" -SPV_SRC = f"{PATH_SPV}/src" -SPV_SIF = "/n17data/mkilbing/sp_validation_im_sims.sif" -APPTAINER = f"{PATH_SH}/apptainer_noslurm.sh" - -SPV_PREFIX = [ - APPTAINER, - "--bind", "/n17data,/n09data,/home", - "--env", f"PYTHONPATH={SPV_SRC}", - SPV_SIF, - "python", -] - - -def parse_args(): - p = argparse.ArgumentParser(description=__doc__) - p.add_argument( - "-c", "--config", default=CONFIGFILE, - help=f"Snakemake config YAML (default: {CONFIGFILE})" - ) - p.add_argument("-v", "--verbose", action="store_true") - return p.parse_args() - - -def load_config(path): - with open(path) as f: - return yaml.safe_load(f) - - -def sims_from_config(cfg): - num = cfg["num"] - str_type = f"_{cfg['type']}" if cfg["type"] == "grid" else "" - return [ - f"1{d1}2{d2}{str_type}_{num}" - for d1, d2 in zip(["m", "p", "z", "z", "z"], - ["z", "z", "m", "p", "z"]) - ] - - -def done_tiles(grids, sims, tile_ids): - """Return tiles that have log_job_{tile}_2048.txt in every sim.""" - result = [] - for tile in tile_ids: - if all( - os.path.isfile(f"{grids}/{sim}/logs/log_job_{tile}_{JOB_LAST}.txt") - for sim in sims - ): - result.append(tile) - return result - - -def setup_sim_dir(mon_sim_dir, real_sim_dir, verbose): - """Prepare monitoring sim dir with symlinks and copied params.py.""" - os.makedirs(mon_sim_dir, exist_ok=True) - - for name in ("cfis", "config_mask.yaml"): - link = 
os.path.join(mon_sim_dir, name) - if not os.path.exists(link): - target = os.path.join(real_sim_dir, name) - if os.path.exists(target): - os.symlink(target, link) - elif verbose: - print(f" Warning: {target} not found, skipping symlink") - - params_src = os.path.join(real_sim_dir, "params.py") - params_dst = os.path.join(mon_sim_dir, "params.py") - if os.path.exists(params_src): - shutil.copy2(params_src, params_dst) - elif verbose: - print(f" Warning: {params_src} not found") - - -def run_spv(script_args, cwd, label, verbose): - """Run a script inside the SPV container; return True on success.""" - cmd = SPV_PREFIX + script_args - if verbose: - print(f" cmd: {' '.join(cmd)}") - print(f" cwd: {cwd}") - result = subprocess.run( - cmd, cwd=cwd, - capture_output=(not verbose), - text=True, - ) - if result.returncode != 0: - print(f" {label} FAILED (exit {result.returncode})") - if not verbose and result.stderr: - print(result.stderr[-2000:]) - return False - return True - - -def run_merge(mon_sim_dir, grids, sim, verbose): - return run_spv( - [ - f"{PATH_PY}/create_final_cat.py", - "-I", - "-m", f"final_cat_{sim}.hdf5", - "-i", grids, - "-P", sim, - "-p", f"{grids}/{sim}/cfis/final_cat.param", - "-o", "n_tiles_monitor.txt", - ] + (["-v"] if verbose else []), - cwd=mon_sim_dir, - label="merge", - verbose=verbose, - ) - - -def run_extract(mon_sim_dir, verbose): - return run_spv( - [f"{PATH_NB}/extract_info.py"], - cwd=mon_sim_dir, - label="extract", - verbose=verbose, - ) - - -def run_calibrate(mon_sim_dir, verbose): - return run_spv( - [f"{PATH_NB}/calibrate_comprehensive_cat.py", "-s", "calibrate"], - cwd=mon_sim_dir, - label="calibrate", - verbose=verbose, - ) - - -def run_mbias(mon_dir, num, verbose): - cfg_path = os.path.join(mon_dir, "image_sims_m_bias_monitor.yaml") - out_path = os.path.join(mon_dir, "m_bias_monitor.yaml") - mbias_cfg = { - "grids_dir": mon_dir, - "num": num, - "catalog_name": "shape_catalog_cut_ngmix.fits", - "shear_amplitude": 0.02, - 
"match_radius_deg": 0.0002, - "w_col": "w_des", - "n_bootstrap": 500, - "output_path": out_path, - } - with open(cfg_path, "w") as f: - yaml.dump(mbias_cfg, f, default_flow_style=False) - - ok = run_spv( - [f"{PATH_SPV}/scripts/compute_m_bias_image_sims.py", "-c", cfg_path] - + (["-v"] if verbose else []), - cwd=mon_dir, - label="mbias", - verbose=verbose, - ) - return ok, out_path - - -def n_tiles_from_file(path): - """Read tile count written by create_final_cat.py -o.""" - try: - with open(path) as f: - return int(f.read().strip()) - except Exception: - return None - - -def main(): - args = parse_args() - cfg = load_config(args.config) - - base = cfg["base"] - num = cfg["num"] - tile_ids = cfg.get("tile_IDs", [cfg.get("tile_ID", "254.286")]) - grids = f"{base}/{cfg['type']}s" - sims = sims_from_config(cfg) - mon_dir = os.path.join(grids, "monitoring") - - # -- Detect done tiles ----------------------------------------------- - done = done_tiles(grids, sims, tile_ids) - n_done = len(done) - n_total = len(tile_ids) - - print(f"Tiles finished in all sims: {n_done} / {n_total}") - if done: - print(f" {done}") - - if n_done == 0: - print("No tiles complete yet. 
Nothing to compute.") - return 0 - - # -- Set up monitoring directory ------------------------------------ - print(f"\nMonitoring dir: {mon_dir}") - os.makedirs(mon_dir, exist_ok=True) - - # -- Per-sim: merge → extract → calibrate -------------------------- - for sim in sims: - real_sim_dir = os.path.join(grids, sim) - mon_sim_dir = os.path.join(mon_dir, sim) - print(f"\n{sim}") - - setup_sim_dir(mon_sim_dir, real_sim_dir, args.verbose) - - print(" merge ...", end="", flush=True) - if not run_merge(mon_sim_dir, grids, sim, args.verbose): - return 1 - n_found = n_tiles_from_file(os.path.join(mon_sim_dir, "n_tiles_monitor.txt")) - print(f" ok ({n_found} tiles in HDF5)") - - print(" extract ...", end="", flush=True) - if not run_extract(mon_sim_dir, args.verbose): - return 1 - print(" ok") - - print(" calibrate...", end="", flush=True) - if not run_calibrate(mon_sim_dir, args.verbose): - return 1 - print(" ok") - - # -- m-bias -------------------------------------------------------- - print("\nComputing m-bias...") - ok, results_path = run_mbias(mon_dir, num, args.verbose) - if not ok: - return 1 - - if os.path.isfile(results_path): - with open(results_path) as f: - res = yaml.safe_load(f) - print() - print(f" n_tiles = {n_done} / {n_total}") - print(f" m1 = {res['m1']:+.4f} ± {res['m1_err']:.4f}") - print(f" c1 = {res['c1']:+.4f} ± {res['c1_err']:.4f}") - print(f" m2 = {res['m2']:+.4f} ± {res['m2_err']:.4f}") - print(f" c2 = {res['c2']:+.4f} ± {res['c2_err']:.4f}") - - return 0 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/scripts/sh/apptainer_noslurm.sh b/scripts/sh/apptainer_noslurm.sh deleted file mode 100755 index 4d28dfb64..000000000 --- a/scripts/sh/apptainer_noslurm.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash -# Run apptainer exec after stripping all SLURM/PMI/PMIX/OMPI environment -# variables that cause OpenMPI to crash when running inside a SLURM job. -# Usage: apptainer_noslurm.sh [apptainer_args...] -- [cmd_args...] 
- -for v in $(env | grep -Eo "^(SLURM|PMI|PMIX|OMPI)[^=]*"); do - unset "$v" -done - -exec apptainer exec "$@" diff --git a/scripts/sh/init_run_v2.0.sh b/scripts/sh/init_run_v2.0.sh index c349d0178..74ab260c0 100755 --- a/scripts/sh/init_run_v2.0.sh +++ b/scripts/sh/init_run_v2.0.sh @@ -9,15 +9,6 @@ # Version version="2.0" -# Input type: data or image_sims -type="data" - -# Subdir for image_sims -subdir="1z2z_grid_1" - -force=0 -sample="" - # Default base run directory (permanent storage) #base_dir="$HOME/cosmostat/v2/v${version}" base_dir=`pwd` @@ -25,63 +16,30 @@ base_dir=`pwd` # ShapePipe repository root (for config symlink and tile list) sp_root="$HOME/shapepipe" -# Source params.py to copy into the run directory -params_src="$HOME/astro/repositories/github/sp_validation/notebooks/params.py" +# Tile list source (full filenames, will be stripped to NNN.MMM) +tiles_src="$sp_root/auxdir/CFIS/tiles_202604/tiles_r.txt" + +# Config directory (will be symlinked as $base_dir/cfis) +config_dir="$sp_root/example/cfis" ## Help string usage="Usage: $(basename "$0") [OPTIONS] \nOptions:\n -h\t\tthis message\n - -t, --type TYPE input type, allowed are 'data', 'image_sims', default='$type'\n - -s, --subdir SUBDIR subdir for image simulations, default='$subdir'\n -d, --dir DIR\tbase run directory, default='$base_dir'\n - -P, --params PATH\tsource params.py to copy, default='$params_src'\n - -T, --tiles PATH\ttile numbers file (image_sims only), overrides default\n - -c, --config CONFIG config file for input directories (image_sims only)\n - --force\t\trecreate existing symlinks and parameter files\n - -S, --sample SAMPLE\tsample version for mask config (e.g. 
6)\n " ## Parse command line -tiles_src_override="" -config_file="" while [ $# -gt 0 ]; do case "$1" in -h) echo -ne $usage exit 0 ;; - -t|--type) - type="$2" - shift - ;; - -s|--subdir) - subdir="$2" - shift - ;; -d|--dir) base_dir="$2" shift ;; - -P|--params) - params_src="$2" - shift - ;; - -T|--tiles) - tiles_src_override="$2" - shift - ;; - -c|--config) - config_file="$2" - shift - ;; - --force) - force=1 - ;; - -S|--sample) - sample="$2" - shift - ;; *) echo "Unknown option: $1" echo -ne $usage @@ -91,71 +49,12 @@ while [ $# -gt 0 ]; do shift done -# Check options -if [ "$type" == "data" ]; then - - # Config file directory - config_dir="$sp_root/example/cfis" - - # Input tile list - tiles_src="$sp_root/auxdir/CFIS/tiles_202604/tiles_r.txt" - -elif [ "$type" == "image_sims" ]; then - - config_dir="$sp_root/example/cfis_image_sims" - # Use override if provided, otherwise use default - if [ -n "$tiles_src_override" ]; then - tiles_src="$tiles_src_override" - else - tiles_src="$sp_root/auxdir/CFIS/im_sims_202606/numbers.txt" - fi - - # Read input directories from config file if provided - input_dir_tiles="" - input_dir_exp="" - if [ -n "$config_file" ]; then - if [ ! -f "$config_file" ]; then - echo "ERROR: Config file not found: $config_file" - exit 2 - fi - source "$config_file" - fi - -else - - echo "Invalid input type $type" - exit 3 - -fi - - echo "Initialising ShapePipe v${version} run directory: $base_dir" echo "" # --- Base directory --- -mkdir -p "$base_dir/$subdir" -cd "$base_dir/$subdir" - -if [ "$type" == "image_sims" ]; then - if [ -n "$input_dir_tiles" ] && [ -n "$input_dir_exp" ]; then - for link in input_tiles input_exp; do - src=$([ "$link" == "input_tiles" ] && echo "$input_dir_tiles" || echo "$input_dir_exp") - [ $force -eq 1 ] && rm -f "$link" - [ ! 
-e "$link" ] && ln -s "$src" "$link" - done - fi - - if [ -n "$sample" ]; then - mask_src="$HOME/astro/repositories/github/sp_validation/config/calibration/mask_v1.X.${sample}_im_sim.yaml" - [ $force -eq 1 ] && rm -f config_mask.yaml - if [ ! -e config_mask.yaml ]; then - ln -s "$mask_src" config_mask.yaml - echo "Created symlink: config_mask.yaml -> $mask_src" - else - echo "config_mask.yaml already exists, skipping" - fi - fi -fi +mkdir -p "$base_dir" +cd "$base_dir" echo "Creating tiles/ directory..." mkdir -p tiles @@ -172,71 +71,33 @@ mkdir -p logs mkdir -p debug # --- Config symlink --- - -# Config directory (will be symlinked) - -if [ -d cfis ] && [ ! -L cfis ]; then +if [ -L cfis ]; then + echo "cfis symlink already exists, skipping" +elif [ -d cfis ]; then echo "WARNING: cfis/ exists as a directory, not creating symlink" else - [ $force -eq 1 ] && rm -f cfis - if [ ! -e cfis ]; then - ln -s "$config_dir" cfis - echo "Created symlink: cfis -> $config_dir" - else - echo "cfis symlink already exists, skipping" - fi + ln -s "$config_dir" cfis + echo "Created symlink: cfis -> $config_dir" fi # --- Tile number list --- echo "Creating tile_numbers.txt symlink..." -if [ -f tile_numbers.txt ] && [ ! -L tile_numbers.txt ]; then +if [ -L tile_numbers.txt ]; then + echo " tile_numbers.txt symlink already exists, skipping" +elif [ -f tile_numbers.txt ]; then echo " WARNING: tile_numbers.txt exists as a regular file, not creating symlink" else - [ $force -eq 1 ] && rm -f tile_numbers.txt - if [ ! -e tile_numbers.txt ]; then - ln -s "$tiles_src" tile_numbers.txt - echo " Created symlink: tile_numbers.txt -> $tiles_src" - else - echo " tile_numbers.txt symlink already exists, skipping" - fi + ln -s "$tiles_src" tile_numbers.txt + echo " Created symlink: tile_numbers.txt -> $tiles_src" fi n_tiles=$(wc -l < tile_numbers.txt) echo " $n_tiles tiles" -# --- params.py --- -echo "Creating params.py..." 
-if [ -f params.py ] && [ $force -eq 0 ]; then - echo " params.py already exists, skipping" -else - cp "$params_src" params.py - sed -i "s/[\"']IMAFLAGS_ISO[\"'],\s*//g; s/,\s*[\"']IMAFLAGS_ISO[\"']//g" params.py - if [ "$type" == "image_sims" ]; then - sed -i \ - -e "s/^name = .*/name = '$subdir'/" \ - -e "s/^star_cat_path = .*/star_cat_path = None/" \ - -e "s/^output_format = .*/output_format = '.hdf5'/" \ - params.py - echo " Copied and adapted: params.py <- $params_src" - echo " name = '$subdir', star_cat_path = None, output_format = '.hdf5'" - else - echo " Copied: params.py <- $params_src" - fi -fi - echo "" echo "Done. Directory structure:" -echo " $base_dir/$subdir" +echo " $base_dir/" echo " ├── tiles/" echo " ├── exp/" echo " ├── logs/" echo " ├── cfis -> ${config_dir}" -echo " ├── tile_numbers.txt -> ${tiles_src}" -echo " └── params.py <- ${params_src}" -if [ "$type" == "image_sims" ]; then - if [ -n "$input_dir_tiles" ] && [ -n "$input_dir_exp" ]; then - echo " ├── input_tiles -> $input_dir_tiles" - echo " └── input_exp -> $input_dir_exp" - else - echo " (no input symlinks: config file not provided)" - fi -fi +echo " └── tile_numbers.txt -> ${tiles_src}" diff --git a/scripts/sh/job_list_help.bash b/scripts/sh/job_list_help.bash index a4a43bd3f..52430a434 100644 --- a/scripts/sh/job_list_help.bash +++ b/scripts/sh/job_list_help.bash @@ -13,7 +13,7 @@ JOB_LIST_HELP="\ \t 32: mask exposures (online if star_cat_for_mask=onthefly)\n\ \t 64: process stars on exposures, PSF model (offline)\n\ \t 128: merge exposure WCS headers into tile-level sqlite log\n\ - \t 256: object selection on tiles (online if UNIONS catalogue or star_cat_for_mask=onthefly)\n\ + \t 256: object selection on tiles (online if UNIONS catalogue or tar_cat_for_mask=onthefly)\n\ \t 512: postage stamp creation\n\ \t1024: multi-epoch shape measurement\n\ \t2048: create final catalogue\n\ diff --git a/scripts/sh/job_sp_canfar_v2.0.bash b/scripts/sh/job_sp_canfar_v2.0.bash index 
a8f1ed927..1c1ce89ba 100755 --- a/scripts/sh/job_sp_canfar_v2.0.bash +++ b/scripts/sh/job_sp_canfar_v2.0.bash @@ -16,7 +16,7 @@ source $HOME/shapepipe/scripts/sh/job_list_help.bash ## Default values job=255 config_dir=$HOME/shapepipe/example/cfis -psf='psfex' +psf='mccd' retrieve='vos' star_cat_for_mask='onthefly' tile_det='sx' @@ -124,8 +124,8 @@ while [ $# -gt 0 ]; do done ## Check options -if [ "$psf" != "psfex" ] && [ "$psf" != "mccd" ] && [ "$psf" != "psf" ]; then - echo "PSF (option -p) needs to be 'psfex', 'mccd', or 'psf' (image sims)" +if [ "$psf" != "psfex" ] && [ "$psf" != "mccd" ]; then + echo "PSF (option -p) needs to be 'psfex' or 'mccd'" exit 2 fi @@ -134,8 +134,9 @@ if [ "$star_cat_for_mask" != "onthefly" ] && [ "$star_cat_for_mask" != "save" ]; exit 4 fi -if [ "$retrieve" != "vos" ] && [ "$retrieve" != "symlink" ]; then - echo "Invalid method to retrieve images $retrieve (option -r), needs to be 'vos' or 'symlink'" +#if [ "$retrieve" != "vos" ] && [ "$retrieve" != "symlink" ]; then +if [ "$retrieve" != "vos" ]; then + echo "method to retrieve images (option -r) needs to be 'vos' for v2.0" exit 5 fi @@ -161,19 +162,17 @@ fi # Run path and location of input image directories export SP_RUN=`pwd` -# Config file path — use value exported by run_job_sp_canfar_v2.0.bash if set, -# otherwise fall back to the cfis symlink in the run directory. -export SP_CONFIG=${SP_CONFIG:-$SP_RUN/cfis} +# Config file path +export SP_CONFIG=$SP_RUN/cfis # Path for updated (per-job) config file copies export SP_CONFIG_MOD=$SP_RUN/cfis_mod # Root directory for per-exposure work directories. -# Set SP_EXP in the environment to override; otherwise use SP_DIR (the run -# root, always exported by run_job_sp_canfar_v2.0.bash for both data and -# image_sims) so exp/ is always a sibling of tiles/ under the same root. +# Set SP_EXP in the environment to override; otherwise falls back to the +# conventional layout (SP_RUN = .../v2.0/tiles/IDra/ID, three levels up + exp). 
if [ -z "${SP_EXP}" ]; then - export SP_EXP="$SP_DIR/exp" + export SP_EXP=$(realpath "$SP_RUN/../../../exp") echo "Setting SP_EXP to $SP_EXP" fi @@ -238,10 +237,10 @@ function command () { else echo -e "${RED}error, return value = $res${NC}" if [ $STOP == 1 ]; then - echo "${RED}exiting '$(basename "$0")', error in command '$cmd'${NC}" + echo "${RED}exiting 'canfar_sp.bash', error in command '$cmd'${NC}" exit $res else - echo "${RED}continuing '$(basename "$0")', error in command '$cmd'${NC}" + echo "${RED}continuing 'canfar_sp.bash', error in command '$cmd'${NC}" fi fi fi @@ -292,7 +291,7 @@ function command_cfg_shapepipe() { batch_flag="--batch_size $_n_smp" fi - local cmd="shapepipe_run -c $config $batch_flag" + local cmd="shapepipe_run.py -c $config $batch_flag" command "$cmd" "$str" } @@ -348,12 +347,12 @@ if [[ $do_job != 0 ]]; then fi -## Retrieve exposure images (online if retrieve=vos) +## Retrieve exposure images (online, vos) (( do_job = $job & 8 )) if [[ $do_job != 0 ]]; then command_cfg_shapepipe \ - "config_exp_Gie_$retrieve.ini" \ + "config_exp_Gie_vos.ini" \ "Run shapepipe (get exposure images)" \ $n_smp \ $exclusive @@ -420,7 +419,7 @@ if [[ $do_job != 0 ]]; then ### Download external catalogue from vos command_cfg_shapepipe \ - "config_tile_Git_cat_$retrieve.ini" \ + "config_tile_Git_cat_vos.ini" \ "Run shapepipe (download external tile catalogue)" \ -1 \ $exclusive @@ -496,9 +495,11 @@ fi (( do_job = $job & 2048 )) if [[ $do_job != 0 ]]; then + suff_sm="_nosm" + ### Merge all relevant information into final catalogue command_cfg_shapepipe \ - "config_tile_Mc_$psf.ini" \ + "config_tile_make_cat_$psf${suff_sm}.ini" \ "Run shapepipe (tile: create final cat $psf)" \ $n_smp \ $exclusive diff --git a/scripts/sh/run_job_sp_canfar_v2.0.bash b/scripts/sh/run_job_sp_canfar_v2.0.bash index da464ff23..9da08f598 100755 --- a/scripts/sh/run_job_sp_canfar_v2.0.bash +++ b/scripts/sh/run_job_sp_canfar_v2.0.bash @@ -16,22 +16,15 @@ ID=-1 psf='psfex' tile_det='uc' 
tile_mask=0 -N_SMP=-1 +N_SMP=1 dry_run=0 dir=`pwd` debug_out="" - -# Input type: data or image_sims -type="data" - #scratch="/scratch/$USER/shapepipe/v${version}" scratch="" test_only=0 -check_only=0 -run_and_check=1 +check=0 force=0 -retry=0 -quiet=0 VERBOSE=1 pat="-- " @@ -47,18 +40,14 @@ ${JOB_LIST_HELP} -e, --exclusive ID\timage ID\n \t\t\tPSF model, one in ['psfex'|'mccd'], default='$psf'\n --tile_det DET\t\ttile detection mode, one in ['sx'|'uc'], default='$tile_det'\n --tile_mask MASK\ttile masking, default='$tile_mask'\n - -t, --type TYPE input type, allowed are 'data', 'image_sims', default='$type'\n -N, --N_SMP N_SMP\tnumber of SMP jobs, default from original config files\n -d, --directory DIR\trun directory, default is pwd ($dir)\n -S, --scratch DIR\tprocessing scratch directory, default=none\n -n, --dry_run\t\tDRY RUN, no actual processing; default is $dry_run\n --debug_out PATH\tdebug output file PATH, default=none\n --test\t\t\ttest mode, no processing\n - --check_only\t\tcheck completeness only, no processing\n + --check\t\tcheck download completeness only (job 8), no processing\n --force\t\tremove existing module output dir(s) before running\n - --retry\t\tskip jobs whose existing run is complete; remove and rerun\n - \t\t\tonly those whose existing run is incomplete/failed\n - -q, --quiet\t\tsuppress all output except Complete/Missing/Incomplete/WARNING lines\n " ## Help if no arguments @@ -86,10 +75,6 @@ while [ $# -gt 0 ]; do psf="$2" shift ;; - -t|--type) - type="$2" - shift - ;; --tile_det) tile_det="$2" shift @@ -121,18 +106,12 @@ while [ $# -gt 0 ]; do --test) test_only=1 ;; - --check_only) - check_only=1 + --check) + check=1 ;; --force) force=1 ;; - --retry) - retry=1 - ;; - -q|--quiet) - quiet=1 - ;; esac shift done @@ -145,15 +124,10 @@ function message() { my_debug_out=$2 my_exit=$3 - if [ "$quiet" == "0" ] || [[ "$msg" =~ (Complete|[Mm]issing|Incomplete|WARNING) ]]; then - echo $msg - fi + echo $msg if [ -n "$my_debug_out" ]; then echo 
${pat}$msg >> $my_debug_out fi - if [ -n "$log_file" ]; then - echo ${pat}$msg >> $log_file - fi if [ "$my_exit" != "-1" ]; then if [ -n "$my_debug_out" ]; then @@ -161,14 +135,12 @@ function message() { else echo "${pat}exiting with code $my_exit" fi - if [ -n "$log_file" ]; then - echo "${pat}exiting with code $my_exit" >> $log_file - fi exit $my_exit fi } + # Initialise exposure work directory: create dirs, exp_numbers file, config symlink. # The exp_numbers-000-000.txt file is created only once (skipped if already exists). # Args: $1 = exp_id, $2 = exp_work_dir @@ -186,35 +158,11 @@ function init_exp_work_dir() { fi if [ ! -e "$exp_work_dir/cfis" ]; then - ln -sf $config_dir "$exp_work_dir/cfis" + ln -sf ~/shapepipe/example/cfis "$exp_work_dir/cfis" fi } -# Remove existing run_sp__* output directories. -# Args: $1 = base dir containing the "output" subdir (work_dir or exp_work_dir) -# $2 = "tile" or "exp" -# $3 = space-separated run_prefixes -# $4 = label for the removal message (e.g. "Force-removing") -function remove_run_dirs() { - local base_dir=$1 - local kind=$2 - local run_prefixes=$3 - local label=$4 - local run_prefix - for run_prefix in $run_prefixes; do - local dirs_to_remove - dirs_to_remove=$(ls -d "$base_dir/output/run_sp_${kind}_${run_prefix}"* 2>/dev/null) - if [ -n "$dirs_to_remove" ]; then - for d in $dirs_to_remove; do - message "${label} $d" "$debug_out" -1 - command "rm -rf $d" $dry_run - done - fi - done -} - - # Run a per-exposure job (e.g. job 8, 16). # Args: $1 = job number # $2 = space-separated list of run_sp_exp output dir prefixes (e.g. "Gie") @@ -237,7 +185,7 @@ function run_exp_job() { message "Exposure numbers file exp_numbers-${IDra}-${IDdec}.txt not found in $work_dir/output" "$debug_out" 10 fi - if [ "$check_only" == "1" ]; then + if [ "$check" == "1" ]; then message "Check mode: skipping job $exp_job" "$debug_out" -1 fi @@ -252,22 +200,31 @@ function run_exp_job() { (( n_total++ )) - # exp_id e.g. 
"2182795p" (data) or "208659" (image_sims) - # Strip trailing letter if present (data format); keep full id if numeric only. + # exp_id e.g. "2182795p": ab = first 2 chars, abcdefg = all but last char local exp_prefix="${exp_id:0:2}" - local exp_base - if [[ "${exp_id: -1}" =~ [a-zA-Z] ]]; then - exp_base="${exp_id%?}" - else - exp_base="$exp_id" - fi + local exp_base="${exp_id%?}" local exp_id_disp="${exp_prefix}/${exp_base}" - local exp_work_dir="$dir/exp/$exp_prefix/$exp_base" + local exp_work_dir="$HOME/v${version}/exp/$exp_prefix/$exp_base" local exp_log_file="$exp_work_dir/job_sp_canfar_v2.0.log" # Create exp_numbers-000-000.txt and cfis link if not existent init_exp_work_dir "$exp_id" "$exp_work_dir" + # force: remove all existing run directories for each prefix before running + if [ "$force" == "1" ]; then + local run_prefix + for run_prefix in $run_prefixes; do + local dirs_to_remove + dirs_to_remove=$(ls -d "$exp_work_dir/output/run_sp_exp_${run_prefix}"* 2>/dev/null) + if [ -n "$dirs_to_remove" ]; then + for d in $dirs_to_remove; do + message "Force-removing $d" "$debug_out" -1 + command "rm -rf $d" $dry_run + done + fi + done + fi + # Check completeness of existing run output (main prefix) local run_dir=$(ls -dt "$exp_work_dir/output/run_sp_exp_${main_prefix}"* 2>/dev/null | head -1) local is_complete=1 @@ -329,22 +286,14 @@ function run_exp_job() { fi done - # force: always remove and rerun. - # retry: remove and rerun only if the existing run is incomplete/failed. 
- if [ "$force" == "1" ]; then - remove_run_dirs "$exp_work_dir" "exp" "$run_prefixes" "Force-removing" - elif [ "$retry" == "1" ] && [ "$is_complete" == "0" ]; then - remove_run_dirs "$exp_work_dir" "exp" "$run_prefixes" "Retry: removing incomplete run" - fi - - if [ "$force" != "1" ] && [ "$is_complete" == "1" ]; then + if [ "$is_complete" == "1" ]; then message "Complete $exp_id_disp: run_sp_exp_${main_prefix} ( $check_desc)" "$debug_out" -1 (( n_complete++ )) continue fi # Report incomplete/missing in check mode; in run mode handle and proceed - if [ "$check_only" == "1" ]; then + if [ "$check" == "1" ]; then if [ -n "$run_dir" ]; then message " Benign incomplete: $exp_id_disp ($check_desc)" "$debug_out" -1 else @@ -362,9 +311,9 @@ function run_exp_job() { [ -n "$debug_out" ] && debug_flag="--debug_out $debug_out" echo "$(basename "$0") -j $exp_job -e $exp_id" > "$exp_log_file" - [ "$quiet" == "0" ] && echo "pwd=`pwd`" - command "job_sp_canfar_v2.0.bash -c $config_dir -p $psf -r $retrieve --tile_det $tile_det --tile_mask $tile_mask -j $exp_job --n_smp $N_SMP --nsh_jobs $N_SMP $debug_flag" $dry_run 2>&1 | tee -a "$exp_log_file" - [ "$quiet" == "0" ] && echo "Done with job_sp_canfar_v2.0.bash" + echo "pwd=`pwd`" + command "job_sp_canfar_v2.0.bash -p $psf --tile_det $tile_det --tile_mask $tile_mask -j $exp_job --n_smp $N_SMP --nsh_jobs $N_SMP $debug_flag" $dry_run 2>&1 | tee -a "$exp_log_file" + echo "Done with job_sp_canfar_v2.0.bash" done < "$exp_numbers_file" @@ -394,6 +343,21 @@ function run_tile_job() { local complete_checks=$3 local main_prefix="${run_prefixes%% *}" + # force: remove all existing run directories for each prefix before running + if [ "$force" == "1" ]; then + local run_prefix + for run_prefix in $run_prefixes; do + local dirs_to_remove + dirs_to_remove=$(ls -d "$work_dir/output/run_sp_tile_${run_prefix}"* 2>/dev/null) + if [ -n "$dirs_to_remove" ]; then + for d in $dirs_to_remove; do + message "Force-removing $d" "$debug_out" -1 + command "rm 
-rf $d" $dry_run + done + fi + done + fi + # Locate most recent existing run directory for the main prefix local run_dir run_dir=$(ls -dt "$work_dir/output/run_sp_tile_${main_prefix}"* 2>/dev/null | head -1) @@ -457,24 +421,16 @@ function run_tile_job() { done fi - # force: always remove and rerun. - # retry: remove and rerun only if the existing run is incomplete/failed. - if [ "$force" == "1" ]; then - remove_run_dirs "$work_dir" "tile" "$run_prefixes" "Force-removing" - elif [ "$retry" == "1" ] && [ "$is_complete" == "0" ]; then - remove_run_dirs "$work_dir" "tile" "$run_prefixes" "Retry: removing incomplete run" - fi - - if [ "$force" != "1" ] && [ "$is_complete" == "1" ] && [ -n "$complete_checks" ]; then + if [ "$is_complete" == "1" ] && [ -n "$complete_checks" ]; then message "Complete: ( $check_desc)" "$debug_out" -1 return 0 fi - if [ "$check_only" == "1" ]; then + if [ "$check" == "1" ]; then if [ -n "$run_dir" ]; then - message "Incomplete: ( $check_desc)" "$debug_out" -1 + message "Incomplete: ($check_desc)" "$debug_out" -1 else - message "Missing: ( $check_desc)" "$debug_out" -1 + message "Missing: ($check_desc)" "$debug_out" -1 fi return 0 fi @@ -484,85 +440,15 @@ function run_tile_job() { [ -n "$debug_out" ] && debug_flag="--debug_out $debug_out" if [ ! -e "cfis" ]; then - ln -sf $config_dir cfis + ln -sf ~/shapepipe/example/cfis "cfis" fi command "update_runs_log_file.py" $dry_run # Run job script - command "job_sp_canfar_v2.0.bash -c $config_dir -p $psf -r $retrieve --tile_det $tile_det --tile_mask $tile_mask -j $tile_job --n_smp $N_SMP --nsh_jobs $N_SMP $debug_flag" $dry_run 2>&1 | tee -a "$log_file" + command "job_sp_canfar_v2.0.bash -p $psf --tile_det $tile_det --tile_mask $tile_mask -j $tile_job --n_smp $N_SMP --nsh_jobs $N_SMP $debug_flag" $dry_run 2>&1 | tee -a "$log_file" } - -# Wrap a run_tile_job or run_exp_job call with check-log and force-cleanup. -# Usage: run_job_logged BIT FUNC [FUNC_ARGS...] 
-# - With --force: removes log_job_BIT.txt before running FUNC -# - With --check: captures output of FUNC; writes log_job_BIT.txt only if no -# Missing/Incomplete lines appear -# - Otherwise: calls FUNC directly -function run_job_logged() { - local bit=$1 - shift - local log="$dir/logs/log_job_${ID}_${bit}.txt" - - [ "$force" == "1" ] && rm -f "$log" - - if [ "$check_only" == "1" ]; then - local out - out=$( "$@" 2>&1 ) - echo "$out" - echo "$out" | grep -qiE "(Missing|Incomplete)" || echo "$out" > "$log" - else - "$@" - local rc=$? - if [ $rc -eq 0 ]; then - if [ "$run_and_check" == "1" ]; then - check_only=1 - local saved_force=$force - force=0 - local out - out=$( "$@" 2>&1 ) - check_only=0 - force=$saved_force - echo "$out" - echo "$out" | grep -qiE "(Missing|Incomplete)" || echo "$out" > "$log" - else - echo "Completed job $bit $(date)" > "$log" - fi - fi - fi -} - - -if [ "$type" == "data" ]; then - - [ "$quiet" == "0" ] && echo "Running on data" - retrieve="vos" - config_dir=$HOME/shapepipe/example/cfis - export SP_DIR=$dir - export SP_CONFIG=$config_dir - -elif [ "$type" == "image_sims" ]; then - - [ "$quiet" == "0" ] && echo "Running on image simulations" - retrieve="symlink" - config_dir=$HOME/shapepipe/example/cfis_image_sims - # SP_DIR points to the run directory where input_tiles and input_exp live; - # configs use $SP_DIR/input_* so those dirs stay outside SP_RUN and are - # not found twice by ShapePipe's recursive glob scan. - export SP_DIR=$dir - export SP_CONFIG=$config_dir - tile_det='sx' - -else - - echo "Invalid input type $type" - -fi - -[ "$quiet" == "0" ] && echo "config_dir=$config_dir" - - # Init message if [ "$test_only" == "1" ]; then message "$(basename "$0") test mode, exiting." 
"$debug_out" 0 @@ -583,24 +469,13 @@ if [ "$ID" == "-1" ]; then fi if [ "$psf" != "psfex" ] && [ "$psf" != "mccd" ]; then - message "PSF (option -p) needs to be 'psfex' or 'mccd', not '$psf'" "$debug_out" 4 + message "PSF (option -p) needs to be 'psfex' or 'mccd'" "$debug_out" 4 fi if [ "$dry_run" != "0" ] && [ "$dry_run" != "1" ]; then message "dry_run must be 0 or 1, not $dry_run" "$debug_out" 8 fi -## Check input links -for link in "$dir/input_tiles" "$dir/input_exp" "$dir/cfis"; do - if [ -L "$link" ]; then - if [ ! -e "$link" ]; then - message "Broken symlink: $link" "$debug_out" 6 - fi - elif [ ! -e "$link" ]; then - message "Missing path: $link" "$debug_out" 6 - fi -done - # Start script @@ -620,28 +495,18 @@ Letter=${letter^} cd $dir -# Derive tile path components from ID (e.g. "000.227" -> IDra="000", IDdec="227") +# Derive tile path components from ID (e.g. "000.227" -> IDra="000") IDra=${ID%%.*} -IDdec=${ID##*.} -ID_DASHED="${IDra}-${IDdec}" work_dir="$dir/tiles/$IDra/$ID" log_file="$work_dir/job_sp_canfar_v2.0.log" # Create tile work directory [ ! -d "$work_dir" ] && command "mkdir -p $work_dir" $dry_run cd $work_dir -echo "$0 $@" > "$log_file" # Write ID to first input -# Image sims use dash format (e.g. 233-293); real data uses dot format (233.293) -# which ShapePipe's in2out_pattern converts to dashes for output naming only, -# not for input file lookup — so write the format that matches the actual files. if [ ! -e tile_numbers.txt ]; then - if [ "$type" == "image_sims" ]; then - echo ${ID//./-} > tile_numbers.txt - else - echo $ID > tile_numbers.txt - fi + echo $ID > tile_numbers.txt fi # Output directory @@ -649,8 +514,7 @@ if [ ! 
-d "output" ]; then command "mkdir output" $dry_run fi - -[ "$quiet" == "0" ] && { echo -n "pwd: "; pwd; } +echo -n "pwd: "; pwd # Avoid Qt error with setools @@ -673,99 +537,44 @@ IDdec=${ID##*.} (( do_job = job & 1 )) if [[ $do_job != 0 ]]; then # Job 1: download tile images and weights - if [ "$type" == "image_sims" ]; then - n_exp=2 - else - n_exp=4 - fi - run_job_logged 1 run_tile_job 1 "Git" "get_images_runner:${n_exp}" + run_tile_job 1 "Git" "get_images_runner:4" fi (( do_job = job & 2 )) if [[ $do_job != 0 ]]; then - log_2="$dir/logs/log_job_${ID}_2.txt" - [ "$force" == "1" ] && rm -f "$log_2" - if [ "$type" == "image_sims" ]; then - # Image sims weights are already uncompressed; fake the Uz output directory - # so downstream jobs can find the weight via last:uncompress_fits_runner. - weight_src="$dir/input_tiles/CFIS_simu_weight-${ID//./-}.fits" - if [ "$check_only" == "1" ]; then - uz_run_dir=$(ls -dt "$work_dir/output/run_sp_tile_Uz"* 2>/dev/null | head -1) - if [ -n "$uz_run_dir" ] && [ -e "$uz_run_dir/uncompress_fits_runner/output/$(basename $weight_src)" ]; then - msg="Complete: ( Uz/uncompress_fits_runner[fake] 1/1 )" - message "$msg" "$debug_out" -1 - echo "$msg" > "$log_2" - else - message "Missing: Uz $(basename $weight_src)" "$debug_out" -1 - fi - else - uz_out="$work_dir/output/run_sp_tile_Uz$(date +_%Y-%m-%d_%H-%M-%S)/uncompress_fits_runner/output" - command "mkdir -p $uz_out" $dry_run - if [ -e "$weight_src" ] && [ ! 
-e "$uz_out/$(basename $weight_src)" ]; then - command "ln -sf $weight_src $uz_out/$(basename $weight_src)" $dry_run - fi - if [ "$run_and_check" == "1" ]; then - uz_run_dir=$(ls -dt "$work_dir/output/run_sp_tile_Uz"* 2>/dev/null | head -1) - if [ -n "$uz_run_dir" ] && [ -e "$uz_run_dir/uncompress_fits_runner/output/$(basename $weight_src)" ]; then - msg="Complete: Uz $(basename $weight_src)" - message "$msg" "$debug_out" -1 - echo "$msg" > "$log_2" - else - message "Missing: Uz $(basename $weight_src)" "$debug_out" -1 - fi - else - echo "Completed job 2 $(date)" > "$log_2" - fi - fi - else - # Job 2: uncompress tile weights - run_job_logged 2 run_tile_job 2 "Uz" "uncompress_fits_runner:1" - fi + # Job 2: uncompress tile weights + run_tile_job 2 "Uz" "uncompress_fits_runner:1" fi (( do_job = job & 4 )) if [[ $do_job != 0 ]]; then # Job 4: find exposures - run_job_logged 4 run_tile_job 4 "Fe" "find_exposures_runner:1" + run_tile_job 4 "Fe" "find_exposures_runner:1" fi (( do_job = job & 8 )) if [[ $do_job != 0 ]]; then # Job 8: retrieve exposure images - if [ "$type" == "image_sims" ]; then - n_exp=3 - else - n_exp=6 - fi - run_job_logged 8 run_exp_job 8 "Gie" "get_images_runner:${n_exp}" + run_exp_job 8 "Gie" "get_images_runner:6" fi (( do_job = job & 16 )) if [[ $do_job != 0 ]]; then # Job 16: split exposures, get WCS headers - run_job_logged 16 run_exp_job 16 "Sp" "split_exp_runner:121" + run_exp_job 16 "Sp" "split_exp_runner:121" fi (( do_job = job & 32 )) if [[ $do_job != 0 ]]; then # Job 32: mask exposures - run_job_logged 32 run_exp_job 32 "Ma" "mask_runner:40" + run_exp_job 32 "Ma" "mask_runner:40" fi (( do_job = job & 64 )) if [[ $do_job != 0 ]]; then - # Job 64: PSF model - # For image_sims: fake PSF runs as part of job 512 (requires sexcat from job 256) - # For data: run full exposure-level PSF modelling pipeline - if [ "$type" == "image_sims" ]; then - # Fake PSF is handled inside job 512; write placeholder log so the sequence is complete - 
log_64="$dir/logs/log_job_${ID}_64.txt" - [ "$force" == "1" ] && rm -f "$log_64" - msg="Complete: job 64 placeholder (fake PSF runs as part of job 512)" - message "$msg" "$debug_out" -1 - echo "$msg" > "$log_64" - elif [ "$psf" == "psfex" ]; then - run_job_logged 64 run_exp_job 64 "SxSePsf${Letter}i" "sextractor_runner:80 psfex_runner:80 psfex_interp_runner:40::warn setools_runner:80:rand_split" + # Job 64: process stars on exposures, PSF model + if [ "$psf" == "psfex" ]; then + run_exp_job 64 "SxSePsf${Letter}i" "sextractor_runner:80 psfex_runner:80 psfex_interp_runner:40::warn setools_runner:80:rand_split" else message "MCCD not implemented yet for v2.0" "$debug_out" 10 fi @@ -774,81 +583,31 @@ fi (( do_job = job & 128 )) if [[ $do_job != 0 ]]; then # Job 128: merge exposure WCS headers into tile-level sqlite log - run_job_logged 128 run_tile_job 128 "Mh_exp" "merge_headers_runner:1" + run_tile_job 128 "Mh_exp" "merge_headers_runner:1" fi (( do_job = job & 256 )) if [[ $do_job != 0 ]]; then # Job 256: object selection on tiles if [ "$tile_det" == "uc" ]; then - run_job_logged 256 run_tile_job 256 "Gic Uc" "get_images_runner:2 read_ext_sexcat_runner:1" + run_tile_job 256 "Gic Uc" "get_images_runner:2 read_ext_sexcat_runner:1" else - n_exp=2 - run_job_logged 256 run_tile_job 256 "Sx" "sextractor_runner:$n_exp" + run_tile_job 256 "Sx" "sextractor_runner:1" fi fi (( do_job = job & 512 )) if [[ $do_job != 0 ]]; then - # Job 512: process tiles ([PSF interp,] vignets) - # For image_sims: fake PSF runs first (requires sexcat from job 256), then vignets - log_512="$dir/logs/log_job_${ID}_512.txt" - [ "$force" == "1" ] && rm -f "$log_512" - if [ "$type" == "data" ]; then - if [ "$check_only" == "1" ]; then - out=$(run_tile_job 512 "${Letter}iViVi ${Letter}iViVi ${Letter}iViVi" "psfex_interp_runner:1 vignetmaker_runner_run_1:1 vignetmaker_runner_run_2:4" 2>&1) - echo "$out" - echo "$out" | grep -qiE "(Missing|Incomplete)" || echo "$out" > "$log_512" - else - run_tile_job 
512 "${Letter}iViVi ${Letter}iViVi ${Letter}iViVi" "psfex_interp_runner:1 vignetmaker_runner_run_1:1 vignetmaker_runner_run_2:4" - if [ "$run_and_check" == "1" ]; then - check_only=1; local saved_force_512=$force; force=0 - out=$(run_tile_job 512 "${Letter}iViVi ${Letter}iViVi ${Letter}iViVi" "psfex_interp_runner:1 vignetmaker_runner_run_1:1 vignetmaker_runner_run_2:4" 2>&1) - check_only=0; force=$saved_force_512 - echo "$out" - echo "$out" | grep -qiE "(Missing|Incomplete)" || echo "$out" > "$log_512" - else - echo "Completed job 512 $(date)" > "$log_512" - fi - fi - else - if [ "$check_only" == "1" ]; then - out1=$(run_tile_job 64 "fpsf" "fake_psf_runner:1" 2>&1) - out2=$(run_tile_job 512 "ViVi ViVi" "vignetmaker_runner_run_1:1 vignetmaker_runner_run_2:3" 2>&1) - echo "$out1"; echo "$out2" - { echo "$out1"; echo "$out2"; } | grep -qiE "(Missing|Incomplete)" || \ - { echo "$out1"; echo "$out2"; } > "$log_512" - else - run_tile_job 64 "fpsf" "fake_psf_runner:1" - run_tile_job 512 "ViVi ViVi" "vignetmaker_runner_run_1:1 vignetmaker_runner_run_2:3" - if [ "$run_and_check" == "1" ]; then - check_only=1; saved_force_512im=$force; force=0 - out1=$(run_tile_job 64 "fpsf" "fake_psf_runner:1" 2>&1) - out2=$(run_tile_job 512 "ViVi ViVi" "vignetmaker_runner_run_1:1 vignetmaker_runner_run_2:3" 2>&1) - check_only=0; force=$saved_force_512im - echo "$out1"; echo "$out2" - { echo "$out1"; echo "$out2"; } | grep -qiE "(Missing|Incomplete)" || \ - { echo "$out1"; echo "$out2"; } > "$log_512" - else - echo "Completed job 512 $(date)" > "$log_512" - fi - fi - fi + # Job 512: process tiles (PSF interp, vignet) + run_tile_job 512 "${Letter}iViVi ${Letter}iViVi ${Letter}iViVi" "psfex_interp_runner:1 vignetmaker_runner_run_1:1 vignetmaker_runner_run_2:4" fi (( do_job = job & 1024 )) if [[ $do_job != 0 ]]; then # Job 1024: shape measurement - run_job_logged 1024 run_tile_job 1024 "Ng" "ngmix_runner:1" + run_tile_job 1024 "Ng" "ngmix_interp_runner:1" fi -(( do_job = job & 2048 )) -if [[ 
$do_job != 0 ]]; then - # Job 2048: merge catalogues - run_job_logged 2048 run_tile_job 2048 "Mc_${psf}" "make_cat_runner:1" -fi - - if [ -n "$scratch" ]; then message "Syncing output from scratch back to permanent dir" "$debug_out" -1 command "rsync -a output/ $work_dir/output/" $dry_run diff --git a/tests/module/test_psf_grammar_properties.py b/tests/module/test_psf_grammar_properties.py index 09eaf181a..87c966db6 100644 --- a/tests/module/test_psf_grammar_properties.py +++ b/tests/module/test_psf_grammar_properties.py @@ -21,9 +21,9 @@ (``example/cfis/final_cat.param``) names is a column the writer can produce — writer/param-file consistency; (d) the FULL frozen grammar (shapepipe#761) — ``ESTIMATOR_COMPONENT[_ERR]_ - OBJECT[_metacaltype]`` — holds across all three estimator families: ngmix - and galsim split ellipticity into ``G1``/``G2`` (g-type), HSM into - ``E1``/``E2`` (e-type); every family stores exactly one size, ``T``; HSM + OBJECT[_metacaltype]`` — holds across all three estimator families: all three + families split ellipticity into ``G1``/``G2`` (g-type, HSM included since + the return to reduced shear); every family stores exactly one size, ``T``; HSM keeps the singular ``FLAG`` token, ngmix/galsim keep plural ``FLAGS``. This part is a static example-based check (not writer-driven), so it covers galsim/HSM without depending on those producer modules — see @@ -418,9 +418,9 @@ def test_param_file_ngmix_tokens_are_producible(param_path, obj_ids): # data-driven FITS extension name, so left as a generic uppercase token. r"|GALSIM_(?:G1|G2|T)(?:_ERR|_UNCORR)?(?:_PSF)?_[A-Z0-9_]+" r"|GALSIM_(?:FLUX|FLUX_ERR|MAG|MAG_ERR|FLAGS|RES)_[A-Z0-9_]+" - # HSM: e-type, explicit PSF/STAR object, singular FLAG; the multi-epoch + # HSM: g-type, explicit PSF/STAR object, singular FLAG; the multi-epoch # sink in make_cat._save_psf_data appends a bare epoch index. - r"|HSM_(?:E1|E2|T)_(?:PSF|STAR)(?:_\d+)?" + r"|HSM_(?:G1|G2|T)_(?:PSF|STAR)(?:_\d+)?" 
r"|HSM_FLAG_(?:PSF|STAR)(?:_\d+)?" r")$" ) @@ -438,10 +438,10 @@ def test_param_file_ngmix_tokens_are_producible(param_path, obj_ids): "GALSIM_G2_UNCORR_NOSHEAR", "GALSIM_T_PSF_ORIGINAL_PSF", "GALSIM_FLAGS_NOSHEAR", - "HSM_E1_PSF", + "HSM_G1_PSF", "HSM_T_STAR", "HSM_FLAG_PSF", - "HSM_E1_PSF_3", # make_cat multi-epoch sink + "HSM_G1_PSF_3", # make_cat multi-epoch sink "HSM_T_PSF_2", "HSM_FLAG_STAR_1", ] @@ -452,6 +452,7 @@ def test_param_file_ngmix_tokens_are_producible(param_path, obj_ids): "NGMIX_G1_GAL_NOSHEAR", # GAL token regression "NGMIX_ELL_NOSHEAR", # packed ellipticity "E1_PSF_HSM", # pre-rename HSM naming (not HSM_-prefixed) + "HSM_E1_PSF", # e-type leftover: HSM stores g-type G1/G2 since #761 "SIGMA_PSF_HSM", # raw sigma, not T "HSM_SIGMA_PSF", # stored sigma instead of T "HSM_FLAGS_PSF", # plural — HSM is singular FLAG diff --git a/tests/module/test_psfex_interp.py b/tests/module/test_psfex_interp.py index 5e133fda0..afde409e8 100644 --- a/tests/module/test_psfex_interp.py +++ b/tests/module/test_psfex_interp.py @@ -121,15 +121,15 @@ def test_get_galaxy_positions_raises_for_missing_column(tmp_path): @settings(deadline=None, max_examples=25) @given(e=_e_complex) -def test_get_psfshapes_stores_e_type_distortion(psfex_interpolator, e): - """The stored HSM PSF shapes are e-type distortion, not g-type shear. +def test_get_psfshapes_stores_g_type_shear(psfex_interpolator, e): + """The stored HSM PSF shapes are g-type reduced shear, not e-type distortion. - ``_get_psfshapes`` reads ``moms.observed_shape.e1/.e2``; this pins it to the - galsim *distortion* accessor and away from the reduced-shear ``.g1/.g2``. + ``_get_psfshapes`` reads ``moms.observed_shape.g1/.g2``; this pins it to the + galsim *reduced-shear* accessor and away from the distortion ``.e1/.e2``. 
The check is exact against the same image's ``observed_shape`` (independent of how well HSM recovers the input), and the magnitude inequality is the - tooth: for any non-round shape ``|e| > |g|`` strictly, so storing ``.g`` - instead (the pre-fix bug) would fail it. + tooth: for any non-round shape ``|e| > |g|`` strictly, so storing ``.e`` + instead would fail it. """ psfex_interpolator.interp_PSFs = _elliptical_psf_stamps([(e.real, e.imag)]) psfex_interpolator._get_psfshapes() @@ -144,11 +144,11 @@ def test_get_psfshapes_stores_e_type_distortion(psfex_interpolator, e): mag_g = np.hypot(g1, g2) assume(mag_e - mag_g > 1e-3) # ensure a non-degenerate e-vs-g distinction - # Stored values are exactly the g-type distortion components ... + # Stored values are exactly the g-type reduced-shear components ... npt.assert_array_equal(psfex_interpolator.psf_shapes[0, 0], g1) npt.assert_array_equal(psfex_interpolator.psf_shapes[0, 1], g2) - # ... whose magnitude strictly subceeds the e-type shear. + # ... whose magnitude strictly subceeds the e-type distortion. mag_stored = np.hypot( psfex_interpolator.psf_shapes[0, 0], psfex_interpolator.psf_shapes[0, 1] )