Next, install uv:
pip install uv
Next, set up the virtual environment:
uv venv .venv
Finally, install everything in pyproject.toml to build the project dependencies:
uv sync
''' conda install pytorch torchvision torchaudio pytorch-cuda=12.4 cuda-toolkit -c pytorch -c nvidia '''
export CUDA_HOME=...
pip install -r requirements_sglang.txt
pip install flash-attn==2.8.3 --no-build-isolation
export CUDA_HOME="$CONDA_PREFIX"
pip install vendor/verl
All datasets are downloaded to /nas/mars/dataset/ by default. Make sure you have sufficient disk space.
# Download MLVU dataset
huggingface-cli download MLVU/MVLU --repo-type dataset --local-dir /nas/mars/dataset/MLVU
# Download Video-MME dataset
huggingface-cli download lmms-lab/Video-MME --repo-type dataset --local-dir /nas/mars/dataset/Video-MME
# Download LVBench dataset
huggingface-cli download lmms-lab/LVBench --repo-type dataset --local-dir /nas/mars/dataset/LVBench
# Download LongVideoBench dataset
huggingface-cli download lmms-lab/LongVideoBench --repo-type dataset --local-dir /nas/mars/dataset/longvideobench
# Create base directory
mkdir -p /nas/mars/dataset
# Download all datasets
for dataset in "MLVU/MVLU:MLVU" "lmms-lab/Video-MME:Video-MME" "lmms-lab/LVBench:LVBench" "lmms-lab/LongVideoBench:longvideobench"; do
IFS=':' read -r repo_id local_dir <<< "$dataset"
echo "Downloading $repo_id to /nas/mars/dataset/$local_dir"
huggingface-cli download "$repo_id" --repo-type dataset --local-dir "/nas/mars/dataset/$local_dir"
done
python3 scripts/utils/burn_subtitles.py --json-file lvb_val.json --output-dir /nas/mars/dataset/longvideobench --data-folder /nas/mars/dataset/longvideobench/LongVideoBench/
python3 scripts/data_ops/run_data_pipeline.py \
retriever.window_size=8 \
dataset.name='lvb' \
dataset.lvb.dataset_path="/nas/mars/dataset/longvideobench/LongVideoBench/" \
dataset.lvb.burned_path="/nas/mars/dataset/longvideobench/" \
retriever.index_path="/home/hg22723/vseek/dataset"
bash scripts/data_ops/preprocess_lvb.sh \
--local_dataset_path "/nas/mars/dataset/longvideobench/LongVideoBench" \
--burned_path "/nas/mars/dataset/longvideobench/" \
--train_ratio 0.8 \
--local_save_dir /nas/mars/dataset/longvideobench \
--index_path /home/hg22723/vseek/dataset \
--window_size 8 \
--prompt_type tag
CHUNKS_DIR=/nas/mars/dataset/LVBench/video_chunks OUTPUT_DIR=/nas/mars/dataset/LVBench/videos bash scripts/utils/unzip_videos.sh
python3 scripts/utils/convert_parquet.py --parquet_path /nas/mars/dataset/LVBench/data/train-00000-of-00001.parquet --json_path /nas/mars/dataset/LVBench/data/lvbench_val.json
python3 scripts/data_ops/run_data_pipeline.py \
retriever.window_size=8 \
dataset.name='lvbench' \
dataset.lvbench.dataset_path="/nas/mars/dataset/LVBench" \
dataset.lvbench.burned_path="/nas/mars/dataset/LVBench/videos" \
retriever.index_path="/home/hg22723/vseek/dataset"
bash scripts/data_ops/preprocess_lvbench.sh \
--local_dataset_path "/nas/mars/dataset/LVBench" \
--burned_path "/nas/mars/dataset/LVBench/videos" \
--train_ratio 0.8 \
--local_save_dir /nas/mars/dataset/LVBench \
--index_path /home/hg22723/vseek/dataset \
--window_size 8 \
--prompt_type tag
SUBTITLE_DIR=/nas/mars/dataset/Video-MME CHUNKS_DIR=/nas/mars/dataset/Video-MME OUTPUT_DIR=/nas/mars/dataset/Video-MME/videos bash scripts/utils/unzip_videos.sh
python3 scripts/utils/convert_parquet.py --parquet_path /nas/mars/dataset/Video-MME/videomme/test-00000-of-00001.parquet --json_path /nas/mars/dataset/Video-MME/videomme/videomme_val.json
python3 scripts/utils/burn_subtitles.py --json-file videomme/videomme_val.json --output-dir /nas/mars/dataset/Video-MME/burn-subtitles --data-folder /nas/mars/dataset/Video-MME
python3 scripts/data_ops/run_data_pipeline.py \
dataset.name=videomme \
retriever.window_size=8 \
dataset.videomme.dataset_path=/nas/mars/dataset/Video-MME/ \
dataset.videomme.burned_path=/nas/mars/dataset/Video-MME/burn-subtitles/ \
retriever.index_path=/home/hg22723/vseek/dataset
bash scripts/data_ops/preprocess_videomme.sh \
--local_dataset_path "/nas/mars/dataset/Video-MME" \
--burned_path "/nas/mars/dataset/Video-MME/burn-subtitles" \
--train_ratio 0.8 \
--local_save_dir /nas/mars/dataset/Video-MME \
--index_path /home/hg22723/vseek/dataset \
--window_size 8 \
--prompt_type tag
plotQA, needle, ego, count, order, anomaly_reco, topic_reasoning
# Process MLVU dataset (if preprocessor exists)
# Add your MLVU processing commands here
python3 scripts/data_ops/run_data_pipeline.py \
retriever.window_size=8 dataset.name='mlvu' \
dataset.mlvu.dataset_path="/nas/mars/dataset/MLVU/MLVU" \
dataset.mlvu.burned_path="/nas/mars/dataset/MLVU/MLVU" \
retriever.index_path="/home/hg22723/vseek/dataset"
bash scripts/data_ops/preprocess_mvlu.sh \
--local_dataset_path "/nas/mars/dataset/MLVU/MLVU" \
--burned_path "/nas/mars/dataset/MLVU/MLVU" \
--train_ratio 0.8 \
--local_save_dir /nas/mars/dataset/MVLU \
--index_path /home/hg22723/vseek/dataset \
--window_size 8 \
--prompt_type tag
# Option 2: Use the convenience script
bash scripts/preprocess_mvlu.sh
The downloaded test file is a test.parquet; we extract it to .json:
python3 scripts/utils/convert_parquet.py --parquet_path /nas/mars/dataset/LVBench/data/train-00000-of-00001.parquet --json_path /nas/mars/dataset/LVBench/data/lvbench_val.json
python3 scripts/utils/convert_parquet.py --parquet_path /nas/mars/dataset/Video-MME/videomme/test-00000-of-00001.parquet --json_path /nas/mars/dataset/Video-MME/videomme/videomme_val.json
python3 scripts/utils/burn_subtitles.py --json-file videomme/videomme_val.json --output-dir /nas/mars/dataset/Video-MME/burn-subtitles --data-folder /nas/mars/dataset/Video-MME
python3 scripts/utils/burn_subtitles.py --json-file lvb_val.json --output-dir /nas/mars/experiment_result/test --data-folder /nas/mars/dataset/longvideobench/LongVideoBench/
python3 scripts/data_ops/run_data_pipeline.py dataset.name=lvbench retriever.window_size=8 dataset.dataset_path=/nas/mars/dataset/LVBench dataset.burned_path=/nas/mars/dataset/LVBench/videos/ retriever.index_path=/home/hg22723/vseek/dataset
# Process LongVideoBench dataset
python3 scripts/data_ops/run_data_pipeline.py \
retriever.window_size=8 \
dataset.name='lvb' \
dataset.dataset_path="/nas/mars/dataset/longvideobench/LongVideoBench/" \
dataset.burned_path="/nas/mars/dataset/longvideobench/"
# Option 1: Run full pipeline (index + parquet creation)
python src/data/lvbench_preprocessor.py \
--local_dataset_path /nas/mars/dataset/LVBench \
--local_save_dir ~/data/lvbench \
--index_path /nas/mars/dataset/lvbench_index \
--window_size 8 \
--embed_frames \
--prompt_type tag \
--retrieval_model_path /path/to/viclip \
--gpu_number 0 \
--mode both
# Option 2: Use the convenience script
bash scripts/preprocess_lvbench.sh
python3 scripts/utils/download.py --dataset_name lmms-lab/LongVideoBench --download_directory /nas/mars/dataset/longvideobench -->
huggingface-cli download MLVU/MVLU --repo-type dataset --local-dir /nas/mars/dataset/MLVU
python3 scripts/data_ops/run_data_pipeline.py retriever.window_size=8 dataset.name='lvb' dataset.dataset_path="/nas/mars/dataset/longvideobench/LongVideoBench/" dataset.burned_path="/nas/mars/dataset/longvideobench/"
CUDA_VISIBLE_DEVICES=5 python3 scripts/run_uniform_agent_data.py inference.max_images_per_turn=64 +inference.agent_prompt_type=cot llm.model=Qwen/Qwen3-VL-4B-Thinking inference.output_dir=results/uniform_q3_4bt_cot2 inference.max_output_tokens=2048 inference.gpu_number=5
python3 -m verl.model_merger merge --backend fsdp --local_dir checkpoints/vseek/qwen3-4bt_vl_lvb-emreward-vllm-wtool-tag/global_step_260/actor --target_dir checkpoints/vseek/qwen3-4bt_vl_lvb-emreward-vllm-wtool-tag/global_step_260/actor/huggingface
CC=/usr/bin/gcc CXX=/usr/bin/g++ pip install -r requirements.txt
install decord from source
- install ffmpeg=4.4.2 with conda install 'ffmpeg=4'
cd to decord
mkdir build && cd build
CC=$(which gcc) CXX=$(which g++) cmake .. -DUSE_CUDA=ON -DCMAKE_BUILD_TYPE=Release -DFFMPEG_DIR=$WORK/miniforge3/envs/vseek-vllm
make
cd ../python
python setup.py install