Skip to content

Commit cbeb6b9

Browse files
meatybobby and nv-kkudrynski
authored and committed
[ELECTRA/TF2] Fixed build issue of tokenizer on arm
1 parent ca5ae20 commit cbeb6b9

1 file changed

Lines changed: 1 addition & 37 deletions

File tree

TensorFlow2/LanguageModeling/ELECTRA/Dockerfile

Lines changed: 1 addition & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -13,45 +13,9 @@
1313
# limitations under the License.
1414

1515
ARG FROM_IMAGE_NAME=nvcr.io/nvidia/tensorflow:20.07-tf2-py3
16-
17-
######
18-
# Tokenizers is only available pre-built on x86
19-
#
20-
FROM ${FROM_IMAGE_NAME} AS tokenizers_amd64
21-
WORKDIR /wheelhouse
22-
RUN pip download tokenizers==0.7.0
23-
24-
FROM quay.io/pypa/manylinux2014_aarch64 as tokenizers_arm64
25-
ARG PYVER=38
26-
RUN yum install -y openssl-devel
27-
RUN curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain nightly-2019-11-01 -y
28-
ENV PATH="/root/.cargo/bin:$PATH"
29-
ENV PYBIN=/opt/python/cp${PYVER}-cp${PYVER}/bin
30-
ENV PYTHON_SYS_EXECUTABLE="$PYBIN/python"
31-
RUN git clone -b python-v0.8.0 https://github.com/huggingface/tokenizers.git /opt/tokenizers
32-
WORKDIR /opt/tokenizers/bindings/python
33-
RUN "${PYBIN}/pip" install setuptools-rust \
34-
&& "${PYBIN}/python" setup.py bdist_wheel \
35-
&& rm -rf build/* \
36-
&& for whl in dist/*.whl; do \
37-
auditwheel repair "$whl" -w dist/; \
38-
done \
39-
&& rm dist/*-linux_* \
40-
&& mkdir -p /wheelhouse \
41-
&& mv dist/*.whl /wheelhouse
42-
43-
ARG TARGETARCH
44-
FROM tokenizers_${TARGETARCH} AS tokenizers
45-
#
46-
#####
47-
48-
4916
FROM ${FROM_IMAGE_NAME}
5017
RUN apt-get update && apt-get install -y pbzip2 pv bzip2 cabextract
5118

52-
RUN --mount=from=tokenizers,source=/wheelhouse,target=/tmp/wheelhouse \
53-
pip install --no-cache-dir /tmp/wheelhouse/tokenizers*.whl
54-
5519
ENV DATA_PREP_WORKING_DIR /workspace/electra/data
5620
WORKDIR /workspace
5721
RUN git clone https://github.com/attardi/wikiextractor.git && cd wikiextractor && git checkout 6408a430fc504a38b04d37ce5e7fc740191dee16 && cd ..
@@ -61,7 +25,7 @@ WORKDIR /workspace/electra
6125

6226
RUN pip install --no-cache-dir tqdm boto3 requests six ipdb h5py nltk progressbar filelock \
6327
git+https://github.com/NVIDIA/dllogger \
64-
nvidia-ml-py3==7.352.0
28+
nvidia-ml-py3==7.352.0 tokenizers==0.11.0
6529

6630
RUN apt-get install -y iputils-ping
6731
COPY . .

0 commit comments

Comments (0)