# syntax = docker/dockerfile:experimental
FROM ubuntu:18.04
ARG PYTHON_VERSION=3.8.12
ARG PYTHON_VERSION_SHOT=3.8
ARG TOOLKIT_PATH=/usr/local/Ascend/ascend-toolkit/latest
WORKDIR /tmp
RUN echo "deb [trusted=yes] https://repo.huaweicloud.com/ubuntu/ bionic main restricted universe multiverse\n\
deb-src [trusted=yes] https://repo.huaweicloud.com/ubuntu/ bionic main restricted universe multiverse\n\
deb [trusted=yes] https://repo.huaweicloud.com/ubuntu/ bionic-updates main restricted universe multiverse\n\
deb-src [trusted=yes] https://repo.huaweicloud.com/ubuntu/ bionic-updates main restricted universe multiverse\n\
deb [trusted=yes] https://repo.huaweicloud.com/ubuntu/ bionic-backports main restricted universe multiverse\n\
deb-src [trusted=yes] https://repo.huaweicloud.com/ubuntu/ bionic-backports main restricted universe multiverse\n\
deb [trusted=yes] https://repo.huaweicloud.com/ubuntu/ bionic-security main restricted universe multiverse\n\
deb-src [trusted=yes] https://repo.huaweicloud.com/ubuntu/ bionic-security main restricted universe multiverse\n" > /etc/apt/sources.list && \
apt-get -o "Acquire::https::Verify-Peer=false" update && \
apt-get -o "Acquire::https::Verify-Peer=false" install -y --no-install-recommends ca-certificates && \
apt-get -o "Acquire::https::Verify-Peer=false" install -y --no-install-recommends \
wget vim sudo bzip2 wget make tar curl g++ pkg-config unzip numactl \
libsqlite3-dev libzip-dev liblzma-dev zlib1g-dev libbz2-dev libopenblas-dev libblas3 liblapack3 \
liblapack-dev libblas-dev gfortran libhdf5-dev libffi-dev libicu60 \
libxml2 libssl-dev git patch libfreetype6-dev libpng-dev libgl1-mesa-glx less htop bc \
gcc cmake zlib1g pciutils libxslt1-dev net-tools libxml2-dev libxslt-dev libsqlite3-dev openssl openssh-server git-lfs && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
ENV LD_LIBRARY_PATH=/usr/local/python${PYTHON_VERSION}/lib: \
PATH=/usr/local/python${PYTHON_VERSION}/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
# 安装python和pip
RUN curl -k https://mirrors.huaweicloud.com/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tar.xz -o Python-${PYTHON_VERSION}.tar.xz && \
tar -xf /tmp/Python-${PYTHON_VERSION}.tar.xz && \
cd Python-${PYTHON_VERSION} && ./configure --prefix=/usr/local/python${PYTHON_VERSION} --enable-shared --enable-loadable-sqlite-extensions --enable-optimizations && \
make -j8 && make install && \
ln -sf /usr/local/python${PYTHON_VERSION}/bin/python3 /usr/bin/python3 && \
ln -sf /usr/local/python${PYTHON_VERSION}/bin/python3 /usr/bin/python && \
ln -sf /usr/local/python${PYTHON_VERSION}/bin/pip3 /usr/bin/pip3 && \
ln -sf /usr/local/python${PYTHON_VERSION}/bin/pip3 /usr/bin/pip && \
cd .. && \
rm -rf Python* && \
mkdir -p ~/.pip && \
echo '[global] \n\
index-url=https://mirrors.aliyun.com/pypi/simple\n\
trusted-host=mirrors.aliyun.com' >> ~/.pip/pip.conf && \
pip3 install pip -U
RUN git config --global http.sslVerify false
#安装cmake
RUN wget https://github.com/Kitware/CMake/releases/download/v3.28.1/cmake-3.28.1-linux-x86_64.sh --no-check-certificate && \
mkdir /usr/local/cmake && \
bash cmake-3.28.1-linux-x86_64.sh --skip-license --prefix=/usr/local/cmake
ENV PATH=/usr/local/cmake/bin:$PATH \
LD_LIBRARY_PATH=/usr/local/cmake/lib:/$LD_LIBRARY_PATH
#torch.whl https://download.pytorch.org/whl/cpu/torch-2.1.0%2Bcpu-cp38-cp38-linux_x86_64.whl#sha256=9e5cfd931a65b38d222755a45dabb53b836be31bc620532bc66fee77e3ff67dc
#torch_npu https://gitee.com/ascend/pytorch/releases/download/v5.0.0-pytorch2.1.0/torch_npu-2.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
COPY ./pkgs/torch*.whl /tmp/
# RUN pip3 install torch-* --force-reinstall && \
# pip3 install --upgrade torch_npu*.whl --force-reinstall && \
# git config --global http.sslVerify false && \
# rm -rf /tmp/* && \
# rm -rf /root/.cache/pip
#编译安装torch 和npu
RUN pip3 install torch-* --force-reinstall && \
rm -rf /tmp/*
RUN cd /root/ && \
git clone https://gitee.com/ascend/pytorch.git -b v2.1.0-5.0.0 --depth 1 && \
cd pytorch && \
bash ci/build.sh --python=${PYTHON_VERSION_SHOT} && \
pip3 install --upgrade dist/torch_npu-* --force-reinstall && \
#安装pip包
RUN pip3 install pip -U && \
pip3 install decorator numpy sympy pyyaml pathlib2 grpcio grpcio-tools protobuf scipy \
requests attrs wheel Pillow wheel setuptools matplotlib opencv-python \
sklearn==0.0 pandas lxml pytest xdoctest torchvision==0.16.0 && \
pip3 install cffi pycocotools easydict psutil pathlib2 absl-py && \
git clone https://gitee.com/mirrors_NVIDIA/dllogger && \
pip3 install /tmp/dllogger && \
rm -rf /tmp/* \
rm -rf /root/.cache/pip
#set env
ENV LD_LIBRARY_PATH=$TOOLKIT_PATH/fwkacllib/lib64/:/usr/local/python${PYTHON_VERSION}/lib/python${PYTHON_VERSION_SHOT}/site-packages/torch/lib:/usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/lib/aarch64_64-linux-gnu:$LD_LIBRARY_PATH \
PATH=$PATH:$TOOLKIT_PATH/fwkacllib/ccec_compiler/bin/:$TOOLKIT_PATH/toolkit/tools/ide_daemon/bin/ \
ASCEND_OPP_PATH=$TOOLKIT_PATH/opp/ \
OPTION_EXEC_EXTERN_PLUGIN_PATH=$TOOLKIT_PATH/fwkacllib/lib64/plugin/opskernel/libfe.so:$TOOLKIT_PATH/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:$TOOLKIT_PATH/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so \
PYTHONPATH=$TOOLKIT_PATH/fwkacllib/python/site-packages/:$TOOLKIT_PATH/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:$TOOLKIT_PATH/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH \
ASCEND_AICPU_PATH=$TOOLKIT_PATH
COPY ./pkgs/Ascend-cann-toolkit*.run /tmp/
COPY ./pkgs/Ascend-cann-kernels-*.run /tmp/
#安装toolkit
RUN echo y | bash ./Ascend-cann-toolkit*.run --install-path=/usr/local/Ascend/ --install --quiet --install-for-all && \
echo y | bash ./Ascend-cann-kernels-*.run --install --install-for-all && \
rm -rf /tmp/*
#apex包编译安装
RUN git clone -b master https://gitee.com/ascend/apex.git && \
cd apex && \
bash scripts/build.sh --python=${PYTHON_VERSION_SHOT} && \
cd apex/dist/ && \
pip3 uninstall apex && \
pip3 install --upgrade apex*
# COPY ./pkgs/apex-*.whl /tmp/
# RUN pip3 install --upgrade /tmp/apex* && \
# rm -rf /tmp/*
# mindx_toolbox
COPY ./pkgs/Ascend-mindx* /tmp/
RUN echo y | bash ./Ascend-mindx*.run --install-path=/usr/local/Ascend/ --install --quiet && \
rm -rf /tmp/*
# mpich 安装 (hccl_test前置)
#https://www.hiascend.com/document/detail/zh/canncommercial/70RC1/devtools/auxiliarydevtool/HCCLpertest_16_0002.html
RUN wget https://www.mpich.org/static/downloads/3.2.1/mpich-3.2.1.tar.gz --no-check-certificate && \
tar -xzvf mpich-3.2.1.tar.gz && \
cd mpich-3.2.1 && \
./configure --disable-fortran --prefix=/usr/local/mpich-3.2.1 && \
make -j8 && \
make install &&\
echo "export PATH=\$PATH:/usr/local/mpich-3.2.1/bin" >> /root/.bashrc &&\
echo "export LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:/usr/local/mpich-3.2.1/lib" >> /root/.bashrc && \
rm -rf /tmp/*
# cann 环境变量
RUN echo "source /usr/local/Ascend/ascend-toolkit/set_env.sh">>/root/.bashrc
RUN echo "source /usr/local/Ascend/ascend-toolkit/latest/bin/setenv.bash">>/root/.bashrc
WORKDIR /root/
#Model-link迁移
#https://gitee.com/ascend/AscendSpeed/tree/master/examples/llama2
RUN pip3 install --no-use-pep517 -e git+https://github.com/NVIDIA/Megatron-LM.git@23.05#egg=megatron-core && \
pip install deepspeed==0.9.2 && \
git clone https://gitee.com/ascend/DeepSpeed.git -b v0.9.2 deepspeed_npu && \
cd deepspeed_npu && \
pip3 install -e ./ && \
cd .. && \
git clone https://gitee.com/ascend/AscendSpeed.git && \
cd AscendSpeed && \
pip install -r requirements.txt && \
mkdir logs && \
mkdir ckpt && \
mkdir dataset && \
mkdir dataset/llama/ && \
cd dataset
# wget https://huggingface.co/datasets/tatsu-lab/alpaca/resolve/main/data/train-00000-of-00001-a09b74b3ef9c3b56.parquet --no-check-certificate
# Modelzoo 适配
RUN git clone https://gitee.com/ascend/ModelZoo-PyTorch.git && \
cd ModelZoo-PyTorch/PyTorch/built-in/foundation/ChatGLM2-6B/ && \
find ./ -name "requirements.txt" -exec sed -i 's/transformers==4.29.0/ /g' {} + && \
pip install -r requirements.txt && \
pip3 install -U transformers && \
pip3 install deepspeed