权重下载

1.官方下载方法

# Note: this may also download historical commits, which makes the folder larger than expected.
# Run the download in the background under nohup; stdout and stderr both go to download.log.
nohup modelscope download \
    --model AI-ModelScope/whisper-large-v2 \
    --local_dir whisper-large-v2 \
    > download.log 2>&1 &
# Follow the log to watch progress.
tail -f download.log

from modelscope import snapshot_download

# Download a model snapshot, keeping only the file types listed in
# allow_file_pattern, and print the directory it was stored in.
model_dir = snapshot_download(
    model_id="damo/nlp_gpt3_text-generation_1.3B",  # model ID
    revision="master",  # fetch only the latest revision (a specific commit id also works)
    cache_dir="./models",  # download directory
    allow_file_pattern=[   # keep only the files that are needed
        "*safetensors",
        "*bin",
        "*py",
        "*json",
        "*txt",
        "*model",
        # Wildcard added so this entry is a suffix pattern like its siblings;
        # a bare "jinja" presumably only matches a file named exactly "jinja".
        "*jinja",
    ],
    # ignore_file_pattern=[".*"]  # ignore all other files
)

print("模型下载到:", model_dir)

modelscope

# Save the certificate chain presented by modelscope.cn as a trust anchor.
# The file is written through `sudo tee`: a plain `>` redirection is opened by
# the calling shell, so it fails for a non-root user even though the following
# update-ca-trust step already runs under sudo.
# -servername makes the SNI host explicit so the expected certificate is served.
# NOTE(review): this trusts whatever chain the network path presents at the
# time the command is run — verify the saved certificate before relying on it.
openssl s_client -showcerts -servername modelscope.cn -connect modelscope.cn:443 </dev/null 2>/dev/null \
| awk '/BEGIN CERTIFICATE/,/END CERTIFICATE/' \
| sudo tee /etc/pki/ca-trust/source/anchors/modelscope-ca.crt >/dev/null

sudo update-ca-trust extract

2.wget批量下载

modelscope和wget都可以直接下载单个文件,这里使用wget为例
下载编号为10-19的文件,最多同时下载5个(第三个参数是最大并行下载数,不是编号宽度);文件名中编号部分的宽度由脚本内的WIDTH固定为5,比如model-00010-of-00092.safetensors
# Run the script in its own bash process instead of sourcing it with `.`:
# the script enables `set -euo pipefail`, which would otherwise stay active in
# the interactive shell and close it on the first failing command.
# Arguments: START END MAX_PARALLEL
bash download.sh 10 19 5

du

#!/usr/bin/env bash
#
# Download model shards START..END (inclusive) with wget, tracking at most
# MAX_PARALLEL of them as active at a time. Progress is judged from the
# on-disk size reported by `du`.
#
# Usage: download.sh START END MAX_PARALLEL
set -euo pipefail

if (( $# != 3 )); then
    printf 'Usage: %s START END MAX_PARALLEL\n' "${0##*/}" >&2
    exit 2
fi
for arg in "$@"; do
    if [[ ! "$arg" =~ ^[0-9]+$ ]]; then
        printf 'error: expected a non-negative integer, got: %s\n' "$arg" >&2
        exit 2
    fi
done

# Force base 10: a zero-padded argument such as 00010 would otherwise be
# treated as octal by shell arithmetic and by printf %d.
START=$((10#$1))
END=$((10#$2))
MAX_PARALLEL=$((10#$3))

# With zero slots no download could ever start and the polling loop would
# never terminate.
if (( MAX_PARALLEL < 1 )); then
    printf 'error: MAX_PARALLEL must be at least 1\n' >&2
    exit 2
fi

# Placeholder repository URL; shard file names are appended to it.
BASE_URL="https://modelscope.cn/your_repo_path/"
# Total shard count used in the "-of-NNNNN" part of each file name.
TOTAL=92
# Zero-padded width of the numeric fields in each file name.
WIDTH=5

WORKDIR="$PWD/downloads"
LOGDIR="$WORKDIR/logs"
mkdir -p "$WORKDIR" "$LOGDIR"

# Per-shard bookkeeping, keyed by shard index.
declare -A ACTIVE LAST_SIZE STAGNANT
# Next shard index to start.
current=$START

#######################################
# Print the shard file name for a given index (no trailing newline).
# Globals:   WIDTH (read) - zero-padded width of both numeric fields
#            TOTAL (read) - total shard count
# Arguments: $1 - shard index
# Outputs:   model-<index>-of-<total>.safetensors on stdout
#######################################
gen_filename() {
    local shard_no=$1
    printf 'model-%0*d-of-%0*d.safetensors' \
        "$WIDTH" "$shard_no" \
        "$WIDTH" "$TOTAL"
}

# PID of the background wget serving each active shard index.
declare -A PIDS=()

#######################################
# Start one shard download in the background and register it as active.
# Globals:   BASE_URL, WORKDIR, LOGDIR (read)
#            ACTIVE, PIDS, LAST_SIZE, STAGNANT (written)
# Arguments: $1 - shard index
# Outputs:   a "start" line on stdout; wget output goes to $LOGDIR/<idx>.log
#######################################
download_one() {
    local idx=$1
    local fname
    fname=$(gen_filename "$idx")
    local url="${BASE_URL}${fname}"
    local file="$WORKDIR/$fname"
    local log="$LOGDIR/$idx.log"

    echo "[$(date)] start $fname"
    nohup wget -c "$url" -O "$file" > "$log" 2>&1 &
    PIDS[$idx]=$!
    ACTIVE[$idx]=1
    LAST_SIZE[$idx]=0
    STAGNANT[$idx]=0
}

# Drop all bookkeeping for shard $1 so its slot can be reused.
# Subscripts are quoted so the words are never treated as glob patterns.
release_slot() {
    unset "ACTIVE[$1]" "PIDS[$1]" "LAST_SIZE[$1]" "STAGNANT[$1]"
}

# Start queued shards until MAX_PARALLEL are active or the range is exhausted.
fill_slots() {
    while (( current <= END && ${#ACTIVE[@]} < MAX_PARALLEL )); do
        download_one "$current"
        # Plain assignment instead of ((current++)): the latter returns status 1
        # when current is 0, which aborts the script under `set -e`.
        current=$((current + 1))
    done
}

fill_slots

# Poll once a minute until nothing is active and nothing is left to start.
while (( ${#ACTIVE[@]} > 0 || current <= END )); do
    sleep 60

    for idx in "${!ACTIVE[@]}"; do
        pid=${PIDS[$idx]}

        # wget is gone: collect its exit status and free the slot. Without this
        # a download that failed leaving an empty file would stay active forever,
        # because the size check below only fires for sizes greater than zero.
        if ! kill -0 "$pid" 2>/dev/null; then
            rc=0
            wait "$pid" || rc=$?
            if (( rc == 0 )); then
                echo "[$(date)] $idx finished"
            else
                echo "[$(date)] $idx failed (wget exit $rc), see $LOGDIR/$idx.log" >&2
            fi
            release_slot "$idx"
            continue
        fi

        file="$WORKDIR/$(gen_filename "$idx")"
        [[ -f "$file" ]] || continue

        # Disk usage in MiB; unchanged usage between polls counts as a stall.
        size=$(du -m "$file" | awk '{print $1}')
        last=${LAST_SIZE[$idx]}

        if [[ "$size" -eq "$last" && "$size" -gt 0 ]]; then
            STAGNANT[$idx]=$((STAGNANT[$idx]+1))
        else
            STAGNANT[$idx]=0
        fi

        LAST_SIZE[$idx]=$size

        # Two consecutive unchanged polls: give the slot to the next shard.
        # The wget process itself is left running.
        if (( STAGNANT[$idx] >= 2 )); then
            echo "[$(date)] $idx stagnant (du)"
            release_slot "$idx"
        fi
    done

    fill_slots
done

stat

#!/usr/bin/env bash
#
# Download model shards START..END (inclusive) with wget, tracking at most
# MAX_PARALLEL of them as active at a time. Progress is judged from the
# byte size reported by `stat`.
#
# Usage: download.sh START END MAX_PARALLEL
set -euo pipefail

if (( $# != 3 )); then
    printf 'Usage: %s START END MAX_PARALLEL\n' "${0##*/}" >&2
    exit 2
fi
for arg in "$@"; do
    if [[ ! "$arg" =~ ^[0-9]+$ ]]; then
        printf 'error: expected a non-negative integer, got: %s\n' "$arg" >&2
        exit 2
    fi
done

# Force base 10: a zero-padded argument such as 00010 would otherwise be
# treated as octal by shell arithmetic and by printf %d.
START=$((10#$1))
END=$((10#$2))
MAX_PARALLEL=$((10#$3))

# With zero slots no download could ever start and the polling loop would
# never terminate.
if (( MAX_PARALLEL < 1 )); then
    printf 'error: MAX_PARALLEL must be at least 1\n' >&2
    exit 2
fi

# Placeholder repository URL; shard file names are appended to it.
BASE_URL="https://modelscope.cn/your_repo_path/"
# Total shard count used in the "-of-NNNNN" part of each file name.
TOTAL=92
# Zero-padded width of the numeric fields in each file name.
WIDTH=5

WORKDIR="$PWD/downloads"
LOGDIR="$WORKDIR/logs"
mkdir -p "$WORKDIR" "$LOGDIR"

# Per-shard bookkeeping, keyed by shard index.
declare -A ACTIVE LAST_SIZE
# Next shard index to start.
current=$START

#######################################
# Print the shard file name for a given index (no trailing newline).
# Globals:   WIDTH (read) - zero-padded width of both numeric fields
#            TOTAL (read) - total shard count
# Arguments: $1 - shard index
# Outputs:   model-<index>-of-<total>.safetensors on stdout
#######################################
gen_filename() {
    local shard_no=$1
    printf 'model-%0*d-of-%0*d.safetensors' \
        "$WIDTH" "$shard_no" \
        "$WIDTH" "$TOTAL"
}

#######################################
# Print the size of a file in bytes, or 0 when stat cannot read it.
# Arguments: $1 - path to the file
# Outputs:   the byte count (or 0) followed by a newline on stdout
#######################################
get_size() {
    local target=$1
    local bytes
    if bytes=$(stat -c %s "$target" 2>/dev/null); then
        printf '%s\n' "$bytes"
    else
        echo 0
    fi
}

# PID of the background wget serving each active shard index.
declare -A PIDS=()

#######################################
# Start one shard download in the background and register it as active.
# Globals:   BASE_URL, WORKDIR, LOGDIR (read)
#            ACTIVE, PIDS, LAST_SIZE (written)
# Arguments: $1 - shard index
# Outputs:   a "start" line on stdout; wget output goes to $LOGDIR/<idx>.log
#######################################
download_one() {
    local idx=$1
    local fname
    fname=$(gen_filename "$idx")
    local url="${BASE_URL}${fname}"
    local file="$WORKDIR/$fname"
    local log="$LOGDIR/$idx.log"

    echo "[$(date)] start $fname"
    nohup wget -c "$url" -O "$file" > "$log" 2>&1 &
    PIDS[$idx]=$!
    ACTIVE[$idx]=1
    LAST_SIZE[$idx]=0
}

# Drop all bookkeeping for shard $1 so its slot can be reused.
# Subscripts are quoted so the words are never treated as glob patterns.
release_slot() {
    unset "ACTIVE[$1]" "PIDS[$1]" "LAST_SIZE[$1]"
}

# Start queued shards until MAX_PARALLEL are active or the range is exhausted.
fill_slots() {
    while (( current <= END && ${#ACTIVE[@]} < MAX_PARALLEL )); do
        download_one "$current"
        # Plain assignment instead of ((current++)): the latter returns status 1
        # when current is 0, which aborts the script under `set -e`.
        current=$((current + 1))
    done
}

fill_slots

# Poll once a minute until nothing is active and nothing is left to start.
while (( ${#ACTIVE[@]} > 0 || current <= END )); do
    sleep 60

    for idx in "${!ACTIVE[@]}"; do
        pid=${PIDS[$idx]}

        # wget is gone: collect its exit status and free the slot. Without this
        # a download that failed leaving an empty file would stay active forever,
        # because the size check below only fires for sizes greater than zero.
        if ! kill -0 "$pid" 2>/dev/null; then
            rc=0
            wait "$pid" || rc=$?
            if (( rc == 0 )); then
                echo "[$(date)] $idx finished"
            else
                echo "[$(date)] $idx failed (wget exit $rc), see $LOGDIR/$idx.log" >&2
            fi
            release_slot "$idx"
            continue
        fi

        file="$WORKDIR/$(gen_filename "$idx")"
        size=$(get_size "$file")
        last=${LAST_SIZE[$idx]}

        # Same non-zero byte size as the previous poll: give the slot to the
        # next shard. The wget process itself is left running.
        if [[ "$size" -eq "$last" && "$size" -gt 0 ]]; then
            echo "[$(date)] $idx stagnant (stat)"
            release_slot "$idx"
        else
            LAST_SIZE[$idx]=$size
        fi
    done

    fill_slots
done

mtime

#!/usr/bin/env bash
#
# Download model shards START..END (inclusive) with wget, tracking at most
# MAX_PARALLEL of them as active at a time. Progress is judged from the
# modification time of each shard's wget log.
#
# Usage: download.sh START END MAX_PARALLEL
set -euo pipefail

if (( $# != 3 )); then
    printf 'Usage: %s START END MAX_PARALLEL\n' "${0##*/}" >&2
    exit 2
fi
for arg in "$@"; do
    if [[ ! "$arg" =~ ^[0-9]+$ ]]; then
        printf 'error: expected a non-negative integer, got: %s\n' "$arg" >&2
        exit 2
    fi
done

# Force base 10: a zero-padded argument such as 00010 would otherwise be
# treated as octal by shell arithmetic and by printf %d.
START=$((10#$1))
END=$((10#$2))
MAX_PARALLEL=$((10#$3))

# With zero slots no download could ever start and the polling loop would
# never terminate.
if (( MAX_PARALLEL < 1 )); then
    printf 'error: MAX_PARALLEL must be at least 1\n' >&2
    exit 2
fi

# Placeholder repository URL; shard file names are appended to it.
BASE_URL="https://modelscope.cn/your_repo_path/"
# Total shard count used in the "-of-NNNNN" part of each file name.
TOTAL=92
# Zero-padded width of the numeric fields in each file name.
WIDTH=5

WORKDIR="$PWD/downloads"
LOGDIR="$WORKDIR/logs"
mkdir -p "$WORKDIR" "$LOGDIR"

# Set of active shard indices.
declare -A ACTIVE
# Next shard index to start.
current=$START

#######################################
# Print the shard file name for a given index (no trailing newline).
# Globals:   WIDTH (read) - zero-padded width of both numeric fields
#            TOTAL (read) - total shard count
# Arguments: $1 - shard index
# Outputs:   model-<index>-of-<total>.safetensors on stdout
#######################################
gen_filename() {
    local shard_no=$1
    printf 'model-%0*d-of-%0*d.safetensors' \
        "$WIDTH" "$shard_no" \
        "$WIDTH" "$TOTAL"
}

# PID of the background wget serving each active shard index.
declare -A PIDS=()

#######################################
# Start one shard download in the background and register it as active.
# Globals:   BASE_URL, WORKDIR, LOGDIR (read)
#            ACTIVE, PIDS (written)
# Arguments: $1 - shard index
# Outputs:   a "start" line on stdout; wget output goes to $LOGDIR/<idx>.log
#######################################
download_one() {
    local idx=$1
    local fname
    fname=$(gen_filename "$idx")
    local url="${BASE_URL}${fname}"
    local file="$WORKDIR/$fname"
    local log="$LOGDIR/$idx.log"

    echo "[$(date)] start $fname"
    nohup wget -c "$url" -O "$file" > "$log" 2>&1 &
    PIDS[$idx]=$!
    ACTIVE[$idx]=1
}

# Drop all bookkeeping for shard $1 so its slot can be reused.
# Subscripts are quoted so the words are never treated as glob patterns.
release_slot() {
    unset "ACTIVE[$1]" "PIDS[$1]"
}

# Start queued shards until MAX_PARALLEL are active or the range is exhausted.
fill_slots() {
    while (( current <= END && ${#ACTIVE[@]} < MAX_PARALLEL )); do
        download_one "$current"
        # Plain assignment instead of ((current++)): the latter returns status 1
        # when current is 0, which aborts the script under `set -e`.
        current=$((current + 1))
    done
}

fill_slots

# Poll once a minute until nothing is active and nothing is left to start.
while (( ${#ACTIVE[@]} > 0 || current <= END )); do
    sleep 60

    for idx in "${!ACTIVE[@]}"; do
        pid=${PIDS[$idx]}

        # wget is gone: collect its exit status and free the slot right away
        # instead of waiting for its log to look stale.
        if ! kill -0 "$pid" 2>/dev/null; then
            rc=0
            wait "$pid" || rc=$?
            if (( rc == 0 )); then
                echo "[$(date)] $idx finished"
            else
                echo "[$(date)] $idx failed (wget exit $rc), see $LOGDIR/$idx.log" >&2
            fi
            release_slot "$idx"
            continue
        fi

        log="$LOGDIR/$idx.log"

        if [[ ! -f "$log" ]]; then
            continue
        fi

        # find prints the path only when the log's mtime passes the -mmin +1
        # threshold; a stale log is treated as a stalled download and its slot
        # goes to the next shard. The wget process itself is left running.
        if find "$log" -mmin +1 | grep -q .; then
            echo "[$(date)] $idx log stalled"
            release_slot "$idx"
        fi
    done

    fill_slots
done