权重下载
1.官方下载方法
## 可能会下载历史commit导致文件夹偏大
# Fetch the whisper-large-v2 weights in the background, logging to download.log,
# then follow the log to watch progress.
nohup modelscope download --model AI-ModelScope/whisper-large-v2 --local_dir whisper-large-v2 > download.log 2>&1 &
tail -f download.log
# Download a model snapshot via the ModelScope SDK, keeping only the files
# matched by allow_file_pattern (avoids pulling historical-commit bloat).
from modelscope import snapshot_download
model_dir = snapshot_download(
model_id="damo/nlp_gpt3_text-generation_1.3B", # model ID on ModelScope
revision="master", # latest version only (a concrete commit id also works)
cache_dir="./models", # directory to download/cache into
allow_file_pattern=[ # keep only files matching these glob patterns
"*safetensors",
"*bin",
"*py",
"*json",
"*txt",
"*model",
"jinja" # NOTE(review): matches only a file literally named "jinja"; "*jinja" was probably intended — confirm
],
#ignore_file_pattern=[".*"] # alternative: ignore every other file
)
print("模型下载到:", model_dir)
modelscope
# Extract modelscope.cn's certificate chain and install it into the system
# trust store (RHEL/CentOS ca-trust layout).
# FIX: the original redirected straight into /etc/pki as the invoking user,
# which fails without root even though the next line uses sudo — write the
# file via `sudo tee` so the privileged step covers the write as well.
openssl s_client -showcerts -connect modelscope.cn:443 </dev/null 2>/dev/null \
  | awk '/BEGIN CERTIFICATE/,/END CERTIFICATE/' \
  | sudo tee /etc/pki/ca-trust/source/anchors/modelscope-ca.crt >/dev/null
sudo update-ca-trust extract
2.wget批量下载
modelscope和wget都可以直接下载单个文件,这里使用wget为例
下载10-19编号文件;文件名中编号宽度固定为脚本内的 WIDTH=5,比如 model-00010-of-00092.safetensors
bash download.sh 10 19 5   # 第3个参数是最大并行下载数(MAX_PARALLEL),不是编号宽度;用 bash 执行而不是 `.` source,否则脚本里的 set -e 会影响当前 shell
du
#!/usr/bin/env bash
# Batch-download sharded model files (model-XXXXX-of-00092.safetensors) with
# bounded parallelism. A download whose on-disk size (du, MiB granularity)
# stops growing for two consecutive 60s checks is treated as finished or
# stalled; its wget is reaped and the slot freed for the next index.
# Usage: download.sh START END MAX_PARALLEL
set -euo pipefail

START=$1          # first shard index to fetch
END=$2            # last shard index (inclusive)
MAX_PARALLEL=$3   # max concurrent wget processes
BASE_URL="https://modelscope.cn/your_repo_path/"
TOTAL=92          # total shard count encoded in the filename
WIDTH=5           # zero-padded width of the index in the filename
WORKDIR="$PWD/downloads"
LOGDIR="$WORKDIR/logs"
mkdir -p "$WORKDIR" "$LOGDIR"

# ACTIVE: shard index -> wget PID; LAST_SIZE/STAGNANT track size growth.
declare -A ACTIVE LAST_SIZE STAGNANT
current=$START

# Render the shard filename for index $1, e.g. model-00010-of-00092.safetensors
gen_filename() {
  printf "model-%0*d-of-%0*d.safetensors" "$WIDTH" "$1" "$WIDTH" "$TOTAL"
}

# Launch a resumable background download for shard index $1 and register it.
download_one() {
  local idx=$1
  local fname
  fname=$(gen_filename "$idx")
  local url="${BASE_URL}${fname}"
  local file="$WORKDIR/$fname"
  local log="$LOGDIR/$idx.log"
  echo "[$(date)] start $fname"
  # NOTE(review): relies on wget resuming (-c) into the -O target; confirm
  # against the mirror's behavior.
  nohup wget -c "$url" -O "$file" > "$log" 2>&1 &
  # FIX: capture the PID (original stored a constant 1) so a stalled
  # download can actually be killed instead of leaking.
  ACTIVE[$idx]=$!
  LAST_SIZE[$idx]=0
  STAGNANT[$idx]=0
}

# Fill the initial slots.
# FIX: current=$((current+1)) instead of ((current++)) — post-increment
# returns the pre-increment value, which aborts under set -e when current is 0.
while (( current <= END && ${#ACTIVE[@]} < MAX_PARALLEL )); do
  download_one "$current"
  current=$((current+1))
done

while (( ${#ACTIVE[@]} > 0 || current <= END )); do
  sleep 60
  for idx in "${!ACTIVE[@]}"; do
    file="$WORKDIR/$(gen_filename "$idx")"
    [[ ! -f "$file" ]] && continue
    size=$(du -m "$file" | awk '{print $1}')
    last=${LAST_SIZE[$idx]}
    if [[ "$size" -eq "$last" && "$size" -gt 0 ]]; then
      STAGNANT[$idx]=$((STAGNANT[$idx]+1))
    else
      STAGNANT[$idx]=0
    fi
    LAST_SIZE[$idx]=$size
    # Two stagnant checks in a row: assume finished/stalled, reap the wget
    # (harmless if it already exited) and free the slot.
    if (( STAGNANT[$idx] >= 2 )); then
      echo "[$(date)] $idx stagnant (du)"
      kill "${ACTIVE[$idx]}" 2>/dev/null || true
      unset "ACTIVE[$idx]" "LAST_SIZE[$idx]" "STAGNANT[$idx]"
    fi
  done
  # Refill freed slots.
  while (( current <= END && ${#ACTIVE[@]} < MAX_PARALLEL )); do
    download_one "$current"
    current=$((current+1))
  done
done
wait   # reap any remaining background jobs
stat
#!/usr/bin/env bash
# Batch-download sharded model files with bounded parallelism. A shard whose
# exact byte size (stat) is unchanged between two 60s checks is treated as
# finished or stalled; its wget is reaped and the slot freed.
# Usage: download.sh START END MAX_PARALLEL
set -euo pipefail

START=$1          # first shard index to fetch
END=$2            # last shard index (inclusive)
MAX_PARALLEL=$3   # max concurrent wget processes
BASE_URL="https://modelscope.cn/your_repo_path/"
TOTAL=92          # total shard count encoded in the filename
WIDTH=5           # zero-padded width of the index in the filename
WORKDIR="$PWD/downloads"
LOGDIR="$WORKDIR/logs"
mkdir -p "$WORKDIR" "$LOGDIR"

# ACTIVE: shard index -> wget PID; LAST_SIZE: last observed byte size.
declare -A ACTIVE LAST_SIZE
current=$START

# Render the shard filename for index $1.
gen_filename() {
  printf "model-%0*d-of-%0*d.safetensors" "$WIDTH" "$1" "$WIDTH" "$TOTAL"
}

# Byte size of file $1, or 0 if it does not exist yet.
# NOTE(review): 'stat -c %s' is GNU coreutils; BSD/macOS needs 'stat -f %z'.
get_size() {
  stat -c %s "$1" 2>/dev/null || echo 0
}

# Launch a resumable background download for shard index $1 and register it.
download_one() {
  local idx=$1
  local fname
  fname=$(gen_filename "$idx")
  local url="${BASE_URL}${fname}"
  local file="$WORKDIR/$fname"
  local log="$LOGDIR/$idx.log"
  echo "[$(date)] start $fname"
  nohup wget -c "$url" -O "$file" > "$log" 2>&1 &
  # FIX: capture the PID (original stored a constant 1) so a stalled
  # download can actually be killed instead of leaking.
  ACTIVE[$idx]=$!
  LAST_SIZE[$idx]=0
}

# Fill the initial slots.
# FIX: current=$((current+1)) instead of ((current++)) — post-increment
# returns the pre-increment value, which aborts under set -e when current is 0.
while (( current <= END && ${#ACTIVE[@]} < MAX_PARALLEL )); do
  download_one "$current"
  current=$((current+1))
done

while (( ${#ACTIVE[@]} > 0 || current <= END )); do
  sleep 60
  for idx in "${!ACTIVE[@]}"; do
    file="$WORKDIR/$(gen_filename "$idx")"
    size=$(get_size "$file")
    last=${LAST_SIZE[$idx]}
    if [[ "$size" -eq "$last" && "$size" -gt 0 ]]; then
      # Size unchanged and non-zero: assume finished/stalled; reap the wget
      # (harmless if it already exited) and free the slot.
      echo "[$(date)] $idx stagnant (stat)"
      kill "${ACTIVE[$idx]}" 2>/dev/null || true
      unset "ACTIVE[$idx]" "LAST_SIZE[$idx]"
    else
      LAST_SIZE[$idx]=$size
    fi
  done
  # Refill freed slots.
  while (( current <= END && ${#ACTIVE[@]} < MAX_PARALLEL )); do
    download_one "$current"
    current=$((current+1))
  done
done
wait   # reap any remaining background jobs
mtime
#!/usr/bin/env bash
# Batch-download sharded model files with bounded parallelism. A shard whose
# wget log has not been modified for over a minute (find -mmin +1) is treated
# as finished or stalled; its wget is reaped and the slot freed.
# Usage: download.sh START END MAX_PARALLEL
set -euo pipefail

START=$1          # first shard index to fetch
END=$2            # last shard index (inclusive)
MAX_PARALLEL=$3   # max concurrent wget processes
BASE_URL="https://modelscope.cn/your_repo_path/"
TOTAL=92          # total shard count encoded in the filename
WIDTH=5           # zero-padded width of the index in the filename
WORKDIR="$PWD/downloads"
LOGDIR="$WORKDIR/logs"
mkdir -p "$WORKDIR" "$LOGDIR"

declare -A ACTIVE   # shard index -> wget PID
current=$START

# Render the shard filename for index $1.
gen_filename() {
  printf "model-%0*d-of-%0*d.safetensors" "$WIDTH" "$1" "$WIDTH" "$TOTAL"
}

# Launch a resumable background download for shard index $1 and register it.
download_one() {
  local idx=$1
  local fname
  fname=$(gen_filename "$idx")
  local url="${BASE_URL}${fname}"
  local file="$WORKDIR/$fname"
  local log="$LOGDIR/$idx.log"
  echo "[$(date)] start $fname"
  nohup wget -c "$url" -O "$file" > "$log" 2>&1 &
  # FIX: capture the PID (original stored a constant 1) so a stalled
  # download can actually be killed instead of leaking.
  ACTIVE[$idx]=$!
}

# Fill the initial slots.
# FIX: current=$((current+1)) instead of ((current++)) — post-increment
# returns the pre-increment value, which aborts under set -e when current is 0.
while (( current <= END && ${#ACTIVE[@]} < MAX_PARALLEL )); do
  download_one "$current"
  current=$((current+1))
done

while (( ${#ACTIVE[@]} > 0 || current <= END )); do
  sleep 60
  for idx in "${!ACTIVE[@]}"; do
    log="$LOGDIR/$idx.log"
    if [[ ! -f "$log" ]]; then
      continue
    fi
    # find prints the path only when the log's mtime is older than 1 minute.
    if find "$log" -mmin +1 | grep -q .; then
      echo "[$(date)] $idx log stalled"
      # Reap the wget (harmless if it already exited) and free the slot.
      kill "${ACTIVE[$idx]}" 2>/dev/null || true
      unset "ACTIVE[$idx]"
    fi
  done
  # Refill freed slots.
  while (( current <= END && ${#ACTIVE[@]} < MAX_PARALLEL )); do
    download_one "$current"
    current=$((current+1))
  done
done
wait   # reap any remaining background jobs