
Downloading and Calling Large Models


 

download_hg.py

# coding=utf-8
import time
from huggingface_hub import snapshot_download

# Model name on Hugging Face Hub
repo_id = "Qwen/Qwen3-4b"
# Local storage path
local_dir = "/mnt/f/test/Qwen-model/models--Qwen--Qwen3-4b"
cache_dir = local_dir + "/cache"

# Retry in a loop until the snapshot download completes without an exception
while True:
    try:
        snapshot_download(cache_dir=cache_dir,
            local_dir=local_dir,
            repo_id=repo_id,
            local_dir_use_symlinks=False,
            # allow_patterns=["*.model", "*.json", "*.bin", "*.py", "*.md", "*.txt"],
            # ignore_patterns=["*.safetensors", "*.msgpack", "*.h5", "*.ot",],
        )
    except Exception as e:
        print(e)
        time.sleep(5)  # wait a moment before retrying after a network error
    else:
        print("Download finished")
        break
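The commented-out allow_patterns / ignore_patterns arguments above can restrict the download to part of a repo. A minimal sketch, reusing the same repo_id and local_dir, that pulls only the safetensors weight shards plus the config/tokenizer files (the pattern list is just an example):

from huggingface_hub import snapshot_download

# Download only the weight shards and JSON/text configs, skipping other formats
snapshot_download(
    repo_id=repo_id,
    local_dir=local_dir,
    allow_patterns=["*.safetensors", "*.json", "*.txt", "*.model"],
)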

 

download_modelscope.py

# from modelscope.hub.snapshot_download import snapshot_download
# Download the model from ModelScope
from modelscope import snapshot_download
# model_dir = snapshot_download('deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B')
model_dir = snapshot_download('deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', cache_dir='/mnt/f/test/Qwen-model/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B')

 

# from modelscope.hub.snapshot_download import snapshot_download
# Download the model from ModelScope into a path built from the model name
from modelscope import snapshot_download

local_path = '/mnt/f/test/Qwen-model'
model_name = 'ZhipuAI/glm-edge-1.5b-chat'
model_path = local_path + '/' + model_name
model_dir = snapshot_download(model_name, cache_dir=model_path)
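A quick way to confirm the files actually landed on disk; a minimal sketch, where model_dir is simply the path returned by snapshot_download above:

import os

# List the downloaded directory and report its total size on disk
total = 0
for root, _, files in os.walk(model_dir):
    for name in files:
        total += os.path.getsize(os.path.join(root, name))
print(model_dir)
print(f"{total / 1024 / 1024:.1f} MB downloaded")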

 

 

 

download_safetensors.py

 

from huggingface_hub import snapshot_download

# Example values:
# model_name = "Qwen/Qwen3-0.6b"
# model_path = "/mnt/f/test/Qwen-model/models--Qwen--Qwen3-0.6b"

model_name = input("HF Hub repo id, e.g. Qwen/Qwen3-0.6b: ")
model_path = input("Local target directory, e.g. ./path/modelname: ")

snapshot_download(
    repo_id=model_name,
    local_dir=model_path,
    local_dir_use_symlinks=False,
    revision="main")

 

test.py (loading and calling the model)



# Option 1 (works): load by repo id straight from the Hub
# from transformers import AutoTokenizer, AutoModelForCausalLM

# Model name (adjust as needed)
# model_name = "Qwen/Qwen3-4b"

# Load the Qwen model
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = AutoModelForCausalLM.from_pretrained(model_name)
# model = AutoModelForCausalLM.from_pretrained(model_name, cache_dir='./')

# Test the model
# input_text = "Who are you?"
# inputs = tokenizer(input_text, return_tensors="pt")
# outputs = model.generate(**inputs)
# print(tokenizer.decode(outputs[0], skip_special_tokens=True))

# Option 2 (works): load from the local directory downloaded above
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

model_name = "/mnt/f/test/Qwen-model/models--Qwen--Qwen3-4b"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Test the model
input_text = "Write me a poem about the moon"
inputs = tokenizer(input_text, return_tensors="pt")
outputs = model.generate(**inputs, max_length=512)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

# Sampling variant:
# with torch.no_grad():
#     outputs = model.generate(
#         **inputs,
#         max_length=100,
#         temperature=0.7,
#         do_sample=True
#     )
# response = tokenizer.decode(outputs[0], skip_special_tokens=True)
# print(response)
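Qwen3 is an instruction-tuned chat model, so feeding it a raw string skips the chat template. A sketch of the usual chat-style call, reusing the tokenizer and model loaded above and assuming the local checkpoint ships a chat template (the official Qwen3 releases do):

# Build the prompt through the tokenizer's chat template before generating
messages = [{"role": "user", "content": "Write me a poem about the moon"}]
chat_inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt",
    return_dict=True,
)
chat_outputs = model.generate(**chat_inputs, max_new_tokens=512)
# Decode only the newly generated tokens, not the prompt
new_tokens = chat_outputs[0][chat_inputs["input_ids"].shape[1]:]
print(tokenizer.decode(new_tokens, skip_special_tokens=True))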

 

test2.py

from modelscope import AutoModelForCausalLM, AutoTokenizer
import torch

MODEL_PATH = "/mnt/f/test/Qwen-model/ZhipuAI/glm-edge-1.5b-chat"

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, device_map="auto")

message = [
    # {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Write me a short essay of about 300 words"}
    # {"role": "assistant", "content": "Hello! How can I help you?"},
]

# Build model inputs from the chat template and move them to the model's device
inputs = tokenizer.apply_chat_template(
    message,
    return_tensors="pt",
    add_generation_prompt=True,
    return_dict=True,
).to(model.device)

# Greedy-decoding variant that prints only the newly generated tokens:
# generate_kwargs = {
#     "input_ids": inputs["input_ids"],
#     "attention_mask": inputs["attention_mask"],
#     "max_new_tokens": 128,
#     "do_sample": False,
# }
# out = model.generate(**generate_kwargs)
# print(tokenizer.decode(out[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))

with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_length=2048,
        temperature=0.7,
        do_sample=True
    )
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)
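For longer generations it can help to stream tokens to the console as they are produced instead of waiting for the full output. A minimal sketch using transformers' TextStreamer, reusing the model, tokenizer, and inputs defined above (ModelScope's Auto classes return standard transformers models, so the streamer plugs in unchanged):

from transformers import TextStreamer

# Print tokens as they are generated instead of waiting for the full output
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
with torch.no_grad():
    model.generate(**inputs, max_new_tokens=512, streamer=streamer)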

 

 

 

