4 | 4 | # Code: https://github.com/rasbt/LLMs-from-scratch |
5 | 5 |
6 | 6 | from .utils import KVCache # noqa: F401 |
7 | | - |
8 | | -import os |
9 | | -import urllib.request |
10 | | -from pathlib import Path |
| 7 | +from ..qwen3 import ( # noqa: F401 |
| 8 | + QWEN_CONFIG_06_B, QWEN3_CONFIG_1_7B, QWEN3_CONFIG_4B, |
| 9 | + QWEN3_CONFIG_8B, QWEN3_CONFIG_14B, QWEN3_CONFIG_32B, |
| 10 | + Qwen3Tokenizer, load_weights_into_qwen, |
| 11 | + download_from_huggingface, |
| 12 | + download_from_huggingface_from_snapshots |
| 13 | +) |
11 | 14 |
12 | 15 | import torch |
13 | 16 | import torch.nn as nn |
14 | 17 |
15 | | -# 0.6B model |
16 | | -QWEN_CONFIG_06_B = { |
17 | | - "vocab_size": 151_936, # Vocabulary size |
18 | | - "context_length": 40_960, # Context length that was used to train the model |
19 | | - "emb_dim": 1024, # Embedding dimension |
20 | | - "n_heads": 16, # Number of attention heads |
21 | | - "n_layers": 28, # Number of layers |
22 | | - "hidden_dim": 3072, # Size of the intermediate dimension in FeedForward |
23 | | - "head_dim": 128, # Size of the heads in GQA |
24 | | - "qk_norm": True, # Whether to normalize queries and values in GQA |
25 | | - "n_kv_groups": 8, # Key-Value groups for grouped-query attention |
26 | | - "rope_base": 1_000_000.0, # The base in RoPE's "theta" |
27 | | - "dtype": torch.bfloat16, # Lower-precision dtype to reduce memory usage |
28 | | -} |
29 | | - |
30 | 18 |
31 | 19 | class Qwen3Model(nn.Module): |
32 | 20 | def __init__(self, cfg): |
@@ -285,150 +273,3 @@ def forward(self, x): |
285 | 273 | norm_x = norm_x + self.shift |
286 | 274 |
287 | 275 | return norm_x.to(input_dtype) |
288 | | - |
289 | | - |
290 | | -def load_weights_into_qwen(model, param_config, params): |
291 | | - def assign(left, right, tensor_name="unknown"): |
292 | | - if left.shape != right.shape: |
293 | | - raise ValueError(f"Shape mismatch in tensor '{tensor_name}'. Left: {left.shape}, Right: {right.shape}") |
294 | | - return torch.nn.Parameter(right.clone().detach() if isinstance(right, torch.Tensor) else torch.tensor(right)) |
295 | | - |
296 | | - model.tok_emb.weight = assign(model.tok_emb.weight, params["model.embed_tokens.weight"], "model.embed_tokens.weight") |
297 | | - |
298 | | - for l in range(param_config["n_layers"]): |
299 | | - block = model.trf_blocks[l] |
300 | | - att = block.att |
301 | | - |
302 | | - # Q, K, V projections |
303 | | - att.W_query.weight = assign( |
304 | | - att.W_query.weight, |
305 | | - params[f"model.layers.{l}.self_attn.q_proj.weight"], |
306 | | - f"model.layers.{l}.self_attn.q_proj.weight" |
307 | | - ) |
308 | | - att.W_key.weight = assign( |
309 | | - att.W_key.weight, |
310 | | - params[f"model.layers.{l}.self_attn.k_proj.weight"], |
311 | | - f"model.layers.{l}.self_attn.k_proj.weight" |
312 | | - ) |
313 | | - att.W_value.weight = assign( |
314 | | - att.W_value.weight, |
315 | | - params[f"model.layers.{l}.self_attn.v_proj.weight"], |
316 | | - f"model.layers.{l}.self_attn.v_proj.weight" |
317 | | - ) |
318 | | - |
319 | | - # Output projection |
320 | | - att.out_proj.weight = assign( |
321 | | - att.out_proj.weight, |
322 | | - params[f"model.layers.{l}.self_attn.o_proj.weight"], |
323 | | - f"model.layers.{l}.self_attn.o_proj.weight" |
324 | | - ) |
325 | | - |
326 | | - # QK norms |
327 | | - if hasattr(att, "q_norm") and att.q_norm is not None: |
328 | | - att.q_norm.scale = assign( |
329 | | - att.q_norm.scale, |
330 | | - params[f"model.layers.{l}.self_attn.q_norm.weight"], |
331 | | - f"model.layers.{l}.self_attn.q_norm.weight" |
332 | | - ) |
333 | | - if hasattr(att, "k_norm") and att.k_norm is not None: |
334 | | - att.k_norm.scale = assign( |
335 | | - att.k_norm.scale, |
336 | | - params[f"model.layers.{l}.self_attn.k_norm.weight"], |
337 | | - f"model.layers.{l}.self_attn.k_norm.weight" |
338 | | - ) |
339 | | - |
340 | | - # Attention layernorm |
341 | | - block.norm1.scale = assign( |
342 | | - block.norm1.scale, |
343 | | - params[f"model.layers.{l}.input_layernorm.weight"], |
344 | | - f"model.layers.{l}.input_layernorm.weight" |
345 | | - ) |
346 | | - |
347 | | - # Feedforward weights |
348 | | - block.ff.fc1.weight = assign( |
349 | | - block.ff.fc1.weight, |
350 | | - params[f"model.layers.{l}.mlp.gate_proj.weight"], |
351 | | - f"model.layers.{l}.mlp.gate_proj.weight" |
352 | | - ) |
353 | | - block.ff.fc2.weight = assign( |
354 | | - block.ff.fc2.weight, |
355 | | - params[f"model.layers.{l}.mlp.up_proj.weight"], |
356 | | - f"model.layers.{l}.mlp.up_proj.weight" |
357 | | - ) |
358 | | - block.ff.fc3.weight = assign( |
359 | | - block.ff.fc3.weight, |
360 | | - params[f"model.layers.{l}.mlp.down_proj.weight"], |
361 | | - f"model.layers.{l}.mlp.down_proj.weight" |
362 | | - ) |
363 | | - block.norm2.scale = assign( |
364 | | - block.norm2.scale, |
365 | | - params[f"model.layers.{l}.post_attention_layernorm.weight"], |
366 | | - f"model.layers.{l}.post_attention_layernorm.weight" |
367 | | - ) |
368 | | - |
369 | | - # Final normalization and output head |
370 | | - model.final_norm.scale = assign(model.final_norm.scale, params["model.norm.weight"], "model.norm.weight") |
371 | | - |
372 | | - # Model uses weight tying, hence we reuse the embedding layer weights here |
373 | | - model.out_head.weight = assign(model.out_head.weight, params["model.embed_tokens.weight"], "model.embed_tokens.weight") |
374 | | - |
375 | | - |
376 | | -class Qwen3Tokenizer(): |
377 | | - def __init__(self, tokenizer_file_path="tokenizer.json", |
378 | | - repo_id=None, add_generation_prompt=False, add_thinking=False): |
379 | | - from tokenizers import Tokenizer |
380 | | - self.tokenizer_file_path = tokenizer_file_path |
381 | | - |
382 | | - if add_generation_prompt != add_thinking: |
383 | | - raise ValueError( |
384 | | - "Only add_generation_prompt==add_thinking settings are currently supported" |
385 | | - ) |
386 | | - |
387 | | - self.add_generation_prompt = add_generation_prompt |
388 | | - self.add_thinking = add_thinking |
389 | | - |
390 | | - tokenizer_file_path_obj = Path(tokenizer_file_path) |
391 | | - if not tokenizer_file_path_obj.is_file() and repo_id is not None: |
392 | | - _ = download_from_huggingface( |
393 | | - repo_id=repo_id, |
394 | | - filename=str(tokenizer_file_path_obj.name), |
395 | | - local_dir=str(tokenizer_file_path_obj.parent.name) |
396 | | - ) |
397 | | - self.tokenizer = Tokenizer.from_file(tokenizer_file_path) |
398 | | - |
399 | | - def encode(self, prompt): |
400 | | - messages = [ |
401 | | - {"role": "user", "content": prompt} |
402 | | - ] |
403 | | - formatted_prompt = self.format_qwen_chat( |
404 | | - messages, |
405 | | - add_generation_prompt=self.add_generation_prompt, |
406 | | - add_thinking=self.add_thinking |
407 | | - ) |
408 | | - return self.tokenizer.encode(formatted_prompt).ids |
409 | | - |
410 | | - def decode(self, token_ids): |
411 | | - return self.tokenizer.decode(token_ids, skip_special_tokens=False) |
412 | | - |
413 | | - @staticmethod |
414 | | - def format_qwen_chat(messages, add_generation_prompt=False, add_thinking=False): |
415 | | - prompt = "" |
416 | | - for msg in messages: |
417 | | - prompt += f"<|im_start|>{msg['role']}\n{msg['content']}<|im_end|>\n" |
418 | | - if add_generation_prompt: |
419 | | - prompt += "<|im_start|>assistant" |
420 | | - if not add_thinking: |
421 | | - prompt += "<think>\n\n</think>\n\n"
422 | | - else: |
423 | | - prompt += "\n" |
424 | | - return prompt |
425 | | - |
426 | | - |
427 | | -def download_from_huggingface(repo_id, filename, local_dir, revision="main"): |
428 | | - base_url = "https://huggingface.co" |
429 | | - url = f"{base_url}/{repo_id}/resolve/{revision}/{filename}" |
430 | | - Path(local_dir).mkdir(parents=True, exist_ok=True) |
431 | | - dest_path = os.path.join(local_dir, filename) |
432 | | - print(f"Downloading {url} to {dest_path}...") |
433 | | - urllib.request.urlretrieve(url, dest_path) |
434 | | - return dest_path |
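
With this change, the config dicts, tokenizer, weight loader, and download helpers are re-exported from `..qwen3` instead of being duplicated locally, so only the KV-cache-aware `Qwen3Model` remains defined in this module. The following is a minimal usage sketch of how the re-exported helpers fit together, mirroring the removed local code; the package import path, Hugging Face repo id, and checkpoint file names are illustrative assumptions, not taken from this diff.

```python
# Usage sketch (not part of the diff). Import path, repo id, and file names
# below are assumptions for illustration; adjust to the actual package layout.
import torch
from safetensors.torch import load_file  # weights assumed to be a single .safetensors file

from llms_from_scratch.qwen3_kv_cache import (  # assumed module path
    Qwen3Model, QWEN_CONFIG_06_B, Qwen3Tokenizer,
    load_weights_into_qwen, download_from_huggingface,
)

repo_id = "Qwen/Qwen3-0.6B"  # assumed model repository
local_dir = "Qwen3-0.6B"

# Fetch tokenizer and weights via the re-exported download helper.
tok_path = download_from_huggingface(repo_id, "tokenizer.json", local_dir)
weights_path = download_from_huggingface(repo_id, "model.safetensors", local_dir)

tokenizer = Qwen3Tokenizer(
    tokenizer_file_path=tok_path, repo_id=repo_id,
    add_generation_prompt=True, add_thinking=True,
)

# Instantiate the local KV-cache model and copy the pretrained weights into it.
model = Qwen3Model(QWEN_CONFIG_06_B)
load_weights_into_qwen(model, QWEN_CONFIG_06_B, load_file(weights_path))
model.eval()

token_ids = tokenizer.encode("Give me a short introduction to large language models.")
input_ids = torch.tensor(token_ids).unsqueeze(0)  # add batch dimension
```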