lqume committed
Commit ba80d6e · verified · 1 Parent(s): 69af6d2

Fixed tokenizer bug that caused gibberish image generation. Use AutoTokenizer instead of MT5Tokenizer

" Some weights of MT5EncoderModel were not initialized from the model checkpoint at google/mt5-small and are newly initialized: ['encoder.embed_tokens.weight', 'shared.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. "
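
For context, the quoted warning comes from the loading path this commit removes (see the diff below). A minimal repro sketch, assuming the same public checkpoint:

    # Old loading path (removed in this commit). transformers logs the warning
    # quoted above because the encoder's embedding weights end up newly
    # initialized rather than loaded, so prompts are encoded into meaningless
    # embeddings -- which is what produced the gibberish images.
    from transformers import MT5EncoderModel

    enc = MT5EncoderModel.from_pretrained("google/mt5-small", use_safetensors=True)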

Files changed (1):
  utils.py +17 -4
utils.py CHANGED
@@ -9,16 +9,29 @@ from imwatermark import WatermarkEncoder
 
 from diffusers.pipelines.pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 from diffusers.utils.torch_utils import randn_tensor
-from transformers import MT5Tokenizer, MT5EncoderModel
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 from typing import List, Optional, Tuple, Union
 
 # Determine device and torch dtype
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
 
-# Load MT5 tokenizer and encoder (can be replaced with private model + token if needed)
-tokenizer = MT5Tokenizer.from_pretrained("google/mt5-small", use_safetensors=True)
-encoder_model = MT5EncoderModel.from_pretrained("google/mt5-small", use_safetensors=True).to(device=device, dtype=torch_dtype)
+model_name = "google/mt5-small"  # or base / large / etc.
+
+# Load tokenizer:
+tokenizer = AutoTokenizer.from_pretrained(
+    model_name,
+    use_fast=False,  # use slow (SentencePiece) tokenizer to avoid subtle fast/slow differences
+    legacy=True,     # enforce legacy behavior (avoids "new vs legacy" mismatch warnings)
+)
+
+# Load model:
+encoder_model = AutoModelForSeq2SeqLM.from_pretrained(
+    model_name,
+    torch_dtype=torch_dtype,  # or whatever dtype you want (float32/float16/bfloat16)
+    device_map="auto",        # or omit this and call .to(device) manually
+)
+
 encoder_model.eval()
 
 class QPipeline(DiffusionPipeline):
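
As a quick sanity check that the new loading path yields usable text embeddings, one can encode a prompt and run only the encoder of the seq2seq model. A minimal sketch, assuming the loading code above has run (the prompt string is arbitrary and not part of the commit); note that the real signal of a correct load is that transformers no longer prints the "newly initialized" warning:

    import torch

    # Tokenize a prompt and run just the encoder stack of the seq2seq model.
    inputs = tokenizer("a photo of a cat", return_tensors="pt").to(encoder_model.device)
    with torch.no_grad():
        hidden = encoder_model.get_encoder()(**inputs).last_hidden_state

    assert torch.isfinite(hidden).all()  # no NaNs/Infs (these can appear with float16)
    print(hidden.shape)                  # (1, seq_len, 512) for mt5-small (d_model = 512)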