alessandro trinca tornidor committed · commit accca77 · 1 parent: bbb9bc0

feat: support new ZeroGPU version, adding device_map/device/device2 to app_helpers.py functions
lisa_on_cuda/utils/app_helpers.py CHANGED
@@ -22,6 +22,13 @@ from . import constants, utils
 placeholders = utils.create_placeholder_variables()
 
 
+def get_device_map_kwargs(device_map="auto", device="cuda"):
+    kwargs = {"device_map": device_map}
+    if device != "cuda":
+        kwargs['device_map'] = {"": device}
+    return kwargs
+
+
 def parse_args(args_to_parse, internal_logger=None):
     if internal_logger is None:
         internal_logger = app_logger
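The new get_device_map_kwargs helper is the core of the change: it keeps the usual device_map="auto" behaviour on CUDA, and otherwise pins the whole model to a single device, using the transformers convention that a device_map of {"": device} places every module on that device. Illustrative calls, with results derived from the helper above:

get_device_map_kwargs(device_map="auto", device="cuda")
# -> {"device_map": "auto"}
get_device_map_kwargs(device_map="auto", device="cpu")
# -> {"device_map": {"": "cpu"}}  # whole model on CPU, so CUDA is never initialized at load time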
@@ -118,12 +125,13 @@ def preprocess(
 
 def load_model_for_causal_llm_pretrained(
     version, torch_dtype, load_in_8bit, load_in_4bit, seg_token_idx, vision_tower,
-    internal_logger: logging = None, device_map="auto"
+    internal_logger: logging = None, device_map="auto", device="cuda"
 ):
     if internal_logger is None:
         internal_logger = app_logger
     internal_logger.debug(f"prepare kwargs, 4bit:{load_in_4bit}, 8bit:{load_in_8bit}.")
-    kwargs = {"torch_dtype": torch_dtype, "device_map": device_map}
+    kwargs_device_map = get_device_map_kwargs(device_map=device_map, device=device)
+    kwargs = {"torch_dtype": torch_dtype, **kwargs_device_map}
     if load_in_4bit:
         kwargs.update(
             {
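Before this change the loader appears to have passed device_map straight through; now the helper decides. A sketch of what the kwargs dict holds just before the quantization branches run, assuming float16 weights and a CPU-pinned load:

import torch

kwargs = {"torch_dtype": torch.float16, **get_device_map_kwargs(device_map="auto", device="cpu")}
# -> {"torch_dtype": torch.float16, "device_map": {"": "cpu"}}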
@@ -154,15 +162,27 @@ def load_model_for_causal_llm_pretrained(
         low_cpu_mem_usage=True,
         vision_tower=vision_tower,
         seg_token_idx=seg_token_idx,
-        # try to avoid CUDA init RuntimeError on ZeroGPU huggingface hardware
-        device_map=device_map,
+        # try to avoid CUDA init RuntimeError on ZeroGPU huggingface hardware (device_map injected via **kwargs)
         **kwargs
     )
     internal_logger.debug("model loaded!")
     return _model
 
 
-def get_model(args_to_parse, internal_logger: logging = None, inference_decorator: Callable = None, device_map="auto"):
+def get_model(args_to_parse, internal_logger: logging = None, inference_decorator: Callable = None, device_map="auto", device="cpu", device2="cuda"):
+    """Load the model and inference function from the given arguments. Compatible with ZeroGPU (spaces 0.30.2).
+
+    Args:
+        args_to_parse: default input arguments
+        internal_logger: logger
+        inference_decorator: optional inference decorator (the ZeroGPU spaces.GPU decorator is supported and tested)
+        device_map: device map needed on ZeroGPU cuda hardware
+        device: device the model weights are loaded on; defaults to "cpu" to avoid a bug when loading the model on ZeroGPU
+        device2: device the vision tower is moved to after loading; defaults to "cuda"
+
+    Returns:
+        inference function with the LISA model
+    """
     if internal_logger is None:
         internal_logger = app_logger
     internal_logger.info(f"starting model preparation, folder creation for path: {args_to_parse.vis_save_path}.")
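The docstring pins down the supported decorator: ZeroGPU's spaces.GPU. A minimal call-site sketch, assuming the Space runs on ZeroGPU hardware and that args comes from parse_args (the empty argument list here is illustrative):

import spaces  # Hugging Face ZeroGPU helper package

args = parse_args([])
model, clip_image_processor, tokenizer, transform = get_model(
    args,
    inference_decorator=spaces.GPU,  # GPU-touching work runs under this decorator
    device_map="auto",
    device="cpu",    # weights land on CPU in the main process
    device2="cuda",  # vision tower is moved to CUDA afterwards
)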
@@ -201,7 +221,8 @@ def get_model(args_to_parse, internal_logger: logging = None, inference_decorato
             load_in_4bit=args_to_parse.load_in_4bit,
             seg_token_idx=args_to_parse.seg_token_idx,
             vision_tower=args_to_parse.vision_tower,
-            device_map=device_map  # try to avoid CUDA init RuntimeError on ZeroGPU huggingface hardware
+            device_map=device_map,  # try to avoid CUDA init RuntimeError on ZeroGPU huggingface hardware
+            device=device
         )) if inference_decorator else load_model_for_causal_llm_pretrained(
             args_to_parse.version,
             torch_dtype=torch_dtype,
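Both branches of the conditional above now receive the same device arguments; only the decorator differs. As a generic pattern (hypothetical names, not necessarily the exact call structure used in this file), optionally decorating a loader looks like:

def optionally_decorated(loader, decorator=None):
    # apply a decorator such as spaces.GPU only when one is provided
    return decorator(loader) if decorator else loader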
@@ -226,10 +247,8 @@ def get_model(args_to_parse, internal_logger: logging = None, inference_decorato
     )
     internal_logger.debug(f"_model type:{type(_model)} => {_model}.")
     internal_logger.debug(f"vision_tower type:{type(vision_tower)} => {vision_tower}.")
-    # set device to
-    device
-    internal_logger.debug(f"device to use with vision tower:{device}, device_map:{device_map}, local_rank:{args_to_parse.local_rank}.")
-    vision_tower.to(device=device)
+    # set device to "cuda": try to avoid CUDA init RuntimeError on ZeroGPU huggingface hardware
+    vision_tower.to(device=device2)
     internal_logger.debug("vision tower loaded, prepare clip image processor...")
     _clip_image_processor = CLIPImageProcessor.from_pretrained(_model.config.vision_tower)
     internal_logger.debug("clip image processor done.")
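The load-on-CPU / move-to-CUDA split mirrors the ZeroGPU execution model: CUDA may only be initialized inside a spaces.GPU context, otherwise the Space hits the CUDA init RuntimeError the comments mention. A minimal sketch of that constraint, with a hypothetical function name:

import spaces
import torch

@spaces.GPU
def warm_up(module: torch.nn.Module) -> str:
    module.to("cuda")  # CUDA is first touched here, inside the GPU-allocated context
    return "ok"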
@@ -274,12 +293,24 @@ def prepare_model_vision_tower(_model, args_to_parse, torch_dtype, internal_logg
 
 
 def get_inference_model_by_args(
-    args_to_parse, internal_logger0: logging = None, inference_decorator: Callable = None, device_map="auto"
+    args_to_parse, internal_logger0: logging = None, inference_decorator: Callable = None, device_map="auto", device="cuda"
 ):
+    """Load the model and inference function from the given arguments. Compatible with ZeroGPU (spaces 0.30.2).
+
+    Args:
+        args_to_parse: default input arguments
+        internal_logger0: logger
+        inference_decorator: optional inference decorator (the ZeroGPU spaces.GPU decorator is supported and tested)
+        device_map: device map needed on ZeroGPU cuda hardware, forwarded to get_model()
+        device: device forwarded to get_model()
+
+    Returns:
+        inference function with the LISA model
+    """
     if internal_logger0 is None:
         internal_logger0 = app_logger
     internal_logger0.info(f"args_to_parse:{args_to_parse}, creating model...")
-    model, clip_image_processor, tokenizer, transform = get_model(args_to_parse, device_map=device_map)
+    model, clip_image_processor, tokenizer, transform = get_model(args_to_parse, device_map=device_map, device=device)
     internal_logger0.info("created model, preparing inference function")
     no_seg_out = placeholders["no_seg_out"]
 
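Callers only need the new keyword arguments; everything else is unchanged. A hypothetical app.py wiring for the new signature, assuming args comes from parse_args:

import spaces

args = parse_args([])
inference_fn = get_inference_model_by_args(
    args, inference_decorator=spaces.GPU, device_map="auto", device="cuda"
)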