You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
2024-04-18T01:17:55.125611Z INFO lorax_router: router/src/main.rs:296: Warming up model
2024-04-18T01:17:55.167710Z INFO lorax_launcher: flash_causal_lm.py:793 Warming up to max_total_tokens: 488
2024-04-18T01:17:55.167711Z INFO lorax_launcher: flash_causal_lm.py:793 Warming up to max_total_tokens: 488
2024-04-18T01:17:55.548798Z ERROR lorax_launcher: interceptor.py:41 Method Warmup encountered an error.
Traceback (most recent call last):
File "/opt/conda/bin/lorax-server", line 8, in
sys.exit(app())
File "/opt/conda/lib/python3.10/site-packages/typer/main.py", line 311, in call
return get_command(self)(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/click/core.py", line 1157, in call
return self.main(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/typer/core.py", line 778, in main
return _main(
File "/opt/conda/lib/python3.10/site-packages/typer/core.py", line 216, in _main
rv = self.invoke(ctx)
File "/opt/conda/lib/python3.10/site-packages/click/core.py", line 1688, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/opt/conda/lib/python3.10/site-packages/click/core.py", line 1434, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/opt/conda/lib/python3.10/site-packages/click/core.py", line 783, in invoke
return __callback(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/typer/main.py", line 683, in wrapper
return callback(**use_params) # type: ignore
File "/opt/conda/lib/python3.10/site-packages/lorax_server/cli.py", line 89, in serve
server.serve(
File "/opt/conda/lib/python3.10/site-packages/lorax_server/server.py", line 321, in serve
asyncio.run(
File "/opt/conda/lib/python3.10/asyncio/runners.py", line 44, in run
return loop.run_until_complete(main)
File "/opt/conda/lib/python3.10/asyncio/base_events.py", line 636, in run_until_complete
self.run_forever()
File "/opt/conda/lib/python3.10/asyncio/base_events.py", line 603, in run_forever
self._run_once()
File "/opt/conda/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once
handle._run()
File "/opt/conda/lib/python3.10/asyncio/events.py", line 80, in _run
self._context.run(self._callback, *self._args)
File "/opt/conda/lib/python3.10/site-packages/grpc_interceptor/server.py", line 165, in invoke_intercept_method
return await self.intercept(
File "/opt/conda/lib/python3.10/site-packages/lorax_server/interceptor.py", line 38, in intercept
return await response
File "/opt/conda/lib/python3.10/site-packages/opentelemetry/instrumentation/grpc/_aio_server.py", line 82, in _unary_interceptor
raise error
File "/opt/conda/lib/python3.10/site-packages/opentelemetry/instrumentation/grpc/_aio_server.py", line 73, in _unary_interceptor
return await behavior(request_or_iterator, context)
File "/opt/conda/lib/python3.10/site-packages/lorax_server/server.py", line 81, in Warmup
max_supported_total_tokens = self.model.warmup(batch, request.max_new_tokens)
File "/opt/conda/lib/python3.10/site-packages/lorax_server/models/flash_causal_lm.py", line 796, in warmup
_, batch = self.generate_token(batch, is_warmup=True)
File "/opt/conda/lib/python3.10/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/opt/conda/lib/python3.10/site-packages/lorax_server/models/flash_causal_lm.py", line 936, in generate_token
raise e
File "/opt/conda/lib/python3.10/site-packages/lorax_server/models/flash_causal_lm.py", line 933, in generate_token
out = self.forward(batch, adapter_data)
File "/opt/conda/lib/python3.10/site-packages/lorax_server/models/flash_causal_lm.py", line 890, in forward
logits = model.forward(
File "/opt/conda/lib/python3.10/site-packages/lorax_server/models/custom_modeling/flash_qwen2_modeling.py", line 484, in forward
hidden_states = self.model(
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/lorax_server/models/custom_modeling/flash_qwen2_modeling.py", line 428, in forward
hidden_states, residual = layer(
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/lorax_server/models/custom_modeling/flash_qwen2_modeling.py", line 351, in forward
attn_output = self.self_attn(
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/lorax_server/models/custom_modeling/flash_qwen2_modeling.py", line 213, in forward
qkv = self.query_key_value(hidden_states, adapter_data)
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/lorax_server/utils/layers.py", line 606, in forward
result = self.base_layer(input)
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/lorax_server/utils/layers.py", line 402, in forward
return self.linear.forward(x)
File "/opt/conda/lib/python3.10/site-packages/lorax_server/utils/layers.py", line 288, in forward
if self.bias is not None and self.bias.dtype != x.dtype:
AttributeError: 'bool' object has no attribute 'dtype'
2024-04-18T01:17:55.549099Z ERROR warmup{max_input_length=1024 max_prefill_tokens=1024 max_total_tokens=1512}:warmup: lorax_client: router/client/src/lib.rs:34: Server error: 'bool' object has no attribute 'dtype'
2024-04-18T01:17:55.559823Z ERROR lorax_launcher: interceptor.py:41 Method Warmup encountered an error.
Traceback (most recent call last):
File "/opt/conda/bin/lorax-server", line 8, in
sys.exit(app())
File "/opt/conda/lib/python3.10/site-packages/typer/main.py", line 311, in call
return get_command(self)(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/click/core.py", line 1157, in call
return self.main(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/typer/core.py", line 778, in main
return _main(
File "/opt/conda/lib/python3.10/site-packages/typer/core.py", line 216, in _main
rv = self.invoke(ctx)
File "/opt/conda/lib/python3.10/site-packages/click/core.py", line 1688, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/opt/conda/lib/python3.10/site-packages/click/core.py", line 1434, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/opt/conda/lib/python3.10/site-packages/click/core.py", line 783, in invoke
return __callback(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/typer/main.py", line 683, in wrapper
return callback(**use_params) # type: ignore
File "/opt/conda/lib/python3.10/site-packages/lorax_server/cli.py", line 89, in serve
server.serve(
File "/opt/conda/lib/python3.10/site-packages/lorax_server/server.py", line 321, in serve
asyncio.run(
File "/opt/conda/lib/python3.10/asyncio/runners.py", line 44, in run
return loop.run_until_complete(main)
File "/opt/conda/lib/python3.10/asyncio/base_events.py", line 636, in run_until_complete
self.run_forever()
File "/opt/conda/lib/python3.10/asyncio/base_events.py", line 603, in run_forever
self._run_once()
File "/opt/conda/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once
handle._run()
File "/opt/conda/lib/python3.10/asyncio/events.py", line 80, in _run
self._context.run(self._callback, *self._args)
File "/opt/conda/lib/python3.10/site-packages/grpc_interceptor/server.py", line 165, in invoke_intercept_method
return await self.intercept(
File "/opt/conda/lib/python3.10/site-packages/lorax_server/interceptor.py", line 38, in intercept
return await response
File "/opt/conda/lib/python3.10/site-packages/opentelemetry/instrumentation/grpc/_aio_server.py", line 82, in _unary_interceptor
raise error
File "/opt/conda/lib/python3.10/site-packages/opentelemetry/instrumentation/grpc/_aio_server.py", line 73, in _unary_interceptor
return await behavior(request_or_iterator, context)
File "/opt/conda/lib/python3.10/site-packages/lorax_server/server.py", line 81, in Warmup
max_supported_total_tokens = self.model.warmup(batch, request.max_new_tokens)
File "/opt/conda/lib/python3.10/site-packages/lorax_server/models/flash_causal_lm.py", line 796, in warmup
_, batch = self.generate_token(batch, is_warmup=True)
File "/opt/conda/lib/python3.10/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/opt/conda/lib/python3.10/site-packages/lorax_server/models/flash_causal_lm.py", line 936, in generate_token
raise e
File "/opt/conda/lib/python3.10/site-packages/lorax_server/models/flash_causal_lm.py", line 933, in generate_token
out = self.forward(batch, adapter_data)
File "/opt/conda/lib/python3.10/site-packages/lorax_server/models/flash_causal_lm.py", line 890, in forward
logits = model.forward(
File "/opt/conda/lib/python3.10/site-packages/lorax_server/models/custom_modeling/flash_qwen2_modeling.py", line 484, in forward
hidden_states = self.model(
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/lorax_server/models/custom_modeling/flash_qwen2_modeling.py", line 428, in forward
hidden_states, residual = layer(
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/lorax_server/models/custom_modeling/flash_qwen2_modeling.py", line 351, in forward
attn_output = self.self_attn(
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/lorax_server/models/custom_modeling/flash_qwen2_modeling.py", line 213, in forward
qkv = self.query_key_value(hidden_states, adapter_data)
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/lorax_server/utils/layers.py", line 606, in forward
result = self.base_layer(input)
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/lorax_server/utils/layers.py", line 402, in forward
return self.linear.forward(x)
File "/opt/conda/lib/python3.10/site-packages/lorax_server/utils/layers.py", line 288, in forward
if self.bias is not None and self.bias.dtype != x.dtype:
AttributeError: 'bool' object has no attribute 'dtype'
2024-04-18T01:17:55.560073Z ERROR warmup{max_input_length=1024 max_prefill_tokens=1024 max_total_tokens=1512}:warmup: lorax_client: router/client/src/lib.rs:34: Server error: 'bool' object has no attribute 'dtype'
Error: Warmup(Generation("'bool' object has no attribute 'dtype'"))
2024-04-18T01:17:55.643388Z ERROR lorax_launcher: Webserver Crashed
Expected behavior
none
The text was updated successfully, but these errors were encountered:
System Info
LoRAX latest Docker image, 2× A100 GPUs, Ubuntu 22.04
Information
Tasks
Reproduction
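Load qwen-32b-chat with LoRAX. The server crashes during model warmup with `AttributeError: 'bool' object has no attribute 'dtype'`.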
Expected behavior
none
The text was updated successfully, but these errors were encountered: