Download the demo resource
wget http://download.openmmlab.com/agentlego/road.jpg
Use the tool directly (without agent)
from agentlego.apis import load_tool
# load tool
tool = load_tool('ObjectDetection', device='cuda')
# apply tool
visualization = tool('road.jpg')
With Lagent
from lagent import ReAct, GPTAPI, ActionExecutor
from agentlego.apis import load_tool
# load tools and build agent
# Please set the `OPENAI_API_KEY` environment variable before running.
tool = load_tool('ObjectDetection', device='cuda').to_lagent()
agent = ReAct(GPTAPI(temperature=0.), action_executor=ActionExecutor([tool]))
# agent running with the tool.
ret = agent.chat(f'Please detect all objects in the image `road.jpg`.')
for step in ret.inner_steps[1:]:
    print('------')
    print(step['content'])
Before using the tool, please make sure the required dependencies are installed by running the commands below.
pip install openmim
mim install mmdet
This tool uses an RTMDet model by default. See the following paper for details.
@misc{lyu2022rtmdet,
title={RTMDet: An Empirical Study of Designing Real-Time Object Detectors},
author={Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen},
year={2022},
eprint={2212.07784},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
Download the demo resource
wget http://download.openmmlab.com/agentlego/road.jpg
Use the tool directly (without agent)
from agentlego.apis import load_tool
# load tool
tool = load_tool('TextToBbox', device='cuda')
# apply tool
visualization, result = tool('road.jpg', 'The largest white truck')
With Lagent
from lagent import ReAct, GPTAPI, ActionExecutor
from agentlego.apis import load_tool
# load tools and build agent
# Please set the `OPENAI_API_KEY` environment variable before running.
tool = load_tool('TextToBbox', device='cuda').to_lagent()
agent = ReAct(GPTAPI(temperature=0.), action_executor=ActionExecutor([tool]))
# agent running with the tool.
ret = agent.chat(f'Please detect the largest white truck in the image `road.jpg`.')
for step in ret.inner_steps[1:]:
    print('------')
    print(step['content'])
Before using the tool, please make sure the required dependencies are installed by running the commands below.
pip install openmim
mim install mmdet
This tool uses a GLIP model. See the following paper for details.
@inproceedings{li2021grounded,
title={Grounded Language-Image Pre-training},
author={Liunian Harold Li* and Pengchuan Zhang* and Haotian Zhang* and Jianwei Yang and Chunyuan Li and Yiwu Zhong and Lijuan Wang and Lu Yuan and Lei Zhang and Jenq-Neng Hwang and Kai-Wei Chang and Jianfeng Gao},
year={2022},
booktitle={CVPR},
}