Publications | Qi Chen

2025

AAAI

Attention-driven GUI Grounding: Leveraging Pretrained Multimodal Large Language Models without Fine-Tuning

Hai-Ming Xu^*, Qi Chen^*, Lei Wang, and Lingqiao Liu

In AAAI Conference on Artificial Intelligence (AAAI), 2025

% Conference paper, AAAI 2025. The acronym in the title is braced so that
% sentence-casing bibliography styles cannot lowercase it.
@inproceedings{xu2024attention,
  author    = {Xu, Hai-Ming and Chen, Qi and Wang, Lei and Liu, Lingqiao},
  title     = {Attention-driven {GUI} Grounding: Leveraging Pretrained Multimodal Large Language Models without Fine-Tuning},
  booktitle = {AAAI Conference on Artificial Intelligence (AAAI)},
  year      = {2025},
}

2024

TCSVT

Improving Video Moment Retrieval by Auxiliary Moment-Query Pairs with Hyper-Interaction

Runhao Zeng, Yishen Zhuo, Jialiang Li, Yunjin Yang, Huisi Wu, Qi Chen^†, Xiping Hu, and Victor C. M. Leung

IEEE Transactions on Circuits and Systems for Video Technology (TCSVT), 2024

Bib PDF

% Journal article in IEEE TCSVT, 2024. A Transactions venue is a journal, so
% the venue is stored in the journal field of an article entry rather than in
% booktitle. Volume, number and pages are not recorded here.
@article{zeng2024improving,
  author  = {Zeng, Runhao and Zhuo, Yishen and Li, Jialiang and Yang, Yunjin and Wu, Huisi and Chen, Qi and Hu, Xiping and Leung, Victor C. M.},
  title   = {Improving Video Moment Retrieval by Auxiliary Moment-Query Pairs with Hyper-Interaction},
  journal = {IEEE Transactions on Circuits and Systems for Video Technology (TCSVT)},
  year    = {2024},
}

NeurIPS

Weak-eval-Strong: Evaluating and Eliciting Lateral Thinking of LLMs with Situation Puzzles

Qi Chen, Bowen Zhang, Gang Wang, and Qi Wu

In Conference on Neural Information Processing Systems (NeurIPS), 2024

Bib PDF

% Conference paper, NeurIPS 2024. The mixed-case method name and the acronym
% are braced so that sentence-casing styles keep their capitalisation.
@inproceedings{chen2024weak,
  author    = {Chen, Qi and Zhang, Bowen and Wang, Gang and Wu, Qi},
  title     = {{Weak-eval-Strong}: Evaluating and Eliciting Lateral Thinking of {LLMs} with Situation Puzzles},
  booktitle = {Conference on Neural Information Processing Systems (NeurIPS)},
  year      = {2024},
}

ACCV

Act Like a Radiologist: Radiology Report Generation across Anatomical Regions

Qi Chen, Yutong Xie, Biao Wu, Xiaomin Chen, James Ang, Minh-Son To, Xiaojun Chang, and Qi Wu

In Asian Conference on Computer Vision (ACCV) (Oral), 2024

Bib PDF

% Conference paper, ACCV 2024. The presentation type is kept in the note
% field so that booktitle holds only the proceedings name.
@inproceedings{chen2023s4m,
  author    = {Chen, Qi and Xie, Yutong and Wu, Biao and Chen, Xiaomin and Ang, James and To, Minh-Son and Chang, Xiaojun and Wu, Qi},
  title     = {Act Like a Radiologist: Radiology Report Generation across Anatomical Regions},
  booktitle = {Asian Conference on Computer Vision (ACCV)},
  year      = {2024},
  note      = {Oral},
}

TPAMI

Towards lightweight super-resolution with dual regression learning

Yong Guo, Jingdong Wang, Qi Chen, Jiezhang Cao, Zeshuai Deng, Yanwu Xu, Jian Chen, and Mingkui Tan

IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI), 2024

Bib PDF

% Journal article, IEEE TPAMI, 2024. No volume or page data is recorded.
@article{guo2024towards,
  author  = {Guo, Yong and Wang, Jingdong and Chen, Qi and Cao, Jiezhang and Deng, Zeshuai and Xu, Yanwu and Chen, Jian and Tan, Mingkui},
  title   = {Towards lightweight super-resolution with dual regression learning},
  year    = {2024},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)},
}

CVPR

G-NeRF: Geometry-enhanced Novel View Synthesis from Single-View Images

Zixiong Huang^*, Qi Chen^*, Libo Sun, Yifan Yang, Naizhou Wang, Qi Wu, and Mingkui Tan

In Conference on Computer Vision and Pattern Recognition (CVPR), 2024

Bib PDF

% Conference paper, CVPR 2024, pages 10117--10126. The method name is braced
% so that sentence-casing styles do not render it as "G-nerf".
@inproceedings{huang2024g,
  author    = {Huang, Zixiong and Chen, Qi and Sun, Libo and Yang, Yifan and Wang, Naizhou and Wu, Qi and Tan, Mingkui},
  title     = {{G-NeRF}: Geometry-enhanced Novel View Synthesis from Single-View Images},
  booktitle = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  pages     = {10117--10126},
  year      = {2024},
}

CVPR

PairAug: What Can Augmented Image-Text Pairs Do for Radiology?

Yutong Xie^*, Qi Chen^*, Sinuo Wang, Minh-Son To, Iris Lee, Ee Win Khoo, Kerolos Hendy, Daniel Koh, Yong Xia, and Qi Wu

In Conference on Computer Vision and Pattern Recognition (CVPR), 2024

Bib PDF

% Conference paper, CVPR 2024, pages 11652--11661. The camel-case method name
% is braced so that sentence-casing styles do not render it as "Pairaug".
@inproceedings{xie2024pairaug,
  author    = {Xie, Yutong and Chen, Qi and Wang, Sinuo and To, Minh-Son and Lee, Iris and Khoo, Ee Win and Hendy, Kerolos and Koh, Daniel and Xia, Yong and Wu, Qi},
  title     = {{PairAug}: What Can Augmented Image-Text Pairs Do for Radiology?},
  booktitle = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  pages     = {11652--11661},
  year      = {2024},
}

AAAI

WebVLN: Vision-and-language navigation on websites

Qi Chen, Dileepa Pitawela, Chongyang Zhao, Gengze Zhou, Hsiang-Ting Chen, and Qi Wu

In AAAI Conference on Artificial Intelligence (AAAI), 2024

arXiv Bib

% Conference paper, AAAI 2024 (volume 38, number 2, pages 1165--1173).
% The acronym's capitalisation is restored and braced against re-casing.
% NOTE(review): some classic BibTeX styles warn when an inproceedings entry
% carries both volume and number -- confirm against the target style.
@inproceedings{chen2024webvln,
  author    = {Chen, Qi and Pitawela, Dileepa and Zhao, Chongyang and Zhou, Gengze and Chen, Hsiang-Ting and Wu, Qi},
  title     = {{WebVLN}: Vision-and-Language Navigation on Websites},
  booktitle = {AAAI Conference on Artificial Intelligence (AAAI)},
  volume    = {38},
  number    = {2},
  pages     = {1165--1173},
  year      = {2024},
}

2023

ICCV

Prompt switch: Efficient CLIP adaptation for text-video retrieval

Chaorui Deng^*, Qi Chen^*, Pengda Qin, Da Chen, and Qi Wu

In International Conference on Computer Vision (ICCV), 2023

Bib PDF

% Conference paper, ICCV 2023, pages 15648--15658. The acronym CLIP is
% restored to upper case and braced; the title is stored in Title Case so
% that styles can downcase it but never have to guess lost capitals.
@inproceedings{deng2023prompt,
  author    = {Deng, Chaorui and Chen, Qi and Qin, Pengda and Chen, Da and Wu, Qi},
  title     = {Prompt Switch: Efficient {CLIP} Adaptation for Text-Video Retrieval},
  booktitle = {International Conference on Computer Vision (ICCV)},
  pages     = {15648--15658},
  year      = {2023},
}

2022

NeurIPS

Learning distinct and representative modes for image captioning

Qi Chen, Chaorui Deng, and Qi Wu

In Conference on Neural Information Processing Systems (NeurIPS), 2022

Bib PDF

% Conference paper, NeurIPS 2022 (volume 35, pages 9472--9485). The venue is
% a conference, so it is stored in booktitle of an inproceedings entry
% rather than in the journal field of an article entry.
@inproceedings{chen2022learning,
  author    = {Chen, Qi and Deng, Chaorui and Wu, Qi},
  title     = {Learning distinct and representative modes for image captioning},
  booktitle = {Conference on Neural Information Processing Systems (NeurIPS)},
  volume    = {35},
  pages     = {9472--9485},
  year      = {2022},
}

CVPR

V2C: Visual voice cloning

Qi Chen, Mingkui Tan, Yuankai Qi, Jiaqiu Zhou, Yuanqing Li, and Qi Wu

In Conference on Computer Vision and Pattern Recognition (CVPR), 2022

Bib PDF

% Conference paper, CVPR 2022, pages 21242--21251. The task acronym is braced
% so that sentence-casing styles do not render it as "V2c".
@inproceedings{chen2022v2c,
  author    = {Chen, Qi and Tan, Mingkui and Qi, Yuankai and Zhou, Jiaqiu and Li, Yuanqing and Wu, Qi},
  title     = {{V2C}: Visual Voice Cloning},
  booktitle = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  pages     = {21242--21251},
  year      = {2022},
}

2021

ACM MM

R-GAN: Exploring human-like way for reasonable text-to-image synthesis via generative adversarial networks

Yanyuan Qiao, Qi Chen, Chaorui Deng, Ning Ding, Yuankai Qi, Mingkui Tan, Xincheng Ren, and Qi Wu

In ACM International Conference on Multimedia (ACM MM), 2021

Bib PDF

% Conference paper, ACM MM 2021, pages 2085--2093. The model name is braced
% so that sentence-casing styles do not render it as "R-gan".
@inproceedings{qiao2021r,
  author    = {Qiao, Yanyuan and Chen, Qi and Deng, Chaorui and Ding, Ning and Qi, Yuankai and Tan, Mingkui and Ren, Xincheng and Wu, Qi},
  title     = {{R-GAN}: Exploring Human-like Way for Reasonable Text-to-Image Synthesis via Generative Adversarial Networks},
  booktitle = {ACM International Conference on Multimedia (ACM MM)},
  pages     = {2085--2093},
  year      = {2021},
}

TPAMI

Towards accurate and compact architectures via neural architecture transformer

Yong Guo, Yin Zheng, Mingkui Tan, Qi Chen, Zhipeng Li, Jian Chen, Peilin Zhao, and Junzhou Huang

IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI), 2021

Bib PDF

% Journal article, IEEE TPAMI, volume 44, number 10, pages 6501--6516.
% NOTE(review): the recorded year is 2021 -- confirm that it matches the
% issue date of volume 44 in the publisher record.
@article{guo2021towards,
  author  = {Guo, Yong and Zheng, Yin and Tan, Mingkui and Chen, Qi and Li, Zhipeng and Chen, Jian and Zhao, Peilin and Huang, Junzhou},
  title   = {Towards accurate and compact architectures via neural architecture transformer},
  year    = {2021},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)},
  volume  = {44},
  number  = {10},
  pages   = {6501--6516},
}

CVPR

Contrastive neural architecture search with neural architecture comparators

Yaofo Chen, Yong Guo, Qi Chen, Minli Li, Wei Zeng, Yaowei Wang, and Mingkui Tan

In Conference on Computer Vision and Pattern Recognition (CVPR), 2021

Bib PDF

% Conference paper, CVPR 2021, pages 9502--9511.
@inproceedings{chen2021contrastive,
  author    = {Chen, Yaofo and Guo, Yong and Chen, Qi and Li, Minli and Zeng, Wei and Wang, Yaowei and Tan, Mingkui},
  title     = {Contrastive neural architecture search with neural architecture comparators},
  year      = {2021},
  booktitle = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  pages     = {9502--9511},
}

2020

ACM MM

Dynamic extension nets for few-shot semantic segmentation

Lizhao Liu^*, Qi Chen^*, Junyi Cao, Minqian Liu, Yong Guo, and Mingkui Tan

In ACM International Conference on Multimedia (ACM MM), 2020

Bib PDF

% Conference paper, ACM MM 2020, pages 1441--1449.
@inproceedings{liu2020dynamic,
  author    = {Liu, Lizhao and Chen, Qi and Cao, Junyi and Liu, Minqian and Guo, Yong and Tan, Mingkui},
  title     = {Dynamic extension nets for few-shot semantic segmentation},
  year      = {2020},
  booktitle = {ACM International Conference on Multimedia (ACM MM)},
  pages     = {1441--1449},
}

TIP

Scripted video generation with a bottom-up generative adversarial network

Qi Chen, Qi Wu, Jian Chen, Qingyao Wu, Anton van den Hengel, and Mingkui Tan

IEEE Transactions on Image Processing (TIP), 2020

Bib PDF

% Journal article, IEEE TIP, volume 29, pages 7454--7467, 2020.
% The surname particle "van den" is written in "von Last, First" form.
@article{chen2020scripted,
  author  = {Chen, Qi and Wu, Qi and Chen, Jian and Wu, Qingyao and van den Hengel, Anton and Tan, Mingkui},
  title   = {Scripted video generation with a bottom-up generative adversarial network},
  year    = {2020},
  journal = {IEEE Transactions on Image Processing (TIP)},
  volume  = {29},
  pages   = {7454--7467},
}

CVPR

Closed-loop matters: Dual regression networks for single image super-resolution

Yong Guo, Jian Chen, Jingdong Wang, Qi Chen, Jiezhang Cao, Zeshuai Deng, Yanwu Xu, and Mingkui Tan

In Conference on Computer Vision and Pattern Recognition (CVPR), 2020

Bib PDF

% Conference paper, CVPR 2020, pages 5407--5416.
@inproceedings{guo2020closed,
  author    = {Guo, Yong and Chen, Jian and Wang, Jingdong and Chen, Qi and Cao, Jiezhang and Deng, Zeshuai and Xu, Yanwu and Tan, Mingkui},
  title     = {Closed-loop matters: Dual regression networks for single image super-resolution},
  year      = {2020},
  booktitle = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  pages     = {5407--5416},
}

CVPR

Intelligent home 3D: Automatic 3D-house design from linguistic descriptions only

Qi Chen, Qi Wu, Rui Tang, Yuhan Wang, Shuai Wang, and Mingkui Tan

In Conference on Computer Vision and Pattern Recognition (CVPR), 2020

Bib PDF

% Conference paper, CVPR 2020, pages 12625--12634. "3D" is restored to upper
% case and braced (as whole words) so that sentence-casing styles keep it.
@inproceedings{chen2020intelligent,
  author    = {Chen, Qi and Wu, Qi and Tang, Rui and Wang, Yuhan and Wang, Shuai and Tan, Mingkui},
  title     = {Intelligent Home {3D}: Automatic {3D-House} Design from Linguistic Descriptions Only},
  booktitle = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  pages     = {12625--12634},
  year      = {2020},
}

ACCV

Modular graph attention network for complex visual relational reasoning

Yihan Zheng, Zhiquan Wen, Mingkui Tan, Runhao Zeng, Qi Chen, Yaowei Wang, and Qi Wu

In Asian Conference on Computer Vision (ACCV), 2020

Bib PDF

% Conference paper, ACCV 2020. No page data is recorded.
@inproceedings{zheng2020modular,
  author    = {Zheng, Yihan and Wen, Zhiquan and Tan, Mingkui and Zeng, Runhao and Chen, Qi and Wang, Yaowei and Wu, Qi},
  title     = {Modular graph attention network for complex visual relational reasoning},
  year      = {2020},
  booktitle = {Asian Conference on Computer Vision (ACCV)},
}

2019

NeurIPS

NAT: Neural architecture transformer for accurate and compact architectures

Yong Guo, Yin Zheng, Mingkui Tan, Qi Chen, Jian Chen, Peilin Zhao, and Junzhou Huang

In Conference on Neural Information Processing Systems (NeurIPS), 2019

Bib PDF

% Conference paper, NeurIPS 2019 (volume 32). The venue is a conference, so
% it is stored in booktitle of an inproceedings entry. The method acronym is
% restored to upper case and braced against re-casing.
@inproceedings{guo2019nat,
  author    = {Guo, Yong and Zheng, Yin and Tan, Mingkui and Chen, Qi and Chen, Jian and Zhao, Peilin and Huang, Junzhou},
  title     = {{NAT}: Neural Architecture Transformer for Accurate and Compact Architectures},
  booktitle = {Conference on Neural Information Processing Systems (NeurIPS)},
  volume    = {32},
  year      = {2019},
}

TMM

Auto-embedding generative adversarial networks for high resolution image synthesis

Yong Guo^*, Qi Chen^*, Jian Chen, Qingyao Wu, Qinfeng Shi, and Mingkui Tan

IEEE Transactions on Multimedia (TMM), 2019

Bib PDF

% Journal article, IEEE TMM, volume 21, number 11, pages 2726--2737, 2019.
@article{guo2019auto,
  author  = {Guo, Yong and Chen, Qi and Chen, Jian and Wu, Qingyao and Shi, Qinfeng and Tan, Mingkui},
  title   = {Auto-embedding generative adversarial networks for high resolution image synthesis},
  year    = {2019},
  journal = {IEEE Transactions on Multimedia (TMM)},
  volume  = {21},
  number  = {11},
  pages   = {2726--2737},
}