Bitsandbytes documentation
Papers, related resources & how to cite
Get started
Usage Guides
Explanation
API reference
You are viewing the main version, which requires installation from source. If you'd like
a regular pip install, check out the latest stable version (v0.49.2).
Papers, related resources & how to cite
The academic works below are listed in reverse chronological order.
SpQR: A Sparse-Quantized Representation for Near-Lossless LLM Weight Compression (Jun 2023)
Authors: Tim Dettmers, Ruslan Svirschevski, Vage Egiazarian, Denis Kuznedelev, Elias Frantar, Saleh Ashkboos, Alexander Borzunov, Torsten Hoefler, Dan Alistarh
@article{dettmers2023spqr,
  title   = {{SpQR}: A Sparse-Quantized Representation for Near-Lossless {LLM} Weight Compression},
  author  = {Dettmers, Tim and Svirschevski, Ruslan and Egiazarian, Vage and Kuznedelev, Denis and Frantar, Elias and Ashkboos, Saleh and Borzunov, Alexander and Hoefler, Torsten and Alistarh, Dan},
  journal = {arXiv preprint arXiv:2306.03078},
  year    = {2023},
}
QLoRA: Efficient Finetuning of Quantized LLMs (May 2023)
Authors: Tim Dettmers, Artidoro Pagnoni, Ari Holtzman, Luke Zettlemoyer
@article{dettmers2023qlora,
  title   = {{QLoRA}: Efficient Finetuning of Quantized {LLMs}},
  author  = {Dettmers, Tim and Pagnoni, Artidoro and Holtzman, Ari and Zettlemoyer, Luke},
  journal = {arXiv preprint arXiv:2305.14314},
  year    = {2023},
}
The case for 4-bit precision: k-bit Inference Scaling Laws (Dec 2022)
Authors: Tim Dettmers, Luke Zettlemoyer
@inproceedings{dettmers2023case,
  title        = {The Case for 4-bit Precision: {k-bit} Inference Scaling Laws},
  author       = {Dettmers, Tim and Zettlemoyer, Luke},
  booktitle    = {International Conference on Machine Learning},
  pages        = {7750--7774},
  year         = {2023},
  organization = {PMLR},
}
LLM.int8(): 8-bit Matrix Multiplication for Transformers at Scale (Nov 2022)
Authors: Tim Dettmers, Mike Lewis, Younes Belkada, Luke Zettlemoyer
- LLM.int8() Blog Post
- LLM.int8() Emergent Features Blog Post
- Introduction to Weight Quantization
- Poster
@article{dettmers2022llm,
  title   = {{LLM.int8()}: 8-bit Matrix Multiplication for Transformers at Scale},
  author  = {Dettmers, Tim and Lewis, Mike and Belkada, Younes and Zettlemoyer, Luke},
  journal = {arXiv preprint arXiv:2208.07339},
  year    = {2022},
}
8-bit Optimizers via Block-wise Quantization (Oct 2021)
Authors: Tim Dettmers, Mike Lewis, Sam Shleifer, Luke Zettlemoyer
@article{DBLP:journals/corr/abs-2110-02861,
  author     = {Dettmers, Tim and Lewis, Mike and Shleifer, Sam and Zettlemoyer, Luke},
  title      = {8-bit Optimizers via Block-wise Quantization},
  journal    = {CoRR},
  volume     = {abs/2110.02861},
  year       = {2021},
  url        = {https://arxiv.org/abs/2110.02861},
  eprinttype = {arXiv},
  eprint     = {2110.02861},
  timestamp  = {Thu, 21 Oct 2021 16:20:08 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2110-02861.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}