tgate

* Name: tgate
* Version: 1.0.0
* Summary: TGATE-V2: Faster Diffusion Through Temporal Attention Decomposition.
* Home page: https://github.com/HaozheLiu-ST/T-GATE/tree/releases/
* Author: Wentian Zhang
* Requires Python: >=3.8.0
* Uploaded: 2024-08-30 13:17:27
# TGATE

[TGATE](https://github.com/HaozheLiu-ST/T-GATE/tree/main) accelerates inference with [`PixArtAlphaPipeline`], [`PixArtSigmaPipeline`], [`StableDiffusionPipeline`], [`StableDiffusionXLPipeline`], and [`StableVideoDiffusionPipeline`] by skipping the calculation of self-attention and cross-attention once their outputs converge. More details can be found in the [technical report](https://huggingface.co/papers/2404.02747).

![](https://github.com/user-attachments/assets/44805d66-e504-4de4-837d-d027fb3f566b)
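
The core observation is that attention outputs change very little after a certain denoising step, so they can be computed once and reused for the remaining steps. The snippet below is a minimal conceptual sketch of that caching pattern, not TGATE's actual implementation; the `CachedAttention` wrapper and its `step` argument are illustrative only.

```python
import torch


class CachedAttention(torch.nn.Module):
    """Illustrative only: reuse an attention module's output once it has converged."""

    def __init__(self, attn: torch.nn.Module, gate_step: int):
        super().__init__()
        self.attn = attn            # the original attention module
        self.gate_step = gate_step  # step after which the output is treated as converged
        self.cache = None

    def forward(self, hidden_states, step: int, **kwargs):
        if step < self.gate_step or self.cache is None:
            # Early denoising steps: compute attention normally and remember the result.
            self.cache = self.attn(hidden_states, **kwargs)
        # Later steps: skip recomputation and return the cached output.
        return self.cache
```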


## 🚀 Major Features

* Training-free.
* Integrates easily into [Diffusers](https://github.com/huggingface/diffusers/tree/main).
* Only a few lines of code are required.
* Complementary to [DeepCache](https://github.com/horseee/DeepCache).
* Supports [Stable Diffusion pipelines](https://huggingface.co/stabilityai), [PixArt](https://pixart-alpha.github.io/), and [Latent Consistency Models](https://latent-consistency-models.github.io/).
* 10%-50% speedup, depending on the model.

## 📖 Quick Start

### 🛠️ Installation

Start by installing [TGATE](https://github.com/HaozheLiu-ST/T-GATE/tree/releases):

```bash
pip install tgate
```

#### Requirements

* pytorch>=2.0.0
* diffusers>=0.29.0
* DeepCache==0.1.1
* transformers
* accelerate
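
If these are not already present in your environment, they can be pinned explicitly; something along these lines should work (PyPI package names assumed, e.g. `torch` for PyTorch):

```bash
pip install "torch>=2.0.0" "diffusers>=0.29.0" "DeepCache==0.1.1" transformers accelerate
```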

### 🌟 Usage

Accelerate `PixArtAlphaPipeline` with TGATE:

```diff
import torch
from diffusers import PixArtAlphaPipeline

pipe = PixArtAlphaPipeline.from_pretrained(
    "PixArt-alpha/PixArt-XL-2-1024-MS", 
    torch_dtype=torch.float16,
)

+ from tgate import TgatePixArtAlphaLoader
+ gate_step = 15        # step that splits inference into the semantics-planning and fidelity-improving phases
+ sp_interval = 3       # attention caching interval in the semantics-planning phase
+ fi_interval = 1       # attention caching interval in the fidelity-improving phase
+ warm_up = 2           # warm-up steps before caching starts
+ inference_step = 25   # total number of denoising steps
+ pipe = TgatePixArtAlphaLoader(pipe).to("cuda")

+ image = pipe.tgate(
+         "An alpaca made of colorful building blocks, cyberpunk.",
+         gate_step=gate_step,
+         sp_interval=sp_interval,
+         fi_interval=fi_interval,
+         warm_up=warm_up,   
+         num_inference_steps=inference_step,
+ ).images[0]
```

Accelerate `PixArtSigmaPipeline` with TGATE:

```diff
import torch
from diffusers import PixArtSigmaPipeline

pipe = PixArtSigmaPipeline.from_pretrained(
    "PixArt-alpha/PixArt-Sigma-XL-2-1024-MS", 
    torch_dtype=torch.float16,
)

+ from tgate import TgatePixArtSigmaLoader
+ gate_step = 15
+ sp_interval = 3
+ fi_interval = 1
+ warm_up = 2
+ inference_step = 25
+ pipe = TgatePixArtSigmaLoader(pipe).to("cuda")

+ image = pipe.tgate(
+         "an astronaut sitting in a diner, eating fries, cinematic, analog film.",
+         gate_step=gate_step,
+         sp_interval=sp_interval,
+         fi_interval=fi_interval,
+         warm_up=warm_up,   
+         num_inference_steps=inference_step,
+ ).images[0]
```

Accelerate `StableDiffusionXLPipeline` with TGATE:

```diff
import torch
from diffusers import StableDiffusionXLPipeline
from diffusers import DPMSolverMultistepScheduler

pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
    variant="fp16",
    use_safetensors=True,
)

+ from tgate import TgateSDXLLoader
+ gate_step = 10
+ sp_interval = 5
+ fi_interval = 1
+ warm_up = 2
+ inference_step = 25
+ pipe = TgateSDXLLoader(pipe).to("cuda")

pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)

+ image = pipe.tgate(
+         "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k.",
+         gate_step=gate_step,
+         sp_interval=sp_interval,
+         fi_interval=fi_interval,
+         warm_up=warm_up,  
+         num_inference_steps=inference_step
+ ).images[0]
```

Accelerate `StableDiffusionXLPipeline` with [DeepCache](https://github.com/horseee/DeepCache) and TGATE:

```diff
import torch
from diffusers import StableDiffusionXLPipeline
from diffusers import DPMSolverMultistepScheduler

pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
    variant="fp16",
    use_safetensors=True,
)

+ from tgate import TgateSDXLDeepCacheLoader
+ gate_step = 10
+ sp_interval = 1
+ fi_interval = 1
+ warm_up = 0
+ inference_step = 25
+ pipe = TgateSDXLDeepCacheLoader(
+        pipe,
+        cache_interval=3,
+        cache_branch_id=0,
+ ).to("cuda")

pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)


+ image = pipe.tgate(
+         "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k.",
+         gate_step=gate_step,
+         sp_interval=sp_interval,
+         fi_interval=fi_interval,
+         warm_up=warm_up,  
+         num_inference_steps=inference_step
+ ).images[0]
```

Accelerate `latent-consistency/lcm-sdxl` with TGATE:

```diff
import torch
from diffusers import StableDiffusionXLPipeline
from diffusers import UNet2DConditionModel, LCMScheduler

unet = UNet2DConditionModel.from_pretrained(
    "latent-consistency/lcm-sdxl",
    torch_dtype=torch.float16,
    variant="fp16",
)
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    unet=unet,
    torch_dtype=torch.float16,
    variant="fp16",
)
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)

+ from tgate import TgateSDXLLoader
+ gate_step = 1
+ sp_interval = 1
+ fi_interval = 1
+ warm_up = 0
+ inference_step = 4
+ pipe = TgateSDXLLoader(pipe, lcm=True).to("cuda")

+ image = pipe.tgate(
+         "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k.",
+         gate_step=gate_step,
+         sp_interval=sp_interval,
+         fi_interval=fi_interval,
+         warm_up=warm_up,  
+         num_inference_steps=inference_step,
+ ).images[0]
```

TGATE also supports `StableDiffusionPipeline`, `PixArt-alpha/PixArt-LCM-XL-2-1024-MS`, and `StableVideoDiffusionPipeline`.
More details can be found [here](https://github.com/HaozheLiu-ST/T-GATE/tree/releases/main.py).
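
As a sketch of how one of those might look, a `StableDiffusionPipeline` should follow the same pattern as the examples above. The loader name `TgateSDLoader`, the model id, the prompt, and the parameter values below are assumptions for illustration; check the linked `main.py` for the exact loader names and recommended settings.

```diff
import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4",
    torch_dtype=torch.float16,
)

+ from tgate import TgateSDLoader  # loader name assumed; see main.py linked above
+ pipe = TgateSDLoader(pipe).to("cuda")

+ image = pipe.tgate(
+         "A watercolor painting of a lighthouse at dawn.",
+         gate_step=10,
+         sp_interval=5,
+         fi_interval=1,
+         warm_up=2,
+         num_inference_steps=25,
+ ).images[0]
```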

## 📄 Results
| Model                 | MACs     | Latency | Zero-shot 10K-FID on MS-COCO |
|-----------------------|----------|---------|---------------------------|
| SD-XL                 | 149.438T | 53.187s | 24.164                    |
| SD-XL w/ TGATE        | 95.988T  | 31.643s | 22.917                    |
| Pixart-Alpha          | 107.031T | 61.502s | 37.983                    |
| Pixart-Alpha w/ TGATE | 73.971T  | 36.650s | 36.390                    |
| Pixart-Sigma          | 107.766T | 60.467s | 34.278                    |
| Pixart-Sigma w/ TGATE | 74.420T  | 36.449s | 32.927                    |
| DeepCache (SD-XL)     | 57.888T  | 19.931s | 25.678                    |
| DeepCache w/ TGATE    | 43.868T  | 14.666s | 24.511                    |
| LCM (SD-XL)                 | 11.955T  | 3.805s  | 26.357                    |
| LCM (SD-XL) w/ TGATE        | 11.171T  | 3.533s  | 26.902                    |
| LCM (Pixart-Alpha)          | 8.563T   | 4.733s  | 35.989                    |
| LCM (Pixart-Alpha) w/ TGATE | 7.623T   | 4.543s  | 35.843                    |

The FID is computed on [these captions](https://github.com/HaozheLiu-ST/T-GATE/files/15369063/idx_caption.txt) using [pytorch-fid](https://github.com/mseitzer/pytorch-fid).
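
For reference, pytorch-fid can be run from the command line on two image folders; the paths below are placeholders for the MS-COCO reference images and the generated samples:

```bash
python -m pytorch_fid path/to/coco_reference_images path/to/generated_images
```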

Latency is measured on a consumer NVIDIA GTX 1080 Ti with diffusers [v0.28.2](https://github.com/huggingface/diffusers/tree/v0.28.2).

MACs and parameter counts are calculated with [calflops](https://github.com/MrYxJ/calculate-flops.pytorch).
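
calflops exposes a `calculate_flops` helper; the snippet below is a minimal sketch of MAC counting on a small stand-in model, not the exact script used for the table (the diffusion pipelines would need their backbone modules and pipeline-specific inputs instead):

```python
import torch
from calflops import calculate_flops

# Small stand-in model; replace with the backbone you actually want to profile.
model = torch.nn.Sequential(
    torch.nn.Conv2d(3, 16, kernel_size=3, padding=1),
    torch.nn.ReLU(),
    torch.nn.Flatten(),
    torch.nn.Linear(16 * 64 * 64, 10),
)

# Returns human-readable FLOPs, MACs, and parameter counts for one forward pass.
flops, macs, params = calculate_flops(
    model=model,
    input_shape=(1, 3, 64, 64),
    output_as_string=True,
)
print(flops, macs, params)
```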

## Citation
If you find our work inspiring or use our codebase in your research, please consider giving a star ⭐ and a citation.
```bibtex
@article{tgate_v2,
  title={Faster Diffusion via Temporal Attention Decomposition},
  author={Liu, Haozhe and Zhang, Wentian and Xie, Jinheng and Faccio, Francesco and Xu, Mengmeng and Xiang, Tao and Shou, Mike Zheng and Perez-Rua, Juan-Manuel and Schmidhuber, J{\"u}rgen},
  journal={arXiv preprint arXiv:2404.02747},
  year={2024}
}
```


            
