Combining community pipeline for image generation #7945

Open

Fqlox opened this issue May 14, 2024 · 3 comments

Labels: bug (Something isn't working)

Comments

Fqlox commented May 14, 2024

Describe the bug

I cannot use the Stable Diffusion XL Reference and InstantID community pipelines together. When I construct the combined pipeline, I get 'FrozenDict' object has no attribute 'block_out_channels'.

Reproduction

import torch
from diffusers import ControlNetModel, UniPCMultistepScheduler
from stable_diffusion_xl_reference import StableDiffusionXLReferencePipeline
from pipeline_stable_diffusion_xl_instantid import StableDiffusionXLInstantIDPipeline, draw_kps

controlnet_path = 'path/to/instant/id'

# load IdentityNet
identityNet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=torch.float16)

pipe = StableDiffusionXLReferencePipeline.from_pretrained(
    "../path/to/model",
    torch_dtype=torch.float16,
    #use_safetensors=True,
    variant="fp16").to('cuda')

pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)

pipe_instant = StableDiffusionXLInstantIDPipeline(
    pipe,
    #vae = pipe.vae, # I tried both without and with the VAE
    text_encoder = pipe.text_encoder,
    text_encoder_2 = pipe.text_encoder_2,
    tokenizer = pipe.tokenizer,
    tokenizer_2 = pipe.tokenizer_2,
    unet = pipe.unet,
    scheduler = pipe.scheduler,
    feature_extractor = pipe.feature_extractor,
    controlnet = [identityNet],
)

Logs

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[3], line 28
     20 pipe = StableDiffusionXLReferencePipeline.from_pretrained(
     21     "../models/StableDiffusion/RealvisXLv40_lightning",
     22     torch_dtype=torch.float16,
     23     #use_safetensors=True,
     24     variant="fp16").to('cuda')
     26 pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
---> 28 pipe_instant = StableDiffusionXLInstantIDPipeline(
     29     pipe,
     30     #vae = pipe.vae,
     31     text_encoder = pipe.text_encoder,
     32     text_encoder_2 = pipe.text_encoder_2,
     33     tokenizer = pipe.tokenizer,
     34     tokenizer_2 = pipe.tokenizer_2,
     35     unet = pipe.unet,
     36     scheduler = pipe.scheduler,
     37     #safety_checker = pipe.safety_checker,
     38     feature_extractor = pipe.feature_extractor,
     39     controlnet= [identityNet],
     40     #torch_dtype=torch.float16
     41 )

File e:\conda\envs\rayban\lib\site-packages\diffusers\pipelines\controlnet\pipeline_controlnet_sd_xl.py:211, in StableDiffusionXLControlNetPipeline.__init__(self, vae, text_encoder, text_encoder_2, tokenizer, tokenizer_2, unet, controlnet, scheduler, force_zeros_for_empty_prompt, add_watermarker, feature_extractor, image_encoder)
    197     controlnet = MultiControlNetModel(controlnet)
    199 self.register_modules(
    200     vae=vae,
    201     text_encoder=text_encoder,
   (...)
    209     image_encoder=image_encoder,
    210 )
--> 211 self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
    212 self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True)

AttributeError: 'FrozenDict' object has no attribute 'block_out_channels'
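
The traceback points at the constructor signature: StableDiffusionXLControlNetPipeline.__init__ takes vae as its first parameter, so passing the whole pipe object positionally binds it to vae, and pipe.config is a FrozenDict that has no block_out_channels. A minimal sketch of the same call with every component passed by keyword (a hypothetical fix, untested against the community pipelines; the InstantID adapter weights would still need to be loaded separately):

# Hypothetical fix: pass an actual VAE instead of the whole pipeline object,
# so self.vae.config is the AutoencoderKL config rather than the pipeline's FrozenDict.
pipe_instant = StableDiffusionXLInstantIDPipeline(
    vae=pipe.vae,
    text_encoder=pipe.text_encoder,
    text_encoder_2=pipe.text_encoder_2,
    tokenizer=pipe.tokenizer,
    tokenizer_2=pipe.tokenizer_2,
    unet=pipe.unet,
    scheduler=pipe.scheduler,
    feature_extractor=pipe.feature_extractor,
    controlnet=identityNet,
)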

System Info

  • diffusers version: 0.25.0
  • Platform: Windows-10-10.0.19045-SP0
  • Python version: 3.10.14
  • PyTorch version (GPU?): 2.2.2 (True)
  • Huggingface_hub version: 0.22.2
  • Transformers version: 4.36.2
  • Accelerate version: 0.29.2
  • xFormers version: not installed
  • Using GPU in script?: yes
  • Using distributed or parallel set-up in script?: no

Who can help?

@yiyixuxu @sayakpaul @DN6 @stevhliu

Fqlox added the bug label on May 14, 2024
tolgacangoz (Contributor) commented May 15, 2024

I installed diffusers from source, and this seems to work:

import torch
from diffusers import DiffusionPipeline, ControlNetModel
from diffusers import UniPCMultistepScheduler


controlnet_path = 'path/to/instant/id'

# load IdentityNet
identityNet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=torch.float16)

pipe = DiffusionPipeline.from_pretrained(
     "stabilityai/stable-diffusion-xl-base-1.0",
     custom_pipeline="stable_diffusion_xl_reference",
     torch_dtype=torch.float16,
     use_safetensors=True,
     variant="fp16").to('cuda')

pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
pipe_instant = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    custom_pipeline="pipeline_stable_diffusion_xl_instantid",
    #vae = pipe.vae, # I tried both without and with the VAE
    text_encoder = pipe.text_encoder,
    text_encoder_2 = pipe.text_encoder_2,
    tokenizer = pipe.tokenizer,
    tokenizer_2 = pipe.tokenizer_2,
    unet = pipe.unet,
    scheduler = pipe.scheduler,
    feature_extractor = pipe.feature_extractor,
    controlnet = identityNet,
)
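
Since vae is not passed here, this second from_pretrained call reloads the checkpoint's VAE while reusing every component handed in by keyword. The passed components are used as-is rather than copied, which can be verified with a quick check (a hypothetical sanity check, assuming the script above ran):

# Components passed into from_pretrained are reused, not copied:
assert pipe_instant.unet is pipe.unet
assert pipe_instant.text_encoder is pipe.text_encoder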

yiyixuxu (Collaborator) commented
You can create your InstantID pipeline from the SDXL Reference pipeline with this script:

import torch
from diffusers import DiffusionPipeline, ControlNetModel
from diffusers import UniPCMultistepScheduler


# load IdentityNet
identityNet = ControlNetModel.from_pretrained("InstantX/InstantID", subfolder="ControlNetModel", torch_dtype=torch.float16)

pipe = DiffusionPipeline.from_pretrained(
     "stabilityai/stable-diffusion-xl-base-1.0",
     custom_pipeline="stable_diffusion_xl_reference",
     torch_dtype=torch.float16,
     use_safetensors=True,
     variant="fp16").to('cuda')

pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
pipe_instant = DiffusionPipeline.from_pipe(
    pipe,
    custom_pipeline="pipeline_stable_diffusion_xl_instantid",
    controlnet = identityNet,
)
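
As far as I can tell, from_pipe reuses the components already loaded in pipe and only swaps in what you pass explicitly (here, the controlnet), so the checkpoint is not loaded a second time and the two pipelines share the same modules in GPU memory. It does require a diffusers release recent enough to include from_pipe (see the version note below).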

Fqlox (Author) commented May 16, 2024

@standardAI I did install diffusers from source, but then had to downgrade because of an error in InstantID. When I generate with both the reference and InstantID arguments, versus with only the InstantID arguments, the reference has no impact on the generation:

image_plus_ref = pipe_instant(
    prompt,
    negative_prompt=negative_prompt,
    num_inference_steps=4,
    guidance_scale=1.2,
    image_proj_model_in_features=face_emb,
    image_embeds=face_emb,
    image=face_kps,
    controlnet_conditioning_scale=0.8,
    seed = 42,
    reference_attn=True,
    reference_adain=True,
    ref_image = ref_image
).images[0]

And

image = pipe_instant(
    prompt,
    negative_prompt=negative_prompt,
    num_inference_steps=4,
    guidance_scale=1.2,
    image_proj_model_in_features=face_emb,
    image_embeds=face_emb,
    image=face_kps,
    controlnet_conditioning_scale=0.8,
    seed = 42,
).images[0]

[Note that I am using a Lightning model.]
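
One possible explanation for the reference arguments having no effect (an assumption, not confirmed in this thread): the InstantID community pipeline does not implement reference attention, so if its __call__ accepts **kwargs, then reference_attn, reference_adain, and ref_image are silently swallowed rather than applied. A hedged way to check which parameters the loaded pipeline actually declares:

import inspect

# Inspect the parameters that pipe_instant.__call__ explicitly declares;
# if ref_image is absent, the reference arguments are being ignored.
params = inspect.signature(pipe_instant.__call__).parameters
print("ref_image" in params, "reference_attn" in params)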

@yiyixuxu Since I'm on diffusers==0.26.3, the from_pipe method does not seem to work.
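
If the release history is the issue (an assumption worth verifying), from_pipe is simply not present in 0.26.3, and upgrading diffusers or installing from source should expose it. A quick check before calling it:

import diffusers
from diffusers import DiffusionPipeline

# from_pipe is missing from older releases such as 0.26.3; verify before use.
print(diffusers.__version__)
print(hasattr(DiffusionPipeline, "from_pipe"))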
