Combining community pipeline for image generation #7945

Open

Fqlox opened this issue May 14, 2024 · 3 comments

Labels: bug (Something isn't working)

Comments

Fqlox commented May 14, 2024

Describe the bug

I cannot use the Stable Diffusion XL Reference and InstantID community pipelines together. When I construct the combined pipeline, I get 'FrozenDict' object has no attribute 'block_out_channels'.

Reproduction

import torch
from diffusers import ControlNetModel, UniPCMultistepScheduler
from stable_diffusion_xl_reference import StableDiffusionXLReferencePipeline
from pipeline_stable_diffusion_xl_instantid import StableDiffusionXLInstantIDPipeline, draw_kps

controlnet_path = 'path/to/instant/id'

# load IdentityNet
identityNet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=torch.float16)

pipe = StableDiffusionXLReferencePipeline.from_pretrained(
    "../path/to/model",
    torch_dtype=torch.float16,
    #use_safetensors=True,
    variant="fp16").to('cuda')

pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)

pipe_instant = StableDiffusionXLInstantIDPipeline(
    pipe,
    #vae = pipe.vae, # I tried both without and with the VAE
    text_encoder = pipe.text_encoder,
    text_encoder_2 = pipe.text_encoder_2,
    tokenizer = pipe.tokenizer,
    tokenizer_2 = pipe.tokenizer_2,
    unet = pipe.unet,
    scheduler = pipe.scheduler,
    feature_extractor = pipe.feature_extractor,
    controlnet = [identityNet],
)

Logs

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[3], line 28
     20 pipe = StableDiffusionXLReferencePipeline.from_pretrained(
     21     "../models/StableDiffusion/RealvisXLv40_lightning",
     22     torch_dtype=torch.float16,
     23     #use_safetensors=True,
     24     variant="fp16").to('cuda')
     26 pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
---> 28 pipe_instant = StableDiffusionXLInstantIDPipeline(
     29     pipe,
     30     #vae = pipe.vae,
     31     text_encoder = pipe.text_encoder,
     32     text_encoder_2 = pipe.text_encoder_2,
     33     tokenizer = pipe.tokenizer,
     34     tokenizer_2 = pipe.tokenizer_2,
     35     unet = pipe.unet,
     36     scheduler = pipe.scheduler,
     37     #safety_checker = pipe.safety_checker,
     38     feature_extractor = pipe.feature_extractor,
     39     controlnet= [identityNet],
     40     #torch_dtype=torch.float16
     41 )

File e:\conda\envs\rayban\lib\site-packages\diffusers\pipelines\controlnet\pipeline_controlnet_sd_xl.py:211, in StableDiffusionXLControlNetPipeline.__init__(self, vae, text_encoder, text_encoder_2, tokenizer, tokenizer_2, unet, controlnet, scheduler, force_zeros_for_empty_prompt, add_watermarker, feature_extractor, image_encoder)
    197     controlnet = MultiControlNetModel(controlnet)
    199 self.register_modules(
    200     vae=vae,
    201     text_encoder=text_encoder,
   (...)
    209     image_encoder=image_encoder,
    210 )
--> 211 self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
    212 self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True)

AttributeError: 'FrozenDict' object has no attribute 'block_out_channels'
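
The traceback points at the constructor signature: StableDiffusionXLControlNetPipeline.__init__ takes vae as its first parameter, so passing the whole pipe object positionally binds it to vae, and pipe.config is a FrozenDict that has no block_out_channels. A minimal sketch of the same call with every component passed by keyword (a hypothetical fix, untested against the community pipelines; the InstantID adapter weights would still need to be loaded separately):

# Hypothetical fix: pass an actual VAE instead of the whole pipeline object,
# so self.vae.config is the AutoencoderKL config rather than the pipeline's FrozenDict.
pipe_instant = StableDiffusionXLInstantIDPipeline(
    vae=pipe.vae,
    text_encoder=pipe.text_encoder,
    text_encoder_2=pipe.text_encoder_2,
    tokenizer=pipe.tokenizer,
    tokenizer_2=pipe.tokenizer_2,
    unet=pipe.unet,
    scheduler=pipe.scheduler,
    feature_extractor=pipe.feature_extractor,
    controlnet=identityNet,
)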

System Info

  • diffusers version: 0.25.0
  • Platform: Windows-10-10.0.19045-SP0
  • Python version: 3.10.14
  • PyTorch version (GPU?): 2.2.2 (True)
  • Huggingface_hub version: 0.22.2
  • Transformers version: 4.36.2
  • Accelerate version: 0.29.2
  • xFormers version: not installed
  • Using GPU in script?: yes
  • Using distributed or parallel set-up in script?: no

Who can help?

@yiyixuxu @sayakpaul @DN6 @stevhliu

Fqlox added the bug label on May 14, 2024
tolgacangoz (Contributor) commented May 15, 2024

I installed diffusers from source, and this seems to work:

import torch
from diffusers import DiffusionPipeline, ControlNetModel
from diffusers import UniPCMultistepScheduler


controlnet_path = 'path/to/instant/id'

# load IdentityNet
identityNet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=torch.float16)

pipe = DiffusionPipeline.from_pretrained(
     "stabilityai/stable-diffusion-xl-base-1.0",
     custom_pipeline="stable_diffusion_xl_reference",
     torch_dtype=torch.float16,
     use_safetensors=True,
     variant="fp16").to('cuda')

pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
pipe_instant = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    custom_pipeline="pipeline_stable_diffusion_xl_instantid",
    #vae = pipe.vae, # I tried both without and with the VAE
    text_encoder = pipe.text_encoder,
    text_encoder_2 = pipe.text_encoder_2,
    tokenizer = pipe.tokenizer,
    tokenizer_2 = pipe.tokenizer_2,
    unet = pipe.unet,
    scheduler = pipe.scheduler,
    feature_extractor = pipe.feature_extractor,
    controlnet = identityNet,
)
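
Since vae is not passed here, this second from_pretrained call reloads the checkpoint's VAE while reusing every component handed in by keyword. The passed components are used as-is rather than copied, which can be verified with a quick check (a hypothetical sanity check, assuming the script above ran):

# Components passed into from_pretrained are reused, not copied:
assert pipe_instant.unet is pipe.unet
assert pipe_instant.text_encoder is pipe.text_encoder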

yiyixuxu (Collaborator) commented
You can create your InstantID pipeline from the SDXL Reference pipeline with this script:

import torch
from diffusers import DiffusionPipeline, ControlNetModel
from diffusers import UniPCMultistepScheduler


# load IdentityNet
identityNet = ControlNetModel.from_pretrained("InstantX/InstantID", subfolder="ControlNetModel", torch_dtype=torch.float16)

pipe = DiffusionPipeline.from_pretrained(
     "stabilityai/stable-diffusion-xl-base-1.0",
     custom_pipeline="stable_diffusion_xl_reference",
     torch_dtype=torch.float16,
     use_safetensors=True,
     variant="fp16").to('cuda')

pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
pipe_instant = DiffusionPipeline.from_pipe(
    pipe,
    custom_pipeline="pipeline_stable_diffusion_xl_instantid",
    controlnet = identityNet,
)
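
As far as I can tell, from_pipe reuses the components already loaded in pipe and only swaps in what you pass explicitly (here, the controlnet), so the checkpoint is not loaded a second time and the two pipelines share the same modules in GPU memory. It does require a diffusers release recent enough to include from_pipe (see the version note below).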

Fqlox (Author) commented May 16, 2024

@standardAI I did install diffusers from source, but then had to downgrade because of an error in InstantID. When I generate with both the reference and InstantID arguments, versus with only the InstantID arguments, the reference has no impact on the generation:

image_plus_ref = pipe_instant(
    prompt,
    negative_prompt=negative_prompt,
    num_inference_steps=4,
    guidance_scale=1.2,
    image_proj_model_in_features=face_emb,
    image_embeds=face_emb,
    image=face_kps,
    controlnet_conditioning_scale=0.8,
    seed = 42,
    reference_attn=True,
    reference_adain=True,
    ref_image = ref_image
).images[0]

And

image = pipe_instant(
    prompt,
    negative_prompt=negative_prompt,
    num_inference_steps=4,
    guidance_scale=1.2,
    image_proj_model_in_features=face_emb,
    image_embeds=face_emb,
    image=face_kps,
    controlnet_conditioning_scale=0.8,
    seed = 42,
).images[0]

[Note that I am using a Lightning model.]
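
One possible explanation for the reference arguments having no effect (an assumption, not confirmed in this thread): the InstantID community pipeline does not implement reference attention, so if its __call__ accepts **kwargs, then reference_attn, reference_adain, and ref_image are silently swallowed rather than applied. A hedged way to check which parameters the loaded pipeline actually declares:

import inspect

# Inspect the parameters that pipe_instant.__call__ explicitly declares;
# if ref_image is absent, the reference arguments are being ignored.
params = inspect.signature(pipe_instant.__call__).parameters
print("ref_image" in params, "reference_attn" in params)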

@yiyixuxu Since I'm on diffusers==0.26.3, the from_pipe method does not seem to work.
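
If the release history is the issue (an assumption worth verifying), from_pipe is simply not present in 0.26.3, and upgrading diffusers or installing from source should expose it. A quick check before calling it:

import diffusers
from diffusers import DiffusionPipeline

# from_pipe is missing from older releases such as 0.26.3; verify before use.
print(diffusers.__version__)
print(hasattr(DiffusionPipeline, "from_pipe"))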
