
Commit 9cc12c8 (parent 0cb6dac), committed Aug 14, 2023

CLIPVisionEncode can now encode multiple images.

3 files changed: +12 -12 lines
 

comfy/clip_vision.py (+2 -2)

@@ -24,8 +24,8 @@ def load_sd(self, sd):
         return self.model.load_state_dict(sd, strict=False)
 
     def encode_image(self, image):
-        img = torch.clip((255. * image[0]), 0, 255).round().int()
-        inputs = self.processor(images=[img], return_tensors="pt")
+        img = torch.clip((255. * image), 0, 255).round().int()
+        inputs = self.processor(images=img, return_tensors="pt")
         outputs = self.model(**inputs)
         return outputs
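
For context, a toy sketch of what the changed preprocessing does, assuming ComfyUI's IMAGE layout of [batch, height, width, channels] with floats in 0..1: the whole batch is scaled and handed to the processor instead of only image[0]. Shapes and values below are illustrative, not taken from the commit.

import torch

# Illustrative only: a fake batch of 4 images in a [batch, H, W, C] 0..1 layout.
image = torch.rand(4, 224, 224, 3)

# Same preprocessing as the new encode_image: scale the whole batch to 0-255 integers
# instead of indexing image[0], so every image reaches the CLIP vision processor.
img = torch.clip(255. * image, 0, 255).round().int()
print(img.shape, img.dtype)  # torch.Size([4, 224, 224, 3]) torch.int32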

comfy/model_base.py (+9 -9)

@@ -120,15 +120,15 @@ def encode_adm(self, **kwargs):
         weights = []
         noise_aug = []
         for unclip_cond in unclip_conditioning:
-            adm_cond = unclip_cond["clip_vision_output"].image_embeds
-            weight = unclip_cond["strength"]
-            noise_augment = unclip_cond["noise_augmentation"]
-            noise_level = round((self.noise_augmentor.max_noise_level - 1) * noise_augment)
-            c_adm, noise_level_emb = self.noise_augmentor(adm_cond.to(device), noise_level=torch.tensor([noise_level], device=device))
-            adm_out = torch.cat((c_adm, noise_level_emb), 1) * weight
-            weights.append(weight)
-            noise_aug.append(noise_augment)
-            adm_inputs.append(adm_out)
+            for adm_cond in unclip_cond["clip_vision_output"].image_embeds:
+                weight = unclip_cond["strength"]
+                noise_augment = unclip_cond["noise_augmentation"]
+                noise_level = round((self.noise_augmentor.max_noise_level - 1) * noise_augment)
+                c_adm, noise_level_emb = self.noise_augmentor(adm_cond.to(device), noise_level=torch.tensor([noise_level], device=device))
+                adm_out = torch.cat((c_adm, noise_level_emb), 1) * weight
+                weights.append(weight)
+                noise_aug.append(noise_augment)
+                adm_inputs.append(adm_out)
 
         if len(noise_aug) > 1:
             adm_out = torch.stack(adm_inputs).sum(0)
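
A small worked example of how the per-image ADM vectors produced by the new inner loop get merged by the stacking step visible in the context lines above; the embedding width here is made up.

import torch

# Three per-image ADM vectors, as the new inner loop would append (width is illustrative).
adm_inputs = [torch.randn(1, 8) for _ in range(3)]

# Mirrors the "torch.stack(adm_inputs).sum(0)" context line: [3, 1, 8] summed to [1, 8].
adm_out = torch.stack(adm_inputs).sum(0)
print(adm_out.shape)  # torch.Size([1, 8])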

nodes.py (+1 -1)

@@ -771,7 +771,7 @@ def INPUT_TYPES(s):
     CATEGORY = "conditioning/style_model"
 
     def apply_stylemodel(self, clip_vision_output, style_model, conditioning):
-        cond = style_model.get_cond(clip_vision_output)
+        cond = style_model.get_cond(clip_vision_output).flatten(start_dim=0, end_dim=1).unsqueeze(dim=0)
         c = []
         for t in conditioning:
             n = [torch.cat((t[0], cond), dim=1), t[1].copy()]
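
A toy illustration of the added reshape, assuming get_cond returns one token sequence per encoded image: the per-image sequences are folded into a single longer sequence with one batch axis, so the later torch.cat((t[0], cond), dim=1) appends all of them to the text conditioning. Shapes are made up.

import torch

# Illustrative only: conditioning for 2 encoded images, 16 tokens each, made-up width.
cond = torch.randn(2, 16, 1280)

# flatten(start_dim=0, end_dim=1) merges images and tokens -> [32, 1280];
# unsqueeze(dim=0) restores a batch axis -> [1, 32, 1280], ready for torch.cat along dim=1.
cond = cond.flatten(start_dim=0, end_dim=1).unsqueeze(dim=0)
print(cond.shape)  # torch.Size([1, 32, 1280])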
