added grid_size parameter to generate a grid of images

This commit is contained in:
Brett Kuprel
2022-07-02 08:45:49 -04:00
parent e0386f991c
commit 1eb56737d8
6 changed files with 87 additions and 69 deletions
+25 -10
View File
@@ -1,4 +1,5 @@
import os
from re import I
from PIL import Image
import numpy
from torch import LongTensor
@@ -28,7 +29,6 @@ class MinDalle:
self.is_reusable = is_reusable
self.is_verbose = is_verbose
self.sample_token_count = sample_token_count
self.batch_count = 2
self.text_token_count = 64
self.image_token_count = 256
self.layer_count = 24 if is_mega else 12
@@ -128,8 +128,7 @@ class MinDalle:
embed_count = self.embed_count,
glu_embed_count = self.glu_embed_count,
layer_count = self.layer_count,
start_token = self.image_vocab_count,
batch_count = self.batch_count
start_token = self.image_vocab_count
)
params = torch.load(self.decoder_params_path)
self.decoder.load_state_dict(params, strict=False)
@@ -148,7 +147,12 @@ class MinDalle:
if torch.cuda.is_available(): self.detokenizer = self.detokenizer.cuda()
def generate_image_tokens(self, text: str, seed: int) -> LongTensor:
def generate_image_tokens(
self,
text: str,
seed: int,
image_count: int
) -> LongTensor:
if self.is_verbose: print("tokenizing text")
tokens = self.tokenizer.tokenize(text)
if self.is_verbose: print("text tokens", tokens)
@@ -166,18 +170,29 @@ class MinDalle:
if not self.is_reusable: self.init_decoder()
if self.is_verbose: print("sampling image tokens")
if seed < 0: seed = random.randint(0, 2 ** 31)
torch.manual_seed(seed)
image_tokens = self.decoder.forward(text_tokens, encoder_state)
if seed > 0: torch.manual_seed(seed)
image_tokens = self.decoder.forward(
image_count,
text_tokens,
encoder_state
)
if not self.is_reusable: del self.decoder
return image_tokens
def generate_image(
    self,
    text: str,
    seed: int = -1,
    grid_size: int = 1
) -> Image.Image:
    """Generate a square grid of images for a text prompt.

    Args:
        text: Prompt forwarded to ``generate_image_tokens``.
        seed: Seed forwarded unchanged to ``generate_image_tokens``.
        grid_size: Number of images along each side of the grid, so
            ``grid_size ** 2`` images are sampled. Must be at least 1.

    Returns:
        A single PIL image containing all sampled images tiled together.

    Raises:
        ValueError: If ``grid_size`` is smaller than 1.
    """
    # Fail early: a non-positive grid size would otherwise only surface as an
    # opaque reshape / PIL error after the expensive sampling step has run.
    if grid_size < 1:
        raise ValueError(f"grid_size must be at least 1, got {grid_size}")

    image_count = grid_size ** 2
    image_tokens = self.generate_image_tokens(text, seed, image_count)

    # When the model is not reusable, the detokenizer is created right before
    # use and dropped right after.
    if not self.is_reusable: self.init_detokenizer()
    if self.is_verbose: print("detokenizing image")
    images = self.detokenizer.forward(image_tokens).to(torch.uint8)
    if not self.is_reusable: del self.detokenizer

    # Split the leading image axis into two grid axes, then merge each grid
    # axis with one of the per-image axes to form a single tiled array.
    # NOTE(review): assumes the detokenizer returns
    # (image_count, height, width, channels); under that assumption the
    # result is (grid_size * height, grid_size * width, channels) with the
    # images laid out column by column - confirm against the detokenizer.
    images = images.reshape([grid_size] * 2 + list(images.shape[1:]))
    image = images.flatten(1, 2).transpose(0, 1).flatten(1, 2)
    image = Image.fromarray(image.to('cpu').detach().numpy())
    return image