#!/usr/bin/env python
# coding=utf-8
# Copyright 2022 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import hashlib
import logging
import math
import os
import warnings
from pathlib import Path
from typing import Optional

import numpy as np
import safetensors.torch
import torch
import torch.nn.functional as F
import torch.utils.checkpoint
import transformers
from accelerate import Accelerator
from accelerate.logging import get_logger
from accelerate.utils import ProjectConfiguration, set_seed
from huggingface_hub import HfFolder, Repository, create_repo, whoami
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms
from tqdm.auto import tqdm
from transformers import AutoTokenizer, PretrainedConfig

import diffusers
from diffusers import (
    AutoencoderKL,
    DDPMScheduler,
    DiffusionPipeline,
    DPMSolverMultistepScheduler,
    UNet2DConditionModel,
)
from diffusers import loaders
from diffusers.loaders import AttnProcsLayers
from diffusers.optimization import get_scheduler
from diffusers.utils import check_min_version, is_wandb_available
from diffusers.utils.constants import SAFETENSORS_WEIGHTS_NAME, WEIGHTS_NAME
from diffusers.utils.import_utils import is_xformers_available

from models import LoRACrossAttnProcessor

# Save/load the LoRA weights under the generic diffusers weight name so that
# `load_attn_procs` below picks up the files written by `save_attn_procs`.
loaders.LORA_WEIGHT_NAME = WEIGHTS_NAME


# Will error if the minimal version of diffusers is not installed. Remove at your own risk.
check_min_version("0.13.0.dev0")

logger = get_logger(__name__)

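
# Example launch command (hypothetical file name, paths and model id; adjust to your setup):
#
#   accelerate launch train_dreambooth_lora.py \
#     --pretrained_model_name_or_path="runwayml/stable-diffusion-v1-5" \
#     --instance_data_dir="./instance_images" \
#     --instance_prompt="a photo of sks dog" \
#     --output_dir="lora-dreambooth-model" \
#     --resolution=512 \
#     --train_batch_size=1 \
#     --learning_rate=1e-4 \
#     --max_train_steps=500
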
def save_model_card(repo_name, images=None, base_model: str = "", prompt: str = "", repo_folder: str = ""):
    img_str = ""
    if images is not None:
        for i, image in enumerate(images):
            image.save(os.path.join(repo_folder, f"image_{i}.png"))
            img_str += f"![img_{i}](./image_{i}.png)\n"

    yaml = f"""---
license: creativeml-openrail-m
base_model: {base_model}
instance_prompt: {prompt}
tags:
- stable-diffusion
- stable-diffusion-diffusers
- text-to-image
- diffusers
- lora
inference: true
---
"""
    model_card = f"""
# LoRA DreamBooth - {repo_name}

These are LoRA adaptation weights for {base_model}. The weights were trained on {prompt} using
[DreamBooth](https://dreambooth.github.io/). You can find some example images below.

{img_str}
"""
    with open(os.path.join(repo_folder, "README.md"), "w") as f:
        f.write(yaml + model_card)


def import_model_class_from_model_name_or_path(pretrained_model_name_or_path: str, revision: str):
    text_encoder_config = PretrainedConfig.from_pretrained(
        pretrained_model_name_or_path,
        subfolder="text_encoder",
        revision=revision,
    )
    model_class = text_encoder_config.architectures[0]

    if model_class == "CLIPTextModel":
        from transformers import CLIPTextModel

        return CLIPTextModel
    elif model_class == "RobertaSeriesModelWithTransformation":
        from diffusers.pipelines.alt_diffusion.modeling_roberta_series import RobertaSeriesModelWithTransformation

        return RobertaSeriesModelWithTransformation
    else:
        raise ValueError(f"{model_class} is not supported.")


def parse_args(input_args=None):
    parser = argparse.ArgumentParser(description="Simple example of a training script.")
    parser.add_argument(
        "--pretrained_model_name_or_path",
        type=str,
        default=None,
        required=True,
        help="Path to pretrained model or model identifier from huggingface.co/models.",
    )
    parser.add_argument(
        "--revision",
        type=str,
        default=None,
        required=False,
        help="Revision of pretrained model identifier from huggingface.co/models.",
    )
    parser.add_argument(
        "--tokenizer_name",
        type=str,
        default=None,
        help="Pretrained tokenizer name or path if not the same as model_name.",
    )
    parser.add_argument(
        "--instance_data_dir",
        type=str,
        default=None,
        required=True,
        help="A folder containing the training data of instance images.",
    )
    parser.add_argument(
        "--class_data_dir",
        type=str,
        default=None,
        required=False,
        help="A folder containing the training data of class images.",
    )
    parser.add_argument(
        "--instance_prompt",
        type=str,
        default=None,
        required=True,
        help="The prompt with the identifier specifying the instance.",
    )
    parser.add_argument(
        "--class_prompt",
        type=str,
        default=None,
        help="The prompt to specify images in the same class as provided instance images.",
    )
    parser.add_argument(
        "--validation_prompt",
        type=str,
        default=None,
        help="A prompt that is used during validation to verify that the model is learning.",
    )
    parser.add_argument(
        "--num_validation_images",
        type=int,
        default=16,
        help="Number of images that should be generated during validation with `validation_prompt`.",
    )
    parser.add_argument(
        "--validation_epochs",
        type=int,
        default=50,
        help=(
            "Run DreamBooth validation every X epochs. DreamBooth validation consists of generating"
            " `args.num_validation_images` images with the prompt `args.validation_prompt`."
        ),
    )
    parser.add_argument(
        "--with_prior_preservation",
        default=False,
        action="store_true",
        help="Flag to add prior preservation loss.",
    )
    parser.add_argument("--prior_loss_weight", type=float, default=1.0, help="The weight of prior preservation loss.")
    parser.add_argument(
        "--num_class_images",
        type=int,
        default=100,
        help=(
            "Minimum number of class images for prior preservation loss. If there are not enough images already"
            " present in class_data_dir, additional images will be sampled with class_prompt."
        ),
    )
    parser.add_argument(
        "--output_dir",
        type=str,
        default="lora-dreambooth-model",
        help="The output directory where the model predictions and checkpoints will be written.",
    )
    parser.add_argument("--seed", type=int, default=None, help="A seed for reproducible training.")
    parser.add_argument(
        "--resolution",
        type=int,
        default=512,
        help=(
            "The resolution for input images; all the images in the train/validation dataset will be resized to this"
            " resolution."
        ),
    )
    parser.add_argument(
        "--center_crop",
        default=False,
        action="store_true",
        help=(
            "Whether to center crop the input images to the resolution. If not set, the images will be randomly"
            " cropped. The images will be resized to the resolution first before cropping."
        ),
    )
    parser.add_argument(
        "--train_batch_size", type=int, default=4, help="Batch size (per device) for the training dataloader."
    )
    parser.add_argument(
        "--sample_batch_size", type=int, default=4, help="Batch size (per device) for sampling images."
    )
    parser.add_argument("--num_train_epochs", type=int, default=1)
    parser.add_argument(
        "--max_train_steps",
        type=int,
        default=None,
        help="Total number of training steps to perform. If provided, overrides num_train_epochs.",
    )
    parser.add_argument(
        "--checkpointing_steps",
        type=int,
        default=500,
        help=(
            "Save a checkpoint of the training state every X updates. These checkpoints can be used both as final"
            " checkpoints in case they are better than the last checkpoint, and are also suitable for resuming"
            " training using `--resume_from_checkpoint`."
        ),
    )
    parser.add_argument(
        "--checkpoints_total_limit",
        type=int,
        default=None,
        help=(
            "Max number of checkpoints to store. Passed as `total_limit` to the `Accelerator` `ProjectConfiguration`."
            " See https://huggingface.co/docs/accelerate/package_reference/accelerator#accelerate.Accelerator.save_state"
            " for more details."
        ),
    )
    parser.add_argument(
        "--resume_from_checkpoint",
        type=str,
        default=None,
        help=(
            "Whether training should be resumed from a previous checkpoint. Use a path saved by"
            ' `--checkpointing_steps`, or `"latest"` to automatically select the last available checkpoint.'
        ),
    )
    parser.add_argument(
        "--gradient_accumulation_steps",
        type=int,
        default=1,
        help="Number of update steps to accumulate before performing a backward/update pass.",
    )
    parser.add_argument(
        "--gradient_checkpointing",
        action="store_true",
        help="Whether or not to use gradient checkpointing to save memory at the expense of a slower backward pass.",
    )
    parser.add_argument(
        "--learning_rate",
        type=float,
        default=5e-4,
        help="Initial learning rate (after the potential warmup period) to use.",
    )
    parser.add_argument(
        "--scale_lr",
        action="store_true",
        default=False,
        help="Scale the learning rate by the number of GPUs, gradient accumulation steps, and batch size.",
    )
    parser.add_argument(
        "--lr_scheduler",
        type=str,
        default="constant",
        help=(
            'The scheduler type to use. Choose between ["linear", "cosine", "cosine_with_restarts", "polynomial",'
            ' "constant", "constant_with_warmup"]'
        ),
    )
    parser.add_argument(
        "--lr_warmup_steps", type=int, default=500, help="Number of steps for the warmup in the lr scheduler."
    )
    parser.add_argument(
        "--lr_num_cycles",
        type=int,
        default=1,
        help="Number of hard resets of the lr in the cosine_with_restarts scheduler.",
    )
    parser.add_argument("--lr_power", type=float, default=1.0, help="Power factor of the polynomial scheduler.")
    parser.add_argument(
        "--dataloader_num_workers",
        type=int,
        default=0,
        help="Number of subprocesses to use for data loading. 0 means the data will be loaded in the main process.",
    )
    parser.add_argument(
        "--use_8bit_adam", action="store_true", help="Whether or not to use 8-bit Adam from bitsandbytes."
    )
    parser.add_argument("--adam_beta1", type=float, default=0.9, help="The beta1 parameter for the Adam optimizer.")
    parser.add_argument("--adam_beta2", type=float, default=0.999, help="The beta2 parameter for the Adam optimizer.")
    parser.add_argument("--adam_weight_decay", type=float, default=1e-2, help="Weight decay to use.")
    parser.add_argument("--adam_epsilon", type=float, default=1e-08, help="Epsilon value for the Adam optimizer.")
    parser.add_argument("--max_grad_norm", default=1.0, type=float, help="Max gradient norm.")
    parser.add_argument("--push_to_hub", action="store_true", help="Whether or not to push the model to the Hub.")
    parser.add_argument("--hub_token", type=str, default=None, help="The token to use to push to the Model Hub.")
    parser.add_argument(
        "--hub_model_id",
        type=str,
        default=None,
        help="The name of the repository to keep in sync with the local `output_dir`.",
    )
    parser.add_argument(
        "--logging_dir",
        type=str,
        default="logs",
        help=(
            "[TensorBoard](https://www.tensorflow.org/tensorboard) log directory. Will default to"
            " *output_dir/runs/**CURRENT_DATETIME_HOSTNAME***."
        ),
    )
    parser.add_argument(
        "--allow_tf32",
        action="store_true",
        help=(
            "Whether or not to allow TF32 on Ampere GPUs. Can be used to speed up training. For more information, see"
            " https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices"
        ),
    )
    parser.add_argument(
        "--report_to",
        type=str,
        default="tensorboard",
        help=(
            'The integration to report the results and logs to. Supported platforms are `"tensorboard"`'
            ' (default), `"wandb"` and `"comet_ml"`. Use `"all"` to report to all integrations.'
        ),
    )
    parser.add_argument(
        "--mixed_precision",
        type=str,
        default=None,
        choices=["no", "fp16", "bf16"],
        help=(
            "Whether to use mixed precision. Choose between fp16 and bf16 (bfloat16). Bf16 requires PyTorch >="
            " 1.10 and an Nvidia Ampere GPU. Defaults to the value of the accelerate config of the current system or"
            " the flag passed with the `accelerate.launch` command. Use this argument to override the accelerate"
            " config."
        ),
    )
    parser.add_argument(
        "--prior_generation_precision",
        type=str,
        default=None,
        choices=["no", "fp32", "fp16", "bf16"],
        help=(
            "Choose prior generation precision between fp32, fp16 and bf16 (bfloat16). Bf16 requires PyTorch >="
            " 1.10 and an Nvidia Ampere GPU. Defaults to fp16 if a GPU is available, else fp32."
        ),
    )
    parser.add_argument("--local_rank", type=int, default=-1, help="For distributed training: local_rank")
    parser.add_argument(
        "--enable_xformers_memory_efficient_attention", action="store_true", help="Whether or not to use xformers."
    )
    parser.add_argument("--lora_rank", type=int, default=4, help="The rank of the LoRA update matrices.")

    if input_args is not None:
        args = parser.parse_args(input_args)
    else:
        args = parser.parse_args()

    env_local_rank = int(os.environ.get("LOCAL_RANK", -1))
    if env_local_rank != -1 and env_local_rank != args.local_rank:
        args.local_rank = env_local_rank

    if args.with_prior_preservation:
        if args.class_data_dir is None:
            raise ValueError("You must specify a data directory for class images.")
        if args.class_prompt is None:
            raise ValueError("You must specify a prompt for class images.")
    else:
        # logger is not available yet
        if args.class_data_dir is not None:
            warnings.warn("You need not use --class_data_dir without --with_prior_preservation.")
        if args.class_prompt is not None:
            warnings.warn("You need not use --class_prompt without --with_prior_preservation.")

    return args


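# Expected data layout (illustrative): `--instance_data_dir` contains only the
# subject photos (a handful of images is typical for DreamBooth); when
# `--with_prior_preservation` is set, `--class_data_dir` is created and filled
# with generated class images automatically (see `main` below).
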
class DreamBoothDataset(Dataset):
    """
    A dataset to prepare the instance and class images with the prompts for fine-tuning the model.
    It pre-processes the images and tokenizes the prompts.
    """

    def __init__(
        self,
        instance_data_root,
        instance_prompt,
        tokenizer,
        class_data_root=None,
        class_prompt=None,
        size=512,
        center_crop=False,
    ):
        self.size = size
        self.center_crop = center_crop
        self.tokenizer = tokenizer

        self.instance_data_root = Path(instance_data_root)
        if not self.instance_data_root.exists():
            raise ValueError("Instance images root doesn't exist.")

        self.instance_images_path = list(Path(instance_data_root).iterdir())
        self.num_instance_images = len(self.instance_images_path)
        self.instance_prompt = instance_prompt
        self._length = self.num_instance_images

        if class_data_root is not None:
            self.class_data_root = Path(class_data_root)
            self.class_data_root.mkdir(parents=True, exist_ok=True)
            self.class_images_path = list(self.class_data_root.iterdir())
            self.num_class_images = len(self.class_images_path)
            self._length = max(self.num_class_images, self.num_instance_images)
            self.class_prompt = class_prompt
        else:
            self.class_data_root = None

        self.image_transforms = transforms.Compose(
            [
                transforms.Resize(size, interpolation=transforms.InterpolationMode.BILINEAR),
                transforms.CenterCrop(size) if center_crop else transforms.RandomCrop(size),
                transforms.ToTensor(),
                transforms.Normalize([0.5], [0.5]),
            ]
        )

    def __len__(self):
        return self._length

    def __getitem__(self, index):
        example = {}
        instance_image = Image.open(self.instance_images_path[index % self.num_instance_images])
        if not instance_image.mode == "RGB":
            instance_image = instance_image.convert("RGB")
        example["instance_images"] = self.image_transforms(instance_image)
        example["instance_prompt_ids"] = self.tokenizer(
            self.instance_prompt,
            truncation=True,
            padding="max_length",
            max_length=self.tokenizer.model_max_length,
            return_tensors="pt",
        ).input_ids

        if self.class_data_root:
            class_image = Image.open(self.class_images_path[index % self.num_class_images])
            if not class_image.mode == "RGB":
                class_image = class_image.convert("RGB")
            example["class_images"] = self.image_transforms(class_image)
            example["class_prompt_ids"] = self.tokenizer(
                self.class_prompt,
                truncation=True,
                padding="max_length",
                max_length=self.tokenizer.model_max_length,
                return_tensors="pt",
            ).input_ids

        return example


def collate_fn(examples, with_prior_preservation=False):
    input_ids = [example["instance_prompt_ids"] for example in examples]
    pixel_values = [example["instance_images"] for example in examples]

    # Concat class and instance examples for prior preservation.
    # We do this to avoid doing two forward passes.
    if with_prior_preservation:
        input_ids += [example["class_prompt_ids"] for example in examples]
        pixel_values += [example["class_images"] for example in examples]

    pixel_values = torch.stack(pixel_values)
    pixel_values = pixel_values.to(memory_format=torch.contiguous_format).float()

    input_ids = torch.cat(input_ids, dim=0)

    batch = {
        "input_ids": input_ids,
        "pixel_values": pixel_values,
    }
    return batch


class PromptDataset(Dataset):
    "A simple dataset to prepare the prompts to generate class images on multiple GPUs."

    def __init__(self, prompt, num_samples):
        self.prompt = prompt
        self.num_samples = num_samples

    def __len__(self):
        return self.num_samples

    def __getitem__(self, index):
        example = {}
        example["prompt"] = self.prompt
        example["index"] = index
        return example


def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: Optional[str] = None):
    if token is None:
        token = HfFolder.get_token()
    if organization is None:
        username = whoami(token)["name"]
        return f"{username}/{model_id}"
    else:
        return f"{organization}/{model_id}"


def main(args):
    logging_dir = Path(args.output_dir, args.logging_dir)

    accelerator_project_config = ProjectConfiguration(total_limit=args.checkpoints_total_limit)

    accelerator = Accelerator(
        gradient_accumulation_steps=args.gradient_accumulation_steps,
        mixed_precision=args.mixed_precision,
        log_with=args.report_to,
        logging_dir=logging_dir,
        project_config=accelerator_project_config,
    )

    if args.report_to == "wandb":
        if not is_wandb_available():
            raise ImportError("Make sure to install wandb if you want to use it for logging during training.")
        import wandb  # noqa: F401

    # Make one log on every process with the configuration for debugging.
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO,
    )
    logger.info(accelerator.state, main_process_only=False)
    if accelerator.is_local_main_process:
        transformers.utils.logging.set_verbosity_warning()
        diffusers.utils.logging.set_verbosity_info()
    else:
        transformers.utils.logging.set_verbosity_error()
        diffusers.utils.logging.set_verbosity_error()

    # If passed along, set the training seed now.
    if args.seed is not None:
        set_seed(args.seed)

    # Generate class images if prior preservation is enabled.
    if args.with_prior_preservation:
        class_images_dir = Path(args.class_data_dir)
        if not class_images_dir.exists():
            class_images_dir.mkdir(parents=True)
        cur_class_images = len(list(class_images_dir.iterdir()))

        if cur_class_images < args.num_class_images:
            torch_dtype = torch.float16 if accelerator.device.type == "cuda" else torch.float32
            if args.prior_generation_precision == "fp32":
                torch_dtype = torch.float32
            elif args.prior_generation_precision == "fp16":
                torch_dtype = torch.float16
            elif args.prior_generation_precision == "bf16":
                torch_dtype = torch.bfloat16
            pipeline = DiffusionPipeline.from_pretrained(
                args.pretrained_model_name_or_path,
                torch_dtype=torch_dtype,
                safety_checker=None,
                revision=args.revision,
            )
            pipeline.set_progress_bar_config(disable=True)

            num_new_images = args.num_class_images - cur_class_images
            logger.info(f"Number of class images to sample: {num_new_images}.")

            sample_dataset = PromptDataset(args.class_prompt, num_new_images)
            sample_dataloader = torch.utils.data.DataLoader(sample_dataset, batch_size=args.sample_batch_size)

            sample_dataloader = accelerator.prepare(sample_dataloader)
            pipeline.to(accelerator.device)

            for example in tqdm(
                sample_dataloader, desc="Generating class images", disable=not accelerator.is_local_main_process
            ):
                images = pipeline(example["prompt"]).images

                for i, image in enumerate(images):
                    hash_image = hashlib.sha1(image.tobytes()).hexdigest()
                    image_filename = class_images_dir / f"{example['index'][i] + cur_class_images}-{hash_image}.jpg"
                    image.save(image_filename)

            del pipeline
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

    # Handle the repository creation
    if accelerator.is_main_process:
        if args.push_to_hub:
            if args.hub_model_id is None:
                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
            else:
                repo_name = args.hub_model_id

            create_repo(repo_name, exist_ok=True, token=args.hub_token)
            repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)

            with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
                if "step_*" not in gitignore:
                    gitignore.write("step_*\n")
                if "epoch_*" not in gitignore:
                    gitignore.write("epoch_*\n")
        elif args.output_dir is not None:
            os.makedirs(args.output_dir, exist_ok=True)

    # Load the tokenizer
    if args.tokenizer_name:
        tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_name, revision=args.revision, use_fast=False)
    elif args.pretrained_model_name_or_path:
        tokenizer = AutoTokenizer.from_pretrained(
            args.pretrained_model_name_or_path,
            subfolder="tokenizer",
            revision=args.revision,
            use_fast=False,
        )

    # Import the correct text encoder class
    text_encoder_cls = import_model_class_from_model_name_or_path(args.pretrained_model_name_or_path, args.revision)

    # Load scheduler and models
    noise_scheduler = DDPMScheduler.from_pretrained(args.pretrained_model_name_or_path, subfolder="scheduler")
    text_encoder = text_encoder_cls.from_pretrained(
        args.pretrained_model_name_or_path, subfolder="text_encoder", revision=args.revision
    )
    vae = AutoencoderKL.from_pretrained(args.pretrained_model_name_or_path, subfolder="vae", revision=args.revision)
    unet = UNet2DConditionModel.from_pretrained(
        args.pretrained_model_name_or_path, subfolder="unet", revision=args.revision
    )

    # We only train the additional adapter LoRA layers
    vae.requires_grad_(False)
    text_encoder.requires_grad_(False)
    unet.requires_grad_(False)

    # For mixed precision training we cast the text_encoder and vae weights to half-precision,
    # as these models are only used for inference; keeping their weights in full precision is not required.
    weight_dtype = torch.float32
    if accelerator.mixed_precision == "fp16":
        weight_dtype = torch.float16
    elif accelerator.mixed_precision == "bf16":
        weight_dtype = torch.bfloat16

    # Move unet, vae and text_encoder to device and cast to weight_dtype
    unet.to(accelerator.device, dtype=weight_dtype)
    vae.to(accelerator.device, dtype=weight_dtype)
    text_encoder.to(accelerator.device, dtype=weight_dtype)

    if args.enable_xformers_memory_efficient_attention:
        if is_xformers_available():
            unet.enable_xformers_memory_efficient_attention()
        else:
            raise ValueError("xformers is not available. Make sure it is installed correctly.")

    if args.gradient_checkpointing:
        unet.enable_gradient_checkpointing()

    # Now we will add new LoRA weights to the attention layers.
    # It's important to realize here how many attention weights will be added and of which sizes.
    # The sizes of the attention layers consist only of two different variables:
    # 1) - the "hidden_size", which is increased according to `unet.config.block_out_channels`.
    # 2) - the "cross attention size", which is set to `unet.config.cross_attention_dim`.

    # Let's first see how many attention processors we will have to set.
    # For Stable Diffusion, it should be equal to:
    # - down blocks (2x attention layers) * (2x transformer layers) * (3x down blocks) = 12
    # - mid blocks (2x attention layers) * (1x transformer layers) * (1x mid blocks) = 2
    # - up blocks (2x attention layers) * (3x transformer layers) * (3x up blocks) = 18
    # => 32 layers

    # Set the correct lora layers
    lora_attn_procs = {}
    for name in unet.attn_processors.keys():
        cross_attention_dim = None if name.endswith("attn1.processor") else unet.config.cross_attention_dim
        if name.startswith("mid_block"):
            hidden_size = unet.config.block_out_channels[-1]
        elif name.startswith("up_blocks"):
            block_id = int(name[len("up_blocks.")])
            hidden_size = list(reversed(unet.config.block_out_channels))[block_id]
        elif name.startswith("down_blocks"):
            block_id = int(name[len("down_blocks.")])
            hidden_size = unet.config.block_out_channels[block_id]

        lora_attn_procs[name] = LoRACrossAttnProcessor(
            hidden_size=hidden_size, cross_attention_dim=cross_attention_dim, rank=args.lora_rank
        )

    unet.set_attn_processor(lora_attn_procs)
    lora_layers = AttnProcsLayers(unet.attn_processors)
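
    # Optional sanity check: for a standard Stable Diffusion v1 UNet the loop above
    # should have created 32 processors (12 down + 2 mid + 18 up, per the count
    # sketched in the comments above); logging the count makes misconfiguration easy to spot.
    logger.info(f"Added {len(lora_attn_procs)} LoRA attention processors.")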

    accelerator.register_for_checkpointing(lora_layers)

    if args.scale_lr:
        args.learning_rate = (
            args.learning_rate * args.gradient_accumulation_steps * args.train_batch_size * accelerator.num_processes
        )

    # Enable TF32 for faster training on Ampere GPUs,
    # cf https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices
    if args.allow_tf32:
        torch.backends.cuda.matmul.allow_tf32 = True

    # Use 8-bit Adam for lower memory usage or to fine-tune the model on 16GB GPUs
    if args.use_8bit_adam:
        try:
            import bitsandbytes as bnb
        except ImportError:
            raise ImportError(
                "To use 8-bit Adam, please install the bitsandbytes library: `pip install bitsandbytes`."
            )

        optimizer_class = bnb.optim.AdamW8bit
    else:
        optimizer_class = torch.optim.AdamW

    # Optimizer creation; only the LoRA parameters are optimized.
    optimizer = optimizer_class(
        lora_layers.parameters(),
        lr=args.learning_rate,
        betas=(args.adam_beta1, args.adam_beta2),
        weight_decay=args.adam_weight_decay,
        eps=args.adam_epsilon,
    )

    # Dataset and DataLoaders creation:
    train_dataset = DreamBoothDataset(
        instance_data_root=args.instance_data_dir,
        instance_prompt=args.instance_prompt,
        class_data_root=args.class_data_dir if args.with_prior_preservation else None,
        class_prompt=args.class_prompt,
        tokenizer=tokenizer,
        size=args.resolution,
        center_crop=args.center_crop,
    )

    train_dataloader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.train_batch_size,
        shuffle=True,
        collate_fn=lambda examples: collate_fn(examples, args.with_prior_preservation),
        num_workers=args.dataloader_num_workers,
    )

    # Scheduler and math around the number of training steps.
    overrode_max_train_steps = False
    num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps)
    if args.max_train_steps is None:
        args.max_train_steps = args.num_train_epochs * num_update_steps_per_epoch
        overrode_max_train_steps = True

    lr_scheduler = get_scheduler(
        args.lr_scheduler,
        optimizer=optimizer,
        num_warmup_steps=args.lr_warmup_steps * args.gradient_accumulation_steps,
        num_training_steps=args.max_train_steps * args.gradient_accumulation_steps,
        num_cycles=args.lr_num_cycles,
        power=args.lr_power,
    )

    # Prepare everything with our `accelerator`.
    lora_layers, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(
        lora_layers, optimizer, train_dataloader, lr_scheduler
    )

    # We need to recalculate our total training steps as the size of the training dataloader may have changed.
    num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps)
    if overrode_max_train_steps:
        args.max_train_steps = args.num_train_epochs * num_update_steps_per_epoch
    # Afterwards we recalculate our number of training epochs
    args.num_train_epochs = math.ceil(args.max_train_steps / num_update_steps_per_epoch)

    # Train!
    total_batch_size = args.train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps

    # Initialize the trackers we use and store our configuration.
    if accelerator.is_main_process:
        accelerator.init_trackers("dreambooth-lora", config=vars(args))

    logger.info("***** Running training *****")
    logger.info(f"  Num examples = {len(train_dataset)}")
    logger.info(f"  Num batches each epoch = {len(train_dataloader)}")
    logger.info(f"  Num Epochs = {args.num_train_epochs}")
    logger.info(f"  Instantaneous batch size per device = {args.train_batch_size}")
    logger.info(f"  Total train batch size (w. parallel, distributed & accumulation) = {total_batch_size}")
    logger.info(f"  Gradient Accumulation steps = {args.gradient_accumulation_steps}")
    logger.info(f"  Total optimization steps = {args.max_train_steps}")

    # Potentially load in the weights and states from a previous save
    global_step = 0
    first_epoch = 0
    resume_step = 0
    if args.resume_from_checkpoint:
        if args.resume_from_checkpoint != "latest":
            path = os.path.basename(args.resume_from_checkpoint)
        else:
            # Get the most recent checkpoint
            dirs = os.listdir(args.output_dir)
            dirs = [d for d in dirs if d.startswith("checkpoint")]
            dirs = sorted(dirs, key=lambda x: int(x.split("-")[1]))
            path = dirs[-1] if len(dirs) > 0 else None

        if path is None:
            accelerator.print(
                f"Checkpoint '{args.resume_from_checkpoint}' does not exist. Starting a new training run."
            )
            args.resume_from_checkpoint = None
        else:
            accelerator.print(f"Resuming from checkpoint {path}")
            accelerator.load_state(os.path.join(args.output_dir, path))
            global_step = int(path.split("-")[1])
            first_epoch = global_step // num_update_steps_per_epoch
            resume_step = (global_step * args.gradient_accumulation_steps) % (
                num_update_steps_per_epoch * args.gradient_accumulation_steps
            )

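    # Training loop: a minimal sketch of the standard DreamBooth LoRA recipe used in
    # the diffusers examples. Each step encodes the images to latents, adds noise at a
    # random timestep, predicts the noise with the LoRA-augmented UNet, and minimizes
    # the MSE against that noise (plus the optional prior-preservation term).
    progress_bar = tqdm(range(global_step, args.max_train_steps), disable=not accelerator.is_local_main_process)
    progress_bar.set_description("Steps")

    for epoch in range(first_epoch, args.num_train_epochs):
        unet.train()
        for step, batch in enumerate(train_dataloader):
            # Skip steps until we reach the resumed step.
            if args.resume_from_checkpoint and epoch == first_epoch and step < resume_step:
                if step % args.gradient_accumulation_steps == 0:
                    progress_bar.update(1)
                continue

            with accelerator.accumulate(unet):
                # Convert images to latent space.
                latents = vae.encode(batch["pixel_values"].to(dtype=weight_dtype)).latent_dist.sample()
                latents = latents * vae.config.scaling_factor  # 0.18215 for the SD v1 VAE

                # Sample noise and a random timestep for each image.
                noise = torch.randn_like(latents)
                bsz = latents.shape[0]
                timesteps = torch.randint(
                    0, noise_scheduler.config.num_train_timesteps, (bsz,), device=latents.device
                ).long()

                # Forward diffusion: add noise to the latents.
                noisy_latents = noise_scheduler.add_noise(latents, noise, timesteps)

                # Get the text embedding for conditioning.
                encoder_hidden_states = text_encoder(batch["input_ids"])[0]

                # Predict the noise residual.
                model_pred = unet(noisy_latents, timesteps, encoder_hidden_states).sample

                # Get the target for the loss depending on the prediction type.
                if noise_scheduler.config.prediction_type == "epsilon":
                    target = noise
                elif noise_scheduler.config.prediction_type == "v_prediction":
                    target = noise_scheduler.get_velocity(latents, noise, timesteps)
                else:
                    raise ValueError(f"Unknown prediction type {noise_scheduler.config.prediction_type}")

                if args.with_prior_preservation:
                    # The batch concatenates instance and class examples (see collate_fn);
                    # split them and weight the prior-preservation loss separately.
                    model_pred, model_pred_prior = torch.chunk(model_pred, 2, dim=0)
                    target, target_prior = torch.chunk(target, 2, dim=0)
                    loss = F.mse_loss(model_pred.float(), target.float(), reduction="mean")
                    prior_loss = F.mse_loss(model_pred_prior.float(), target_prior.float(), reduction="mean")
                    loss = loss + args.prior_loss_weight * prior_loss
                else:
                    loss = F.mse_loss(model_pred.float(), target.float(), reduction="mean")

                accelerator.backward(loss)
                if accelerator.sync_gradients:
                    accelerator.clip_grad_norm_(lora_layers.parameters(), args.max_grad_norm)
                optimizer.step()
                lr_scheduler.step()
                optimizer.zero_grad()

            # The accelerator performs the optimizer step once enough gradients have accumulated.
            if accelerator.sync_gradients:
                progress_bar.update(1)
                global_step += 1

                if global_step % args.checkpointing_steps == 0 and accelerator.is_main_process:
                    save_path = os.path.join(args.output_dir, f"checkpoint-{global_step}")
                    accelerator.save_state(save_path)
                    logger.info(f"Saved state to {save_path}")

            logs = {"loss": loss.detach().item(), "lr": lr_scheduler.get_last_lr()[0]}
            progress_bar.set_postfix(**logs)
            accelerator.log(logs, step=global_step)

            if global_step >= args.max_train_steps:
                break
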
    # Save the lora layers
    accelerator.wait_for_everyone()
    if accelerator.is_main_process:
        unet = unet.to(torch.float32)
        unet.save_attn_procs(args.output_dir, weights_name=WEIGHTS_NAME, save_function=torch.save)
        unet.save_attn_procs(
            args.output_dir, weights_name=SAFETENSORS_WEIGHTS_NAME, save_function=safetensors.torch.save_file
        )

        # Final inference: load the pipeline again and apply the trained LoRA attention processors.
        pipeline = DiffusionPipeline.from_pretrained(
            args.pretrained_model_name_or_path, revision=args.revision, torch_dtype=weight_dtype, safety_checker=None
        )
        pipeline.scheduler = DPMSolverMultistepScheduler.from_config(pipeline.scheduler.config)
        pipeline = pipeline.to(accelerator.device)

        # Load attention processors
        pipeline.unet.load_attn_procs(args.output_dir)

        # Run inference. The images are generated before the model card is written so
        # that they can be included in it.
        output_dir = os.path.basename(args.output_dir)
        os.makedirs(os.path.join("samples", output_dir), exist_ok=True)
        images = []
        if args.validation_prompt and args.num_validation_images > 0:
            generator = torch.Generator(device=accelerator.device).manual_seed(args.seed) if args.seed else None
            for i in range(args.num_validation_images):
                image = pipeline(args.validation_prompt, num_inference_steps=25, generator=generator).images[0]
                image.save(os.path.join("samples", output_dir, f"{i}.png"))
                images.append(image)

        if args.push_to_hub:
            save_model_card(
                repo_name,
                images=images,
                base_model=args.pretrained_model_name_or_path,
                prompt=args.instance_prompt,
                repo_folder=args.output_dir,
            )
            repo.push_to_hub(commit_message="End of training", blocking=False, auto_lfs_prune=True)

    accelerator.end_training()


if __name__ == "__main__":
    args = parse_args()
    main(args)