
Commit

Merge remote-tracking branch 'upstream/master' into interrogate_include_ranks_in_output
HunterVacui committed Oct 12, 2022
2 parents d717eb0 + 698d303 commit fb3cefb
Showing 20 changed files with 705 additions and 224 deletions.
1 change: 0 additions & 1 deletion artists.csv
@@ -1045,7 +1045,6 @@ Bakemono Zukushi,0.67051035,anime
Lucy Madox Brown,0.67032814,fineart
Paul Wonner,0.6700563,scribbles
Guido Borelli Da Caluso,0.66966087,digipa-high-impact
Guido Borelli da Caluso,0.66966087,digipa-high-impact
Emil Alzamora,0.5844039,nudity
Heinrich Brocksieper,0.64469147,fineart
Dan Smith,0.669563,digipa-high-impact
10 changes: 5 additions & 5 deletions environment-wsl2.yaml
@@ -3,9 +3,9 @@ channels:
- pytorch
- defaults
dependencies:
- python=3.8.5
- pip=20.3
- python=3.10
- pip=22.2.2
- cudatoolkit=11.3
- pytorch=1.11.0
- torchvision=0.12.0
- numpy=1.19.2
- pytorch=1.12.1
- torchvision=0.13.1
- numpy=1.23.1
1 change: 1 addition & 0 deletions javascript/edit-attention.js
@@ -25,6 +25,7 @@ addEventListener('keydown', (event) => {
} else {
end = target.value.slice(selectionEnd + 1).indexOf(")") + 1;
weight = parseFloat(target.value.slice(selectionEnd + 1, selectionEnd + 1 + end));
if (isNaN(weight)) return;
if (event.key == minus) weight -= 0.1;
if (event.key == plus) weight += 0.1;

5 changes: 4 additions & 1 deletion javascript/hints.js
@@ -80,7 +80,10 @@ titles = {
"Scale latent": "Uscale the image in latent space. Alternative is to produce the full image from latent representation, upscale that, and then move it back to latent space.",

"Eta noise seed delta": "If this values is non-zero, it will be added to seed and used to initialize RNG for noises when using samplers with Eta. You can use this to produce even more variation of images, or you can use this to match images of other software if you know what you are doing.",
"Do not add watermark to images": "If this option is enabled, watermark will not be added to created images. Warning: if you do not add watermark, you may be bevaing in an unethical manner.",
"Do not add watermark to images": "If this option is enabled, watermark will not be added to created images. Warning: if you do not add watermark, you may be behaving in an unethical manner.",

"Filename word regex": "This regular expression will be used extract words from filename, and they will be joined using the option below into label text used for training. Leave empty to keep filename text as it is.",
"Filename join string": "This string will be used to hoin split words into a single line if the option above is enabled.",
}


3 changes: 2 additions & 1 deletion javascript/ui.js
@@ -101,7 +101,8 @@ function create_tab_index_args(tabId, args){
}

function get_extras_tab_index(){
return create_tab_index_args('mode_extras', arguments)
const [,,...args] = [...arguments]
return [get_tab_index('mode_extras'), get_tab_index('extras_resize_mode'), ...args]
}

function create_submit_args(args){
136 changes: 114 additions & 22 deletions modules/deepbooru.py
@@ -1,21 +1,99 @@
import os.path
from concurrent.futures import ProcessPoolExecutor
from multiprocessing import get_context
import multiprocessing
import time
import re

re_special = re.compile(r'([\\()])')

def _load_tf_and_return_tags(pil_image, threshold, include_ranks):
def get_deepbooru_tags(pil_image):
"""
    This method is for running only one image at a time for simple use. Used by the img2img interrogate.
"""
from modules import shared # prevents circular reference

try:
create_deepbooru_process(shared.opts.interrogate_deepbooru_score_threshold, create_deepbooru_opts())
return get_tags_from_process(pil_image)
finally:
release_process()


def create_deepbooru_opts():
from modules import shared

return {
"use_spaces": shared.opts.deepbooru_use_spaces,
"use_escape": shared.opts.deepbooru_escape,
"alpha_sort": shared.opts.deepbooru_sort_alpha,
}


def deepbooru_process(queue, deepbooru_process_return, threshold, deepbooru_opts):
model, tags = get_deepbooru_tags_model()
while True: # while process is running, keep monitoring queue for new image
pil_image = queue.get()
if pil_image == "QUIT":
break
else:
deepbooru_process_return["value"] = get_deepbooru_tags_from_model(model, tags, pil_image, threshold, deepbooru_opts)


def create_deepbooru_process(threshold, deepbooru_opts):
"""
Creates deepbooru process. A queue is created to send images into the process. This enables multiple images
to be processed in a row without reloading the model or creating a new process. To return the data, a shared
dictionary is created to hold the tags created. To wait for tags to be returned, a value of -1 is assigned
to the dictionary and the method adding the image to the queue should wait for this value to be updated with
the tags.
"""
from modules import shared # prevents circular reference
shared.deepbooru_process_manager = multiprocessing.Manager()
shared.deepbooru_process_queue = shared.deepbooru_process_manager.Queue()
shared.deepbooru_process_return = shared.deepbooru_process_manager.dict()
shared.deepbooru_process_return["value"] = -1
shared.deepbooru_process = multiprocessing.Process(target=deepbooru_process, args=(shared.deepbooru_process_queue, shared.deepbooru_process_return, threshold, deepbooru_opts))
shared.deepbooru_process.start()


def get_tags_from_process(image):
from modules import shared

shared.deepbooru_process_return["value"] = -1
shared.deepbooru_process_queue.put(image)
while shared.deepbooru_process_return["value"] == -1:
time.sleep(0.2)
caption = shared.deepbooru_process_return["value"]
shared.deepbooru_process_return["value"] = -1

return caption


def release_process():
"""
Stops the deepbooru process to return used memory
"""
from modules import shared # prevents circular reference
shared.deepbooru_process_queue.put("QUIT")
shared.deepbooru_process.join()
shared.deepbooru_process_queue = None
shared.deepbooru_process = None
shared.deepbooru_process_return = None
shared.deepbooru_process_manager = None

def get_deepbooru_tags_model():
import deepdanbooru as dd
import tensorflow as tf
import numpy as np

this_folder = os.path.dirname(__file__)
model_path = os.path.abspath(os.path.join(this_folder, '..', 'models', 'deepbooru'))
if not os.path.exists(os.path.join(model_path, 'project.json')):
# there is no point importing these every time
import zipfile
from basicsr.utils.download_util import load_file_from_url
load_file_from_url(r"https://github.com/KichangKim/DeepDanbooru/releases/download/v3-20211112-sgd-e28/deepdanbooru-v3-20211112-sgd-e28.zip",
model_path)
load_file_from_url(
r"https://github.com/KichangKim/DeepDanbooru/releases/download/v3-20211112-sgd-e28/deepdanbooru-v3-20211112-sgd-e28.zip",
model_path)
with zipfile.ZipFile(os.path.join(model_path, "deepdanbooru-v3-20211112-sgd-e28.zip"), "r") as zip_ref:
zip_ref.extractall(model_path)
os.remove(os.path.join(model_path, "deepdanbooru-v3-20211112-sgd-e28.zip"))
@@ -24,6 +102,17 @@ def _load_tf_and_return_tags(pil_image, threshold, include_ranks):
model = dd.project.load_model_from_project(
model_path, compile_model=True
)
return model, tags


def get_deepbooru_tags_from_model(model, tags, pil_image, threshold, deepbooru_opts):
import deepdanbooru as dd
import tensorflow as tf
import numpy as np

alpha_sort = deepbooru_opts['alpha_sort']
use_spaces = deepbooru_opts['use_spaces']
use_escape = deepbooru_opts['use_escape']

width = model.input_shape[2]
height = model.input_shape[1]
@@ -46,32 +135,35 @@ def _load_tf_and_return_tags(pil_image, threshold, include_ranks):

for i, tag in enumerate(tags):
result_dict[tag] = y[i]
result_tags_out = []

unsorted_tags_in_theshold = []
result_tags_print = []
for tag in tags:
if result_dict[tag] >= threshold:
if tag.startswith("rating:"):
continue
tag_formatted = tag.replace('_', ' ').replace(':', ' ')
if include_ranks:
result_tags_out.append(f'({tag_formatted}:{result_dict[tag]})')
else:
result_tags_out.append(tag_formatted)
unsorted_tags_in_theshold.append((result_dict[tag], tag))
result_tags_print.append(f'{result_dict[tag]} {tag}')

print('\n'.join(sorted(result_tags_print, reverse=True)))
# sort tags
result_tags_out = []
sort_ndx = 0
if alpha_sort:
sort_ndx = 1

# sort by reverse by likelihood and normal for alpha
unsorted_tags_in_theshold.sort(key=lambda y: y[sort_ndx], reverse=(not alpha_sort))
for weight, tag in unsorted_tags_in_theshold:
result_tags_out.append(tag)

return ', '.join(result_tags_out)
print('\n'.join(sorted(result_tags_print, reverse=True)))

tags_text = ', '.join(result_tags_out)

def subprocess_init_no_cuda():
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
if use_spaces:
tags_text = tags_text.replace('_', ' ')

if use_escape:
tags_text = re.sub(re_special, r'\\\1', tags_text)

def get_deepbooru_tags(pil_image, threshold=0.5, include_ranks=False):
context = get_context('spawn')
with ProcessPoolExecutor(initializer=subprocess_init_no_cuda, mp_context=context) as executor:
f = executor.submit(_load_tf_and_return_tags, pil_image, threshold, include_ranks)
ret = f.result() # will rethrow any exceptions
return ret
return tags_text.replace(':', ' ')
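
The docstring on create_deepbooru_process describes the intended workflow: start the worker once, push any number of images through the queue, then release it. Below is a minimal sketch of that calling pattern, assuming the module paths used in this commit; tag_folder and its arguments are hypothetical and only for illustration.

# Sketch only: drives the queue-based helpers added in this file so the
# DeepDanbooru model is loaded once for a whole batch of images.
from PIL import Image

from modules import deepbooru, shared  # assumed import paths, as used in this commit


def tag_folder(paths):  # hypothetical helper, not part of the commit
    threshold = shared.opts.interrogate_deepbooru_score_threshold
    deepbooru.create_deepbooru_process(threshold, deepbooru.create_deepbooru_opts())
    try:
        # Reuse the single worker process for every image instead of
        # reloading TensorFlow and the model on each call.
        return [deepbooru.get_tags_from_process(Image.open(p)) for p in paths]
    finally:
        deepbooru.release_process()  # sends "QUIT" and frees the model's memory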
20 changes: 15 additions & 5 deletions modules/extras.py
@@ -1,3 +1,4 @@
import math
import os

import numpy as np
@@ -19,7 +20,7 @@
cached_images = {}


def run_extras(extras_mode, image, image_folder, gfpgan_visibility, codeformer_visibility, codeformer_weight, upscaling_resize, extras_upscaler_1, extras_upscaler_2, extras_upscaler_2_visibility):
def run_extras(extras_mode, resize_mode, image, image_folder, gfpgan_visibility, codeformer_visibility, codeformer_weight, upscaling_resize, upscaling_resize_w, upscaling_resize_h, upscaling_crop, extras_upscaler_1, extras_upscaler_2, extras_upscaler_2_visibility):
devices.torch_gc()

imageArr = []
@@ -67,8 +68,13 @@ def run_extras(extras_mode, image, image_folder, gfpgan_visibility, codeformer_v
info += f"CodeFormer w: {round(codeformer_weight, 2)}, CodeFormer visibility:{round(codeformer_visibility, 2)}\n"
image = res

if resize_mode == 1:
upscaling_resize = max(upscaling_resize_w/image.width, upscaling_resize_h/image.height)
crop_info = " (crop)" if upscaling_crop else ""
info += f"Resize to: {upscaling_resize_w:g}x{upscaling_resize_h:g}{crop_info}\n"

if upscaling_resize != 1.0:
def upscale(image, scaler_index, resize):
def upscale(image, scaler_index, resize, mode, resize_w, resize_h, crop):
small = image.crop((image.width // 2, image.height // 2, image.width // 2 + 10, image.height // 2 + 10))
pixels = tuple(np.array(small).flatten().tolist())
key = (resize, scaler_index, image.width, image.height, gfpgan_visibility, codeformer_visibility, codeformer_weight) + pixels
@@ -77,15 +83,19 @@ def upscale(image, scaler_index, resize):
if c is None:
upscaler = shared.sd_upscalers[scaler_index]
c = upscaler.scaler.upscale(image, resize, upscaler.data_path)
if mode == 1 and crop:
cropped = Image.new("RGB", (resize_w, resize_h))
cropped.paste(c, box=(resize_w // 2 - c.width // 2, resize_h // 2 - c.height // 2))
c = cropped
cached_images[key] = c

return c

info += f"Upscale: {round(upscaling_resize, 3)}, model:{shared.sd_upscalers[extras_upscaler_1].name}\n"
res = upscale(image, extras_upscaler_1, upscaling_resize)
res = upscale(image, extras_upscaler_1, upscaling_resize, resize_mode, upscaling_resize_w, upscaling_resize_h, upscaling_crop)

if extras_upscaler_2 != 0 and extras_upscaler_2_visibility > 0:
res2 = upscale(image, extras_upscaler_2, upscaling_resize)
res2 = upscale(image, extras_upscaler_2, upscaling_resize, resize_mode, upscaling_resize_w, upscaling_resize_h, upscaling_crop)
info += f"Upscale: {round(upscaling_resize, 3)}, visibility: {round(extras_upscaler_2_visibility, 3)}, model:{shared.sd_upscalers[extras_upscaler_2].name}\n"
res = Image.blend(res, res2, extras_upscaler_2_visibility)

@@ -190,7 +200,7 @@ def inv_sigmoid(theta0, theta1, alpha):
theta_0[key] = theta_func(theta_0[key], theta_1[key], (float(1.0) - interp_amount)) # Need to reverse the interp_amount to match the desired mix ration in the merged checkpoint
if save_as_half:
theta_0[key] = theta_0[key].half()

for key in theta_1.keys():
if 'model' in key and key not in theta_0:
theta_0[key] = theta_1[key]
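
The new resize_mode == 1 branch in run_extras turns a target width and height into a single scale factor, max(target_w/width, target_h/height), so the upscaled image covers both dimensions, and then optionally centre-crops the result to the exact size. Below is a small standalone sketch of that arithmetic; resize_to_dimensions and the example numbers are illustrative, not part of the commit.

from PIL import Image


def resize_to_dimensions(image, target_w, target_h, crop=True):  # hypothetical helper
    # Same arithmetic as the resize_mode == 1 branch: scale so that both
    # target dimensions are covered.
    scale = max(target_w / image.width, target_h / image.height)
    resized = image.resize((round(image.width * scale), round(image.height * scale)))
    if not crop:
        return resized
    # Centre-crop by pasting onto a canvas of the target size, mirroring the
    # Image.new/paste logic added to upscale().
    canvas = Image.new("RGB", (target_w, target_h))
    canvas.paste(resized, box=(target_w // 2 - resized.width // 2,
                               target_h // 2 - resized.height // 2))
    return canvas


# Example: a 512x768 source resized to 1024x1024 gives scale = max(2.0, 1.33) = 2.0,
# i.e. an intermediate 1024x1536 image that the crop then trims to 1024x1024.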