Spaces:

AndyRaoTHU
/

ReVQ

Sleeping

App Files Community

AndyRaoTHU committed on Jul 9

Commit

4359005

1 Parent(s): 57b27d9

update

Browse files

Files changed (1) hide show

app.py +23 -61

app.py CHANGED Viewed

@@ -23,9 +23,6 @@ from revq.models.vqgan_hf import VQModelHF
 from diffusers import AutoencoderDC
 #################
-N_data = 50
-N_code = 20
-dim = 2
 handler = None
 device = torch.device("cpu")
 #################
@@ -54,10 +51,13 @@ def get_codebook(quantizer):
         codes = quantizer.embeddings.squeeze().detach()
     return codes
-def draw_fig(ax, quantizer, data, title=""):
     codes = get_codebook(quantizer)
     ax.scatter(data[:, 0], data[:, 1], s=60, marker="*")
-    ax.scatter(codes[:, 0], codes[:, 1], s=40, c='red', alpha=0.5)
     ax.set_xlim(-5, 10)
     ax.set_ylim(-10, 5)
     ax.tick_params(axis='x', labelsize=22)
@@ -83,8 +83,8 @@ def draw_reset_result(num_data=16, num_code=12):
     optimizer = torch.optim.SGD(quantizer.parameters(), lr=0.1)
     quantizer_nreset = Quantizer(TYPE='vq', code_dim=2, num_code=num_code, num_group=1, tokens_per_data=1, auto_reset=False)
     optimizer_nreset = torch.optim.SGD(quantizer_nreset.parameters(), lr=0.1)
-    draw_fig(ax_reset[0], quantizer, data, title=f"Initialization")
-    draw_fig(ax_nreset[0], quantizer_nreset, data, title=f"Initialization")
     ax_reset[0].legend(["Data", "Code"], loc="upper right", fontsize=24)
     ax_nreset[0].legend(["Data", "Code"], loc="upper right", fontsize=24)
@@ -109,10 +109,10 @@ def draw_reset_result(num_data=16, num_code=12):
         if (i+1) in i_list:
             count += 1
-            draw_fig(ax_reset[count], quantizer, data, title=f"Iters: {i+1}, MSE: {loss.item():.1f}")
             draw_arrow(ax_reset[count], quant_data.detach().numpy(), data.numpy())
-            draw_fig(ax_nreset[count], quantizer_nreset, data, title=f"Iters: {i+1}, MSE: {loss_nreset.item():.1f}")
             draw_arrow(ax_nreset[count], quant_data_nreset.detach().numpy(), data.numpy())
         quantizer.reset()
@@ -123,11 +123,10 @@ def draw_reset_result(num_data=16, num_code=12):
     img_reset = fig_to_array(fig_reset)
     img_nreset = fig_to_array(fig_nreset)
-    return img_reset, img_nreset
 # end
 # ReVQ: for multi-group
 def get_codebook_v2(quantizer):
     with torch.no_grad():
@@ -141,10 +140,13 @@ def get_codebook_v2(quantizer):
         codes = torch.cartesian_prod(group1, group2)
     return codes
-def draw_fig_v2(ax, quantizer, data, title=""):
     codes = get_codebook_v2(quantizer)
     ax.scatter(data[:, 0], data[:, 1], s=60, marker="*")
-    ax.scatter(codes[:, 0], codes[:, 1], s=20, c='red', alpha=0.5)
     ax.plot([-12, 12], [-12, 12], color='orange', linestyle='--', linewidth=2)
     ax.set_xlim(-12, 12)
     ax.set_ylim(-12, 12)
@@ -166,8 +168,8 @@ def draw_multi_group_result(num_data=16, num_code=12):
     optimizer_s = torch.optim.SGD(quantizer_s.parameters(), lr=0.1)
     quantizer_m = Quantizer(TYPE='vq', code_dim=1, num_code=num_code, num_group=2, tokens_per_data=2)
     optimizer_m = torch.optim.SGD(quantizer_m.parameters(), lr=0.1)
-    draw_fig_v2(ax_s[0], quantizer_s, data, title=f"Initialization")
-    draw_fig_v2(ax_m[0], quantizer_m, data, title=f"Initialization")
     ax_s[0].legend(["Data", "Code"], loc="upper right", fontsize=24)
     ax_m[0].legend(["Data", "Code"], loc="upper right", fontsize=24)
     i_list = [5, 20, 50, 200, 1000]
@@ -187,8 +189,8 @@ def draw_multi_group_result(num_data=16, num_code=12):
         if (i+1) in i_list:
             count += 1
-            draw_fig_v2(ax_s[count], quantizer_s, data, title=f"Iters: {i+1}, MSE: {loss_s.item():.1f}")
-            draw_fig_v2(ax_m[count], quantizer_m, data, title=f"Iters: {i+1}, MSE: {loss_m.item():.1f}")
         quantizer_s.reset()
         quantizer_m.reset()
@@ -204,7 +206,6 @@ def draw_multi_group_result(num_data=16, num_code=12):
 # end
 # ReVQ: for image reconstruction
 class Handler:
     def __init__(self, device):
         self.transform = T.Compose([
@@ -293,7 +294,7 @@ if __name__ == "__main__":
     with gr.Blocks() as demo2:
         gr.Markdown("## Demo 2: Codebook Reset Strategy Visualization")
-        gr.Markdown("Visualizes codebook and data movement at different training steps.")
         with gr.Row():
             num_data = gr.Slider(label="num_data", value=16, minimum=10, maximum=20, step=1)
@@ -302,15 +303,15 @@ if __name__ == "__main__":
         submit_btn = gr.Button("Run Visualization")
         with gr.Column():  # 垂直输出
-            out_with_reset = gr.Image(label="With Reset")
             out_without_reset = gr.Image(label="Without Reset")
-        submit_btn.click(fn=draw_reset_result, inputs=[num_data, num_code], outputs=[out_with_reset, out_without_reset])
     with gr.Blocks() as demo3:
         gr.Markdown("## Demo 3: Channel Multi-Group Strategy Visualization")
-        gr.Markdown("Visualizes codebook and data movement at different training steps with multi-group strategy.")
         with gr.Row():
             num_data = gr.Slider(label="num_data", value=32, minimum=28, maximum=40, step=1)
@@ -324,48 +325,9 @@ if __name__ == "__main__":
         submit_btn.click(fn=draw_multi_group_result, inputs=[num_data, num_code], outputs=[out_s, out_m])
-    # 合并两个 interface 成 Tabbed UI
-    # demo = gr.TabbedInterface(
-    #     interface_list=[demo1, demo2],
-    #     tab_names=["Image Reconstruction", "Reset Strategy"]
-    # )
     demo = gr.TabbedInterface(
         interface_list=[demo1, demo2, demo3],
         tab_names=["Image Reconstruction", "Reset Strategy", "Channel Multi-Group Strategy"]
     )
     demo.launch(share=True)
-    # create the interface
-    # with gr.Blocks() as demo:
-        # gr.Textbox(value="This demo shows the image reconstruction comparison between ReVQ and other methods. The input image is resized to 256 x 256 and then fed into the models. The output images are the reconstructed images from the latent codes.", label="Demo 1: Image reconstruction results")
-        # with gr.Row():
-        #     with gr.Column():
-        #         image_input = gr.Image(label="Input data", image_mode="RGB", type="numpy")
-        #         btn_demo1 = gr.Button(value="Run reconstruction")
-        #     image_basevq = gr.Image(label="BaseVQ rec.")
-        #     image_vqgan = gr.Image(label="VQGAN rec.")
-        #     image_revq = gr.Image(label="ReVQ rec.")
-        # btn_demo1.click(fn=handler.process_image, inputs=[image_input], outputs=[image_basevq, image_vqgan, image_revq])
-        # gr.Textbox(value="This demo shows the 2D visualizations of nearest neighbor and optimal transport (OptVQ) methods. The data points are randomly generated from a normal distribution, and the matching results are shown as arrows with different colors.", label="Demo 2: 2D visualizations of matching results")
-        # gr.Markdown("### Demo 2: 2D visualizations of matching results\n"
-        #         "This demo shows the 2D visualizations of nearest neighbor and optimal transport (OptVQ) methods. "
-        #         "The data points are randomly generated from a normal distribution, and the matching results are shown as arrows with different colors.")
-        # with gr.Row():
-        #     with gr.Column():
-        #         input_x = gr.Slider(label="x", value=0, minimum=-10, maximum=10, step=0.1)
-        #         input_y = gr.Slider(label="y", value=0, minimum=-10, maximum=10, step=0.1)
-        #         input_std = gr.Slider(label="std", value=1, minimum=0, maximum=5, step=0.1)
-        #         btn_demo2 = gr.Button(value="Run 2D example")
-        #     output_nn = gr.Image(label="NN", interactive=False, type="numpy")
-        #     output_optvq = gr.Image(label="OptVQ", interactive=False, type="numpy")
-        # # set the function
-        # input_x.change(fn=draw_process, inputs=[input_x, input_y, input_std], outputs=[output_nn, output_optvq])
-        # input_y.change(fn=draw_process, inputs=[input_x, input_y, input_std], outputs=[output_nn, output_optvq])
-        # input_std.change(fn=draw_process, inputs=[input_x, input_y, input_std], outputs=[output_nn, output_optvq])
-        # btn_demo2.click(fn=draw_process, inputs=[input_x, input_y, input_std], outputs=[output_nn, output_optvq])
-        # btn_demo2.click(fn=draw_process, inputs=[input_x, input_y, input_std], outputs=[output_nn, output_optvq])
-    # demo.launch()

 from diffusers import AutoencoderDC
 #################
 handler = None
 device = torch.device("cpu")
 #################
         codes = quantizer.embeddings.squeeze().detach()
     return codes
+def draw_fig(ax, quantizer, data, color="r", title=""):
     codes = get_codebook(quantizer)
     ax.scatter(data[:, 0], data[:, 1], s=60, marker="*")
+    if color == "r":
+        ax.scatter(codes[:, 0], codes[:, 1], s=40, c='red', alpha=0.5)
+    else:
+        ax.scatter(codes[:, 0], codes[:, 1], s=40, c='green', alpha=0.5)
     ax.set_xlim(-5, 10)
     ax.set_ylim(-10, 5)
     ax.tick_params(axis='x', labelsize=22)
     optimizer = torch.optim.SGD(quantizer.parameters(), lr=0.1)
     quantizer_nreset = Quantizer(TYPE='vq', code_dim=2, num_code=num_code, num_group=1, tokens_per_data=1, auto_reset=False)
     optimizer_nreset = torch.optim.SGD(quantizer_nreset.parameters(), lr=0.1)
+    draw_fig(ax_reset[0], quantizer, data, color='g', title=f"Initialization")
+    draw_fig(ax_nreset[0], quantizer_nreset, data, color='r', title=f"Initialization")
     ax_reset[0].legend(["Data", "Code"], loc="upper right", fontsize=24)
     ax_nreset[0].legend(["Data", "Code"], loc="upper right", fontsize=24)
         if (i+1) in i_list:
             count += 1
+            draw_fig(ax_reset[count], quantizer, data, color='g', title=f"Iters: {i+1}, MSE: {loss.item():.1f}")
             draw_arrow(ax_reset[count], quant_data.detach().numpy(), data.numpy())
+            draw_fig(ax_nreset[count], quantizer_nreset, data, color='r', title=f"Iters: {i+1}, MSE: {loss_nreset.item():.1f}")
             draw_arrow(ax_nreset[count], quant_data_nreset.detach().numpy(), data.numpy())
         quantizer.reset()
     img_reset = fig_to_array(fig_reset)
     img_nreset = fig_to_array(fig_nreset)
+    return img_nreset, img_reset
 # end
 # ReVQ: for multi-group
 def get_codebook_v2(quantizer):
     with torch.no_grad():
         codes = torch.cartesian_prod(group1, group2)
     return codes
+def draw_fig_v2(ax, quantizer, data, color='r', title=""):
     codes = get_codebook_v2(quantizer)
     ax.scatter(data[:, 0], data[:, 1], s=60, marker="*")
+    if color == "r":
+        ax.scatter(codes[:, 0], codes[:, 1], s=20, c='red', alpha=0.5)
+    else:
+        ax.scatter(codes[:, 0], codes[:, 1], s=20, c='green', alpha=0.5)
     ax.plot([-12, 12], [-12, 12], color='orange', linestyle='--', linewidth=2)
     ax.set_xlim(-12, 12)
     ax.set_ylim(-12, 12)
     optimizer_s = torch.optim.SGD(quantizer_s.parameters(), lr=0.1)
     quantizer_m = Quantizer(TYPE='vq', code_dim=1, num_code=num_code, num_group=2, tokens_per_data=2)
     optimizer_m = torch.optim.SGD(quantizer_m.parameters(), lr=0.1)
+    draw_fig_v2(ax_s[0], quantizer_s, data, color='r', title=f"Initialization")
+    draw_fig_v2(ax_m[0], quantizer_m, data, color='g', title=f"Initialization")
     ax_s[0].legend(["Data", "Code"], loc="upper right", fontsize=24)
     ax_m[0].legend(["Data", "Code"], loc="upper right", fontsize=24)
     i_list = [5, 20, 50, 200, 1000]
         if (i+1) in i_list:
             count += 1
+            draw_fig_v2(ax_s[count], quantizer_s, data, color='r', title=f"Iters: {i+1}, MSE: {loss_s.item():.1f}")
+            draw_fig_v2(ax_m[count], quantizer_m, data, color='g', title=f"Iters: {i+1}, MSE: {loss_m.item():.1f}")
         quantizer_s.reset()
         quantizer_m.reset()
 # end
 # ReVQ: for image reconstruction
 class Handler:
     def __init__(self, device):
         self.transform = T.Compose([
     with gr.Blocks() as demo2:
         gr.Markdown("## Demo 2: Codebook Reset Strategy Visualization")
+        gr.Markdown("Visualizes codebook and data movement at different training steps with or without codebook reset strategy.")
         with gr.Row():
             num_data = gr.Slider(label="num_data", value=16, minimum=10, maximum=20, step=1)
         submit_btn = gr.Button("Run Visualization")
         with gr.Column():  # 垂直输出
             out_without_reset = gr.Image(label="Without Reset")
+            out_with_reset = gr.Image(label="With Reset")
+        submit_btn.click(fn=draw_reset_result, inputs=[num_data, num_code], outputs=[out_without_reset, out_with_reset])
     with gr.Blocks() as demo3:
         gr.Markdown("## Demo 3: Channel Multi-Group Strategy Visualization")
+        gr.Markdown("Visualizes codebook and data movement at different training steps with or without multi-group strategy.")
         with gr.Row():
             num_data = gr.Slider(label="num_data", value=32, minimum=28, maximum=40, step=1)
         submit_btn.click(fn=draw_multi_group_result, inputs=[num_data, num_code], outputs=[out_s, out_m])
     demo = gr.TabbedInterface(
         interface_list=[demo1, demo2, demo3],
         tab_names=["Image Reconstruction", "Reset Strategy", "Channel Multi-Group Strategy"]
     )
     demo.launch(share=True)