Commit 63e0d46 · Parent: af6c0a4 · add multi-group

app.py CHANGED
@@ -41,89 +41,6 @@ def load_preprocessor(device, is_eval: bool = True, ckpt_path: str = "./ckpt/pre
     preprocessor.eval()
     return preprocessor
 
-def nearest(src, trg):
-    dis_mat = torch.cdist(src, trg)
-    min_idx = torch.argmin(dis_mat, dim=-1)
-    return min_idx
-
-def normalize(A, dim, mode="all"):
-    if mode == "all":
-        A = (A - A.mean()) / (A.std() + 1e-6)
-        A = A - A.min()
-    elif mode == "dim":
-        A = A / dim
-    elif mode == "null":
-        pass
-    return A
-
-def draw_NN(data, code):
-    # nearest neighbor method
-    indices = nearest(data, code)
-    data = data.numpy()
-    code = code.numpy()
-
-    plt.figure(figsize=(3, 2.5), dpi=400)
-    # draw arrows in blue color, alpha=0.5
-    for i in range(data.shape[0]):
-        idx = indices[i].item()
-        start = data[i]
-        end = code[idx]
-        plt.arrow(start[0], start[1], end[0] - start[0], end[1] - start[1],
-                  head_width=0.05, head_length=0.05, fc='red', ec='red', alpha=0.6,
-                  ls="-", lw=0.5)
-    plt.scatter(data[:, 0], data[:, 1], s=10, marker="o", c="gray", label="Data")
-    plt.scatter(code[:, 0], code[:, 1], s=25, marker="*", c="blue", label="Code")
-    plt.legend(loc="lower right")
-    plt.grid(color="gray", alpha=0.8, ls="-.", lw=0.5)
-    plt.title("Nearest neighbor")
-
-    buf = BytesIO()
-    plt.savefig(buf, format="png")
-    buf.seek(0)
-    image = Image.open(buf)
-    return image
-
-def draw_optvq(data, code):
-    cost = torch.cdist(data, code, p=2.0)
-    cost = normalize(cost, dim, mode="all")
-    Q = sinkhorn(cost, n_iters=5, epsilon=10, is_distributed=False)
-    indices = torch.argmax(Q, dim=-1)
-    data = data.numpy()
-    code = code.numpy()
-
-    plt.figure(figsize=(3, 2.5), dpi=400)
-    # draw arrows in blue color, alpha=0.5
-    for i in range(data.shape[0]):
-        idx = indices[i].item()
-        start = data[i]
-        end = code[idx]
-        plt.arrow(start[0], start[1], end[0] - start[0], end[1] - start[1],
-                  head_width=0.05, head_length=0.05, fc='green', ec='green', alpha=0.6,
-                  ls="-", lw=0.5)
-    plt.scatter(data[:, 0], data[:, 1], s=10, marker="o", c="gray", label="Data")
-    plt.scatter(code[:, 0], code[:, 1], s=25, marker="*", c="blue", label="Code")
-    plt.legend(loc="lower right")
-    plt.grid(color="gray", alpha=0.8, ls="-.", lw=0.5)
-    plt.title("Optimal Transport (OptVQ)")
-
-    buf = BytesIO()
-    plt.savefig(buf, format="png")
-    buf.seek(0)
-    image = Image.open(buf)
-    return image
-
-def draw_process(x, y, std):
-    data = torch.randn(N_data, dim)
-    code = torch.randn(N_code, dim) * std
-    code[:, 0] += x
-    code[:, 1] += y
-
-    image_NN = draw_NN(data, code)
-    image_optvq = draw_optvq(data, code)
-
-    return image_NN, image_optvq
-
-
 # ReVQ: for reset strategy
 def fig_to_array(fig):
     buf = BytesIO()
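The removed draw_optvq above builds its assignment from a sinkhorn() helper whose implementation is not part of this diff. For orientation only, a minimal sketch of the entropic Sinkhorn normalization such a helper typically performs; the name sinkhorn_sketch, the division by epsilon, and the 1e-8 stabilizer are assumptions, not the repository's actual code.

import torch

def sinkhorn_sketch(cost: torch.Tensor, n_iters: int = 5, epsilon: float = 10.0) -> torch.Tensor:
    # Soft assignment from an (N_data, N_code) cost matrix: exponentiate the negative cost,
    # then alternately normalize rows and columns toward a doubly stochastic plan.
    Q = torch.exp(-cost / epsilon)
    for _ in range(n_iters):
        Q = Q / (Q.sum(dim=1, keepdim=True) + 1e-8)  # normalize over codes
        Q = Q / (Q.sum(dim=0, keepdim=True) + 1e-8)  # normalize over data
    return Q

cost = torch.cdist(torch.randn(16, 2), torch.randn(8, 2), p=2.0)
indices = torch.argmax(sinkhorn_sketch(cost), dim=-1)  # same argmax step as in draw_optvq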
@@ -211,6 +128,83 @@ def draw_reset_result(num_data=16, num_code=12):
 # end
 
 
+# ReVQ: for multi-group
+def get_codebook_v2(quantizer):
+    with torch.no_grad():
+        embedding = quantizer.embeddings
+        if quantizer.num_group == 1:
+            group1 = embedding[0].squeeze()
+            group2 = embedding[0].squeeze()
+        else:
+            group1 = embedding[0].squeeze()
+            group2 = embedding[1].squeeze()
+        codes = torch.cartesian_prod(group1, group2)
+    return codes
+
+def draw_fig_v2(ax, quantizer, data, title=""):
+    codes = get_codebook(quantizer)
+    ax.scatter(data[:, 0], data[:, 1], s=60, marker="*")
+    ax.scatter(codes[:, 0], codes[:, 1], s=20, c='red', alpha=0.5)
+    ax.plot([-12, 12], [-12, 12], color='orange', linestyle='--', linewidth=2)
+    ax.set_xlim(-12, 12)
+    ax.set_ylim(-12, 12)
+    ax.tick_params(axis='x', labelsize=22)
+    ax.tick_params(axis='y', labelsize=22)
+    ax.set_xticks(np.arange(-10, 11, 5))
+    ax.set_yticks(np.arange(-10, 11, 5))
+    ax.grid(linestyle='--', color='#333333', alpha=0.7)
+    ax.set_title(f"{title}", fontsize=26)
+
+
+def draw_multi_group_result(num_data=16, num_code=12):
+    fig_s, ax_s = plt.subplots(1, 6, figsize=(36, 6), dpi=400)
+    fig_m, ax_m = plt.subplots(1, 6, figsize=(36, 6), dpi=400)
+    x = torch.randn(num_data, 1) * 3 + 4
+    y = torch.randn(num_data, 1) * 3 - 4
+    data = torch.cat([x, y], dim=1)
+    quantizer_s = Quantizer(TYPE='vq', code_dim=1, num_code=num_code, num_group=1, tokens_per_data=2)
+    optimizer_s = torch.optim.SGD(quantizer_s.parameters(), lr=0.1)
+    quantizer_m = Quantizer(TYPE='vq', code_dim=1, num_code=num_code, num_group=2, tokens_per_data=2)
+    optimizer_m = torch.optim.SGD(quantizer_m.parameters(), lr=0.1)
+    draw_fig_v2(ax_s[0], quantizer_s, data, title=f"Initialization")
+    draw_fig_v2(ax_m[0], quantizer_m, data, title=f"Initialization")
+    ax_s[0].legend(["Data", "Code"], loc="upper right", fontsize=24)
+    ax_m[0].legend(["Data", "Code"], loc="upper right", fontsize=24)
+    i_list = [5, 20, 50, 200, 1000]
+
+    count = 0
+    for i in range(1500):
+        optimizer_s.zero_grad()
+        optimizer_m.zero_grad()
+        quant_data_s = quantizer_s(data.unsqueeze(-1))["x_quant"].squeeze()
+        quant_data_m = quantizer_m(data.unsqueeze(-1))["x_quant"].squeeze()
+        loss_s = torch.mean((quant_data_s - data) ** 2)
+        loss_m = torch.mean((quant_data_m - data) ** 2)
+        loss_s.backward()
+        loss_m.backward()
+        optimizer_s.step()
+        optimizer_m.step()
+
+        if (i+1) in i_list:
+            count += 1
+            draw_fig_v2(ax_s[count], quantizer_s, data, title=f"Iters: {i+1}, MSE: {loss_s.item():.1f}")
+            draw_fig_v2(ax_m[count], quantizer_m, data, title=f"Iters: {i+1}, MSE: {loss_m.item():.1f}")
+
+        quantizer_s.reset()
+        quantizer_m.reset()
+
+    fig_s.suptitle("VQ Codebook Training with Single Group", fontsize=24, y=1.05)
+    fig_m.suptitle("VQ Codebook Training with Multi Group", fontsize=24, y=1.05)
+
+    img_s = fig_to_array(fig_s)
+    img_m = fig_to_array(fig_m)
+
+    return img_s, img_m
+
+# end
+
+# ReVQ: for image reconstruction
+
 class Handler:
     def __init__(self, device):
         self.transform = T.Compose([
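The new get_codebook_v2 builds the plotted 2-D code grid as the Cartesian product of two 1-D per-group codebooks. A self-contained sketch of that composition, with made-up values; only the torch.cartesian_prod call mirrors the diff.

import torch

group1 = torch.tensor([-1.0, 0.0, 1.0])       # 1-D codes of group 1 (code_dim=1)
group2 = torch.tensor([-2.0, 2.0])            # 1-D codes of group 2
codes = torch.cartesian_prod(group1, group2)  # every (group1, group2) pairing
print(codes.shape)                            # torch.Size([6, 2])

With num_code entries per group, the multi-group panels drawn by draw_fig_v2 therefore show num_code ** 2 composite codes against the same data, whereas in the single-group case both channels reuse the same 1-D codebook (group1 == group2 above).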
@@ -326,14 +320,31 @@ if __name__ == "__main__":
 
     submit_btn.click(fn=draw_reset_result, inputs=[num_data, num_code], outputs=[out_with_reset, out_without_reset])
 
+
+    with gr.Blocks() as demo3:
+        gr.Markdown("## Demo 3: Channel Multi-Group Strategy Visualization")
+        gr.Markdown("Visualizes codebook and data movement at different training steps with the multi-group strategy.")
+
+        with gr.Row():
+            num_data = gr.Slider(label="num_data", value=32, minimum=28, maximum=40, step=1)
+            num_code = gr.Slider(label="num_code", value=8, minimum=6, maximum=10, step=1)
+
+        submit_btn = gr.Button("Run Visualization")
+
+        with gr.Column():  # stack the outputs vertically
+            out_s = gr.Image(label="Single Group")
+            out_m = gr.Image(label="Multi Group")
+
+        submit_btn.click(fn=draw_multi_group_result, inputs=[num_data, num_code], outputs=[out_s, out_m])
+
     # merge the interfaces into a tabbed UI
     # demo = gr.TabbedInterface(
     #     interface_list=[demo1, demo2],
     #     tab_names=["Image Reconstruction", "Reset Strategy"]
     # )
     demo = gr.TabbedInterface(
-        interface_list=[demo2],
-        tab_names=["Reset Strategy"]
+        interface_list=[demo2, demo3],
+        tab_names=["Reset Strategy", "Channel Multi-Group Strategy"]
     )
 
     demo.launch(share=True)
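The pieces visible in this diff are enough for a quick smoke test of the new tab's callback outside Gradio. The sketch below reuses only the constructor arguments and the "x_quant" output key shown above; the Quantizer class itself comes from elsewhere in the repository and is assumed to be importable.

import torch

# Same toy data distribution as draw_multi_group_result (num_data=32).
data = torch.cat([torch.randn(32, 1) * 3 + 4, torch.randn(32, 1) * 3 - 4], dim=1)
quantizer_m = Quantizer(TYPE='vq', code_dim=1, num_code=8, num_group=2, tokens_per_data=2)
x_quant = quantizer_m(data.unsqueeze(-1))["x_quant"].squeeze()
print(torch.mean((x_quant - data) ** 2).item())  # reconstruction MSE before any training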