Spaces:

Derendering
/

Model-Output-Playground

Running

App Files Community

Charlie Li committed on Oct 29, 2024

Commit

17f8269

1 Parent(s): 396546f

update page

Browse files

Files changed (2) hide show

app.py +32 -34
utils.py +12 -33

app.py CHANGED Viewed

@@ -39,10 +39,7 @@ captions = [
     "you",
     "letter",
 ]
-gif_base64_strings = {
-    caption: get_base64_encoded_gif(f"gifs/{name}")
-    for caption, name in zip(captions, gif_filenames)
-}
 sketches = [
     "bird.gif",
@@ -50,21 +47,23 @@ sketches = [
     "coffee.gif",
     "penguin.gif",
 ]
-sketches_base64_strings = {
-    name: get_base64_encoded_gif(f"sketches/{name}") for name in sketches
-}
 if not pre_generate:
-    print("Downloading pre-generated videos from google drive.")
-    # Download from gdown 1oT6zw1EbWg3lavBMXsL28piULGNmqJzA
-    gdown.download(
-        "https://drive.google.com/uc?id=1oT6zw1EbWg3lavBMXsL28piULGNmqJzA",
-        str(video_cache_dir / "gdrive_file.zip"),
-        quiet=False,
-    )
-    # Unzip the file to video_cache_dir
-    unzip_file(str(video_cache_dir / "gdrive_file.zip"))
 else:
     pregenerate_videos(video_cache_dir=video_cache_dir)
     print("Videos cached.")
@@ -143,14 +142,21 @@ def demo(Dataset, Model):
 with gr.Blocks() as app:
     gr.HTML(org_content)
-    gr.Markdown(
-        "# InkSight: Offline-to-Online Handwriting Conversion by Learning to Read and Write"
-    )
     gr.HTML(
         """
-        <div style="display: flex; align-items: center; margin-bottom: 20px;">
-            <a href="https://arxiv.org/pdf/2402.05804.pdf" target="_blank" style="font-size: 16px; background-color: #4CAF50; color: white; padding: 5px 7px; text-decoration: none; border-radius: 2px;">
-                📄 Read the Paper
             </a>
         </div>
         """
@@ -163,9 +169,7 @@ with gr.Blocks() as app:
         """
     )
     with gr.Row():
-        dataset = gr.Dropdown(
-            ["IAM", "IMGUR5K", "HierText"], label="Dataset", value="IAM"
-        )
         model = gr.Dropdown(
             ["Small-i", "Large-i", "Small-p"],
             label="InkSight Model Variant",
@@ -179,18 +183,12 @@ with gr.Blocks() as app:
     #     vanilla_img = gr.Image(label="Vanilla")
     with gr.Row():
-        d_t_text = gr.Textbox(
-            label="OCR recognition input to the model", interactive=False
-        )
         r_d_text = gr.Textbox(label="Recognition from the model", interactive=False)
         vanilla_text = gr.Textbox(label="Vanilla", interactive=False)
     with gr.Row():
-        d_t_vid = gr.Video(
-            label="Derender with Text (Click to stop/play)", autoplay=True
-        )
-        r_d_vid = gr.Video(
-            label="Recognize and Derender (Click to stop/play)", autoplay=True
-        )
         vanilla_vid = gr.Video(label="Vanilla (Click to stop/play)", autoplay=True)
     with gr.Row():

     "you",
     "letter",
 ]
+gif_base64_strings = {caption: get_base64_encoded_gif(f"gifs/{name}") for caption, name in zip(captions, gif_filenames)}
 sketches = [
     "bird.gif",
     "coffee.gif",
     "penguin.gif",
 ]
+sketches_base64_strings = {name: get_base64_encoded_gif(f"sketches/{name}") for name in sketches}
 if not pre_generate:
+    # Check if the file already exists
+    if not (video_cache_dir / "gdrive_file.zip").exists():
+        print("Downloading pre-generated videos from Google Drive.")
+        # Download from Google Drive using gdown
+        gdown.download(
+            "https://drive.google.com/uc?id=1oT6zw1EbWg3lavBMXsL28piULGNmqJzA",
+            str(video_cache_dir / "gdrive_file.zip"),
+            quiet=False,
+        )
+        # Unzip the file to video_cache_dir
+        unzip_file(str(video_cache_dir / "gdrive_file.zip"))
+    else:
+        print("File already exists. Skipping download.")
 else:
     pregenerate_videos(video_cache_dir=video_cache_dir)
     print("Videos cached.")
 with gr.Blocks() as app:
     gr.HTML(org_content)
+    gr.Markdown("# InkSight: Offline-to-Online Handwriting Conversion by Learning to Read and Write")
     gr.HTML(
         """
+        <div style="display: flex; gap: 10px; justify-content: left;">
+            <a href="https://arxiv.org/abs/2402.05804">
+                <img src="https://img.shields.io/badge/📄_Read_the_Paper-4CAF50?style=for-the-badge&logo=arxiv&logoColor=white" alt="Read the Paper">
+            </a>
+            <a href="https://github.com/google-research/inksight">
+            <img src="https://img.shields.io/badge/View_on_GitHub-181717?style=for-the-badge&logo=github&logoColor=white" alt="View on GitHub">
+            </a>
+            <a href="https://research.google/blog/a-return-to-hand-written-notes-by-learning-to-read-write/">
+                <img src="https://img.shields.io/badge/🌐_Google_Research_Blog-333333?style=for-the-badge&logo=google&logoColor=white" alt="Google Research Blog">
+            </a>
+            <a href="https://charlieleee.github.io/publication/inksight/">
+                <img src="https://img.shields.io/badge/ℹ️_Info-FFA500?style=for-the-badge&logo=info&logoColor=white" alt="Info">
             </a>
         </div>
         """
         """
     )
     with gr.Row():
+        dataset = gr.Dropdown(["IAM", "IMGUR5K", "HierText"], label="Dataset", value="IAM")
         model = gr.Dropdown(
             ["Small-i", "Large-i", "Small-p"],
             label="InkSight Model Variant",
     #     vanilla_img = gr.Image(label="Vanilla")
     with gr.Row():
+        d_t_text = gr.Textbox(label="OCR recognition input to the model", interactive=False)
         r_d_text = gr.Textbox(label="Recognition from the model", interactive=False)
         vanilla_text = gr.Textbox(label="Vanilla", interactive=False)
     with gr.Row():
+        d_t_vid = gr.Video(label="Derender with Text (Click to stop/play)", autoplay=True)
+        r_d_vid = gr.Video(label="Recognize and Derender (Click to stop/play)", autoplay=True)
         vanilla_vid = gr.Video(label="Vanilla (Click to stop/play)", autoplay=True)
     with gr.Row():

utils.py CHANGED Viewed

@@ -32,6 +32,8 @@ def get_svg_content(svg_path):
 def download_file(url, filename):
     response = requests.get(url)
     with open(filename, "wb") as f:
         f.write(response.content)
@@ -84,22 +86,15 @@ def plot_ink(ink, ax, lw=1.8, input_image=None, with_path=True, path_color="whit
         base_color = base_colors(len(ink.strokes) - 1 - i)
         hsv_color = colorsys.rgb_to_hsv(*base_color[:3])
-        darker_color = colorsys.hsv_to_rgb(
-            hsv_color[0], hsv_color[1], max(0, hsv_color[2] * 0.65)
-        )
-        colors = [
-            mcolors.to_rgba(darker_color, alpha=1 - (0.5 * j / len(x)))
-            for j in range(len(x))
-        ]
         points = np.array([x, y]).T.reshape(-1, 1, 2)
         segments = np.concatenate([points[:-1], points[1:]], axis=1)
         lc = LineCollection(segments, colors=colors, linewidth=lw)
         if with_path:
-            lc.set_path_effects(
-                [withStroke(linewidth=lw * 1.25, foreground=path_color)]
-            )
         ax.add_collection(lc)
     ax.set_xlim(0, 224)
@@ -107,9 +102,7 @@ def plot_ink(ink, ax, lw=1.8, input_image=None, with_path=True, path_color="whit
     ax.invert_yaxis()
-def plot_ink_to_video(
-    ink, output_name, lw=1.8, input_image=None, path_color="white", fps=30
-):
     fig, ax = plt.subplots(figsize=(4, 4), dpi=150)
     if input_image is not None:
@@ -143,26 +136,16 @@ def plot_ink_to_video(
             base_color = base_colors(len(ink.strokes) - 1 - stroke_index)
             hsv_color = colorsys.rgb_to_hsv(*base_color[:3])
-            darker_color = colorsys.hsv_to_rgb(
-                hsv_color[0], hsv_color[1], max(0, hsv_color[2] * 0.65)
-            )
-            visible_segments = (
-                segments[: frame - points_drawn]
-                if frame - points_drawn < len(segments)
-                else segments
-            )
             colors = [
-                mcolors.to_rgba(
-                    darker_color, alpha=1 - (0.5 * j / len(visible_segments))
-                )
                 for j in range(len(visible_segments))
             ]
             if len(visible_segments) > 0:
                 lc = LineCollection(visible_segments, colors=colors, linewidth=lw)
-                lc.set_path_effects(
-                    [withStroke(linewidth=lw * 1.25, foreground=path_color)]
-                )
                 ax.add_collection(lc)
             points_drawn += len(segments)
@@ -254,13 +237,9 @@ def pregenerate_videos(video_cache_dir):
                 if not os.path.exists(path):
                     continue
                 samples = os.listdir(path)
-                for name in tqdm(
-                    samples, desc=f"Generating {Model}-{Dataset}-{mode} videos"
-                ):
                     example_id = name.strip(".png")
-                    inkml_file = os.path.join(
-                        inkml_path_base, mode, f"{example_id}.inkml"
-                    )
                     if not os.path.exists(inkml_file):
                         continue
                     video_filename = f"{Model}_{Dataset}_{mode}_{example_id}.mp4"

 def download_file(url, filename):
+    if os.path.exists(filename):
+        return
     response = requests.get(url)
     with open(filename, "wb") as f:
         f.write(response.content)
         base_color = base_colors(len(ink.strokes) - 1 - i)
         hsv_color = colorsys.rgb_to_hsv(*base_color[:3])
+        darker_color = colorsys.hsv_to_rgb(hsv_color[0], hsv_color[1], max(0, hsv_color[2] * 0.65))
+        colors = [mcolors.to_rgba(darker_color, alpha=1 - (0.5 * j / len(x))) for j in range(len(x))]
         points = np.array([x, y]).T.reshape(-1, 1, 2)
         segments = np.concatenate([points[:-1], points[1:]], axis=1)
         lc = LineCollection(segments, colors=colors, linewidth=lw)
         if with_path:
+            lc.set_path_effects([withStroke(linewidth=lw * 1.25, foreground=path_color)])
         ax.add_collection(lc)
     ax.set_xlim(0, 224)
     ax.invert_yaxis()
+def plot_ink_to_video(ink, output_name, lw=1.8, input_image=None, path_color="white", fps=30):
     fig, ax = plt.subplots(figsize=(4, 4), dpi=150)
     if input_image is not None:
             base_color = base_colors(len(ink.strokes) - 1 - stroke_index)
             hsv_color = colorsys.rgb_to_hsv(*base_color[:3])
+            darker_color = colorsys.hsv_to_rgb(hsv_color[0], hsv_color[1], max(0, hsv_color[2] * 0.65))
+            visible_segments = segments[: frame - points_drawn] if frame - points_drawn < len(segments) else segments
             colors = [
+                mcolors.to_rgba(darker_color, alpha=1 - (0.5 * j / len(visible_segments)))
                 for j in range(len(visible_segments))
             ]
             if len(visible_segments) > 0:
                 lc = LineCollection(visible_segments, colors=colors, linewidth=lw)
+                lc.set_path_effects([withStroke(linewidth=lw * 1.25, foreground=path_color)])
                 ax.add_collection(lc)
             points_drawn += len(segments)
                 if not os.path.exists(path):
                     continue
                 samples = os.listdir(path)
+                for name in tqdm(samples, desc=f"Generating {Model}-{Dataset}-{mode} videos"):
                     example_id = name.strip(".png")
+                    inkml_file = os.path.join(inkml_path_base, mode, f"{example_id}.inkml")
                     if not os.path.exists(inkml_file):
                         continue
                     video_filename = f"{Model}_{Dataset}_{mode}_{example_id}.mp4"