Spaces:

Pixeltable
/

Text-image-similarity-search-on-video-frames-embedding-indexes

Runtime error

App Files Files Community

PierreBrunelle committed on Apr 1

Commit

214ca3f

verified ·

1 Parent(s): 36f43f4

Update app.py

Browse files

Files changed (1) hide show

app.py +161 -80

app.py CHANGED Viewed

@@ -2,117 +2,187 @@ import gradio as gr
 import pixeltable as pxt
 from pixeltable.functions.huggingface import clip_image, clip_text
 from pixeltable.iterators import FrameIterator
-import PIL.Image
 import os
 # Process video and create index
 def process_video(video_file, progress=gr.Progress()):
-    progress(0, desc="Initializing...")
-    # Pixeltable setup
-    pxt.drop_dir('video_search', force=True)
-    pxt.create_dir('video_search')
-    # Update type declaration to use simpler syntax
-    video_table = pxt.create_table('video_search.videos', {'video': pxt.Video})
-    frames_view = pxt.create_view(
-        'video_search.frames',
-        video_table,
-        iterator=FrameIterator.create(video=video_table.video, fps=1)
-    )
-    progress(0.2, desc="Inserting video...")
-    video_table.insert([{'video': video_file.name}])
-    progress(0.4, desc="Creating embedding index...")
-    # Updated embedding pattern using .using()
-    frames_view.add_embedding_index(
-        'frame',
-        string_embed=clip_text.using(model_id='openai/clip-vit-base-patch32'),
-        image_embed=clip_image.using(model_id='openai/clip-vit-base-patch32')
-    )
-    progress(1.0, desc="Processing complete")
-    return "Good news! Your video has been processed. Easily find the moments you need by searching with text or images."
 # Perform similarity search
 def similarity_search(query, search_type, num_results, progress=gr.Progress()):
-    frames_view = pxt.get_table('video_search.frames')
-    progress(0.5, desc="Performing search...")
-    if search_type == "Text":
-        sim = frames_view.frame.similarity(query)
-    else:  # Image search
         sim = frames_view.frame.similarity(query)
-    results = frames_view.order_by(sim, asc=False).limit(num_results).select(frames_view.frame, sim=sim).collect()
-    progress(1.0, desc="Search complete")
-    return [row['frame'] for row in results]
 # Gradio interface
-with gr.Blocks(theme=gr.themes.Base()) as demo:
-    gr.Markdown(
         """
-        <div style= margin-bottom: 20px;">
-            <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/resources/pixeltable-logo-large.png" alt="Pixeltable" style="max-width: 150px;" />
-            <h2>Text and Image similarity search on video frames with embedding indexes</h2>
         </div>
         """
     )
-    gr.HTML(
-    """
-    <p>
-        <a href="https://github.com/pixeltable/pixeltable" target="_blank" style="color: #F25022; text-decoration: none; font-weight: bold;">Pixeltable</a> is a declarative interface for working with text, images, embeddings, and even video, enabling you to store, transform, index, and iterate on data.
-    </p>
-    """
-    )
     with gr.Row():
         with gr.Column(scale=1):
-            gr.Markdown(
-            """
-            <h3>1. Insert video</h3>
-            """)
-            video_file = gr.File(label="Upload Video")
-            process_button = gr.Button("Process Video")
             process_output = gr.Textbox(label="Status", lines=2)
-            gr.Markdown(
-            """
-            <h3>2. Search video frames</h3>
-            """)
-            search_type = gr.Radio(["Text", "Image"], label="Search Type", value="Text")
-            text_input = gr.Textbox(label="Text Query")
-            image_input = gr.Image(label="Image Query", type="pil", visible=False)
-            num_results = gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Number of Results")
-            search_button = gr.Button("Search")
         with gr.Column(scale=2):
-            gr.Markdown(
-            """
-            <h3>3. Visualize results</h3>
-            """)
-            results_gallery = gr.Gallery(label="Search Results", columns=3)
-            gr.Examples(
-            examples=[
-                ["bangkok.mp4"],
-                ["lotr.mp4"],
-                ["mi.mp4"],
-            ],
-            label="Click one of the examples below to get started",
-            inputs=[video_file],
-            fn=process_video
             )
     def update_search_input(choice):
         return gr.update(visible=choice=="Text"), gr.update(visible=choice=="Image")
@@ -126,6 +196,9 @@ with gr.Blocks(theme=gr.themes.Base()) as demo:
     def perform_search(search_type, text_query, image_query, num_results):
         query = text_query if search_type == "Text" else image_query
         return similarity_search(query, search_type, num_results)
     search_button.click(
@@ -134,5 +207,13 @@ with gr.Blocks(theme=gr.themes.Base()) as demo:
         outputs=[results_gallery]
     )
 if __name__ == "__main__":
     demo.launch()

 import pixeltable as pxt
 from pixeltable.functions.huggingface import clip_image, clip_text
 from pixeltable.iterators import FrameIterator
 import os
+import logging
+# Configure logging at INFO level; each record carries timestamp, logger name and level.
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+# Module-level logger used by process_video() and similarity_search().
+logger = logging.getLogger(__name__)
+# Pixeltable paths: one directory holding the videos table and the frames view.
+# process_video() drops and recreates PROJECT_DIR on every run.
+PROJECT_DIR = 'video_search'
+VIDEOS_TABLE = f'{PROJECT_DIR}.videos'
+FRAMES_VIEW = f'{PROJECT_DIR}.frames'
 # Process video and create index
 def process_video(video_file, progress=gr.Progress()):
+    if video_file is None:
+        return "Please upload a video file first."
+    try:
+        progress(0, desc="Initializing...")
+        logger.info(f"Processing video: {video_file.name}")
+        # Pixeltable setup
+        pxt.drop_dir(PROJECT_DIR, force=True)
+        pxt.create_dir(PROJECT_DIR)
+        # Create video table
+        video_table = pxt.create_table(VIDEOS_TABLE, {'video': pxt.Video})
+        # Create frames view
+        frames_view = pxt.create_view(
+            FRAMES_VIEW,
+            video_table,
+            iterator=FrameIterator.create(video=video_table.video, fps=1)
+        )
+        progress(0.2, desc="Inserting video...")
+        video_table.insert([{'video': video_file.name}])
+        progress(0.4, desc="Creating embedding index...")
+        # Use the CLIP model for both text and image embedding
+        clip_model = 'openai/clip-vit-base-patch32'
+        frames_view.add_embedding_index(
+            'frame',
+            string_embed=clip_text.using(model_id=clip_model),
+            image_embed=clip_image.using(model_id=clip_model)
+        )
+        progress(1.0, desc="Processing complete")
+        return "✅ Video processed successfully! You can now search for specific moments using text or images."
+    except Exception as e:
+        logger.error(f"Error processing video: {str(e)}")
+        return f"Error processing video: {str(e)}"
 # Perform similarity search
 def similarity_search(query, search_type, num_results, progress=gr.Progress()):
+    try:
+        if not query:
+            return []
+        frames_view = pxt.get_table(FRAMES_VIEW)
+        if frames_view is None:
+            return []
+        progress(0.5, desc="Performing search...")
         sim = frames_view.frame.similarity(query)
+        results = frames_view.order_by(sim, asc=False).limit(num_results).select(
+            frames_view.frame,
+            similarity=sim
+        ).collect()
+        progress(1.0, desc="Search complete")
+        return [row['frame'] for row in results]
+    except Exception as e:
+        logger.error(f"Error during search: {str(e)}")
+        return []
+# Custom stylesheet passed to gr.Blocks(css=...) below.
+# .header and .step-header are referenced by the gr.HTML snippets in the layout;
+# .container and .examples-section are not referenced in the visible part of this file.
+css = """
+.container {
+    max-width: 1200px;
+    margin: 0 auto;
+}
+.header {
+    display: flex;
+    align-items: center;
+    margin-bottom: 20px;
+}
+.header img {
+    max-width: 120px;
+    margin-right: 20px;
+}
+.step-header {
+    background-color: #f5f5f5;
+    padding: 10px;
+    border-radius: 5px;
+    margin-bottom: 15px;
+}
+.examples-section {
+    margin-top: 30px;
+}
+"""
 # Gradio interface
+with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
+    gr.HTML(
         """
+        <div class="header">
+            <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/resources/pixeltable-logo-large.png" alt="Pixeltable" />
+            <div>
+                <h1>Video Frame Search with AI</h1>
+                <p>Search through video content using natural language or images powered by <a href="https://github.com/pixeltable/pixeltable" target="_blank" style="color: #F25022; text-decoration: none; font-weight: bold;">Pixeltable</a>.</p>
+            </div>
         </div>
         """
     )
     with gr.Row():
         with gr.Column(scale=1):
+            gr.HTML('<div class="step-header"><h3>1. Insert video</h3></div>')
+            video_file = gr.File(label="Upload Video", file_types=["video"])
+            process_button = gr.Button("Process Video", variant="primary")
             process_output = gr.Textbox(label="Status", lines=2)
+            gr.HTML('<div class="step-header"><h3>2. Search video frames</h3></div>')
+            search_type = gr.Radio(
+                ["Text", "Image"],
+                label="Search Type",
+                value="Text",
+                info="Choose whether to search using text or an image"
+            )
+            text_input = gr.Textbox(
+                label="Text Query",
+                placeholder="Describe what you're looking for...",
+                info="Example: 'person walking' or 'red car'"
+            )
+            image_input = gr.Image(
+                label="Image Query",
+                type="pil",
+                visible=False,
+                info="Upload an image to find similar frames"
+            )
+            num_results = gr.Slider(
+                minimum=1,
+                maximum=20,
+                value=5,
+                step=1,
+                label="Number of Results",
+                info="How many matching frames to display"
+            )
+            search_button = gr.Button("Search", variant="primary")
         with gr.Column(scale=2):
+            gr.HTML('<div class="step-header"><h3>3. Visualize results</h3></div>')
+            results_gallery = gr.Gallery(
+                label="Search Results",
+                columns=3,
+                allow_preview=True,
+                object_fit="contain"
             )
+            with gr.Accordion("Example Videos", open=False):
+                gr.Markdown("Click one of the examples below to get started")
+                gr.Examples(
+                    examples=[
+                        ["bangkok.mp4"],
+                        ["lotr.mp4"],
+                        ["mi.mp4"],
+                    ],
+                    inputs=[video_file],
+                    outputs=[process_output],
+                    fn=process_video,
+                    cache_examples=True
+                )
+    # Handle UI interactions
     def update_search_input(choice):
         return gr.update(visible=choice=="Text"), gr.update(visible=choice=="Image")
     def perform_search(search_type, text_query, image_query, num_results):
         query = text_query if search_type == "Text" else image_query
+        if query is None or (isinstance(query, str) and query.strip() == ""):
+            return gr.Gallery(label="Please enter a valid search query")
         return similarity_search(query, search_type, num_results)
     search_button.click(
         outputs=[results_gallery]
     )
+    # Add keyboard shortcuts
+    search_type.change(lambda: None, None, None, _js="() => {document.activeElement.blur();}")
+    text_input.submit(
+        perform_search,
+        inputs=[search_type, text_input, image_input, num_results],
+        outputs=[results_gallery]
+    )
+# Start the Gradio app only when this file is executed as a script.
 if __name__ == "__main__":
     demo.launch()