Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| import streamlit.components.v1 as stc | |
| import pandas as pd | |
| import numpy as np | |
| import seaborn as sns | |
| import matplotlib.pyplot as plt | |
| from PIL import Image | |
| import exifread # Extracts Meta data of images | |
| import os | |
| from datetime import datetime | |
| import mutagen # Extracts Meta data of Audio | |
| from PIL.ExifTags import TAGS, GPSTAGS | |
| import base64 | |
| import time | |
| from PyPDF2 import PdfReader | |
# Timestamp taken once when this module runs; file_download() embeds it in
# the name of the CSV it offers for download.
timestr = time.strftime("%Y%m%d-%H%M%S")

# Introductory text written to the Home page by metadata().
details = """
Metadata is defined as the data providing information about one or more aspects of the data; it is used to summarize basic information about data which can make tracking and working with specific data easier
"""

# Banner markup that metadata() renders at the top of the app via stc.html().
HTML_BANNER = """
<div style="background-color:violet;padding:10px;border-radius:10px">
<h1 style="color:white;text-align:center;">MetaData Extractor App </h1>
</div>
"""
def file_download(data):
    """Render a heading and a link that downloads *data* as a CSV file.

    The frame is serialised with ``DataFrame.to_csv()``, base64-encoded and
    embedded in a ``data:`` URI, so the download needs no server round trip.
    The suggested file name carries the module-level ``timestr`` timestamp.

    Args:
        data: Object exposing ``to_csv()`` (the callers pass pandas frames).
    """
    csv_bytes = data.to_csv().encode()
    b64 = base64.b64encode(csv_bytes).decode()
    new_filename = "result_{}.csv".format(timestr)

    st.markdown('### ποΈ Download csv file ')
    # Raw HTML is required for the anchor's ``download`` attribute.
    st.markdown(
        f'<a href="data:file/csv;base64,{b64}" download="{new_filename}"> Click Here! </a>',
        unsafe_allow_html=True,
    )
def view_all_data():
    """Return every row of the ``filestable`` table.

    NOTE(review): this uses a module-level cursor ``c`` that is not defined in
    the visible part of the file -- confirm it exists before calling.
    """
    c.execute('SELECT * FROM filestable')
    return c.fetchall()
def load_image(file):
    """Open *file* with Pillow and return the resulting image object.

    Args:
        file: Anything ``Image.open`` accepts; callers in this file pass a
            path string and an uploaded file object.
    """
    return Image.open(file)
def get_readable_time(time):
    """Format a POSIX timestamp as ``YYYY-MM-DD-HH:MM`` in local time.

    Args:
        time: Seconds since the epoch (int or float). The parameter name
            shadows the ``time`` module inside this function only.

    Returns:
        The formatted timestamp string.
    """
    moment = datetime.fromtimestamp(time)
    return moment.strftime('%Y-%m-%d-%H:%M')
def get_exif(filename):
    """Return an image's EXIF data as a dict keyed by readable tag names.

    Numeric EXIF tag ids are translated through ``PIL.ExifTags.TAGS``; ids
    with no known name are kept as-is. When GPS data is present it is stored
    under ``'GPSInfo'`` as a nested dict whose keys are translated through
    ``GPSTAGS`` in the same way.

    Args:
        filename: Path or file-like object accepted by ``Image.open``.

    Returns:
        A new ``dict`` (empty when the image carries no EXIF data). A fresh
        dict is built instead of renaming keys in place, because mutating a
        mapping while iterating over it is an error.
    """
    exif = Image.open(filename).getexif()
    if not exif:
        return {}

    # Fall back to the numeric id (not the value) when a tag has no name.
    named = {TAGS.get(tag_id, tag_id): value for tag_id, value in exif.items()}

    if 'GPSInfo' in named:
        gps_raw = named['GPSInfo']
        # Depending on the Pillow version the GPS entry is either the nested
        # mapping itself or just an offset; in the latter case the real
        # mapping has to be fetched with get_ifd(0x8825) (the GPS IFD tag).
        if not isinstance(gps_raw, dict) and hasattr(exif, 'get_ifd'):
            gps_raw = exif.get_ifd(0x8825)
        if isinstance(gps_raw, dict):
            named['GPSInfo'] = {
                GPSTAGS.get(gps_id, gps_id): gps_value
                for gps_id, gps_value in gps_raw.items()
            }
    return named
def metadata():
    """Render the app: the banner, the sidebar menu and the selected page.

    Each menu entry is drawn by its own private helper so the pages can be
    read and changed independently.
    """
    stc.html(HTML_BANNER)
    menu = ['Home', 'Image', 'Audio', 'Document_Files']
    choice = st.sidebar.selectbox('Menu', menu)
    if choice == 'Home':
        _render_home()
    elif choice == 'Image':
        _render_image_page()
    elif choice == 'Audio':
        _render_audio_page()
    elif choice == 'Document_Files':
        _render_document_page()


# Shown instead of access/creation/modification times. An upload is an
# in-memory buffer, so there is no file on disk to stat; the previous
# ``os.stat(upload.readable())`` call stat'ed file descriptor 1 (``True``)
# and reported times that had nothing to do with the uploaded file.
_TIMES_UNAVAILABLE = "Not available"


def _unavailable_times(separator):
    """Return the three timestamp rows, marked unavailable.

    ``separator`` joins the label words so each page keeps its existing keys
    (``'Accessed Time'`` on the image page, ``'Accessed_Time'`` elsewhere).
    """
    return {
        f"{label}{separator}Time": _TIMES_UNAVAILABLE
        for label in ("Accessed", "Creation", "Modified")
    }


def _to_frame(mapping, columns):
    """Build a two-column DataFrame from a mapping's (key, value) pairs."""
    return pd.DataFrame(list(mapping.items()), columns=columns)


def _gps_value(exif_details, tag_name):
    """Look up a GPS tag in nested ``'GPSInfo'`` data, else at the top level."""
    if not exif_details:
        return None
    gps_section = exif_details.get('GPSInfo')
    if isinstance(gps_section, dict) and tag_name in gps_section:
        return gps_section[tag_name]
    return exif_details.get(tag_name)


def _render_home():
    """Draw the landing page: process image, intro text and three info cards."""
    st.image(load_image('extraction_process.png'))
    st.write(details)
    # NOTE(review): the emoji in these labels look mis-decoded in this copy of
    # the file; restore them from the original source if that is the case.
    cards = (
        ("Get Image Metadata π·", "Image Metadata", "π·",
         "Upload JPEG,JPG,PNG Images"),
        ("Get Audio Metadata π", "Audio Metadata", "π",
         "Upload Mp3,Ogg"),
        ("Get Document Metadata ππ", "Document Files Metadata", "ππ",
         "Upload PDF,Docx"),
    )
    for column, (title, info, icon, hint) in zip(st.columns(3), cards):
        with column:
            with st.expander(title):
                st.info(info)
                st.markdown(icon)
                st.text(hint)


def _render_image_page():
    """Draw the image page: file stats, Pillow/ExifRead metadata, GPS, download."""
    st.subheader('Image MetaData Extractor')
    image_file = st.file_uploader("Upload Image", type=["png", "jpg", "jpeg"])
    if image_file is None:
        return

    with st.expander('File Stats'):
        full_details = {
            'Filename': image_file.name,
            'Filesize': image_file.size,
            'Filetype': image_file.type,
            **_unavailable_times(' '),
        }
        file_details_df = _to_frame(full_details, ["Meta Tags", "Value"])
        st.dataframe(file_details_df)

    c1, c2 = st.columns(2)
    with c1:
        with st.expander("View Image"):
            st.image(load_image(image_file), width=250)
    with c2:
        with st.expander("Default(JPEG)"):
            st.info("Using PILLOW")
            img = load_image(image_file)
            img_details = {
                "format": img.format,
                "format_desc": img.format_description,
                "filename": img.filename,
                "size": img.size,
                "height": img.height,
                "width": img.width,
                "info": img.info,
            }
            df_img_details = _to_frame(img_details, ["Meta Tags", "Value"])
            st.dataframe(df_img_details)

    c3, c4 = st.columns(2)
    with c3:
        with st.expander('Using ExifRead Tool'):
            # Earlier readers moved the stream position; start from byte 0 so
            # the parser sees the file header.
            image_file.seek(0)
            meta_data = exifread.process_file(image_file)
            meta_data_df = _to_frame(meta_data, ['Meta Data', 'Values'])
            st.dataframe(meta_data_df)
    with c4:
        with st.expander('Image geo Coordinates'):
            image_file.seek(0)
            img_gps_details = get_exif(image_file)
            # Empty or missing EXIF data is reported explicitly.
            gps_info = img_gps_details if img_gps_details else "None Found"
            lat = _gps_value(img_gps_details, 'GPSLatitude')
            lon = _gps_value(img_gps_details, 'GPSLongitude')
            st.write(gps_info)
            st.write(lat)
            st.write(lon)

    with st.expander('Download Results'):
        final_df = pd.concat([file_details_df, df_img_details, meta_data_df])
        st.dataframe(final_df)
        file_download(final_df)


def _render_audio_page():
    """Draw the audio page: player, file stats, Mutagen tags, download."""
    st.subheader('Audio MetaData Extractor')
    audio_file = st.file_uploader("Upload Audio", type=["mp3", "ogg"])
    if audio_file is None:
        return

    col1, col2 = st.columns(2)
    with col1:
        st.audio(audio_file.read())
    with col2:
        with st.expander("File Stats"):
            file_details = {
                "FileName": audio_file.name,
                "FileSize": audio_file.size,
                "FileType": audio_file.type,
            }
            st.write(file_details)
            stats_details = _unavailable_times('_')
            st.write(stats_details)
            df_file_details = _to_frame(
                {**file_details, **stats_details}, ["Meta Tags", "Value"]
            )
            st.dataframe(df_file_details)

    with st.expander('Metadata using Mutagen'):
        # The player above consumed the stream; rewind before parsing.
        audio_file.seek(0)
        meta_data = mutagen.File(audio_file)
        if meta_data is None:
            # mutagen.File returns None when it cannot identify the format.
            st.warning("Could not read audio metadata from this file.")
            meta_data_dict = {}
        else:
            meta_data_dict = {
                str(key): str(value) for key, value in meta_data.items()
            }
        meta_data_audio_df = _to_frame(meta_data_dict, ['Tag', 'Values'])
        st.dataframe(meta_data_audio_df)

    with st.expander("Download Results"):
        combined_df = pd.concat([df_file_details, meta_data_audio_df])
        st.dataframe(combined_df)
        file_download(combined_df)


def _render_document_page():
    """Draw the document page: file stats, PDF document info, download."""
    st.subheader('Document MetaData Extractor')
    text_file = st.file_uploader("Upload File", type=["PDF"])
    if text_file is None:
        return

    col1, col2 = st.columns([1, 2])
    with col1:
        with st.expander("File Stats"):
            file_details = {
                "FileName": text_file.name,
                "FileSize": text_file.size,
                "FileType": text_file.type,
            }
            st.write(file_details)
            stats_details = _unavailable_times('_')
            st.write(stats_details)
            df_file_details = _to_frame(
                {**file_details, **stats_details}, ["Meta Tags", "Value"]
            )
    with col2:
        with st.expander("Metadata"):
            text_file.seek(0)
            pdf_file = PdfReader(text_file)
            # A PDF without a document-information dictionary yields None.
            pdf_info = pdf_file.metadata or {}
            df_file_details_with_pdf = _to_frame(
                pdf_info, ["Meta Tags", "Value"]
            )
            st.dataframe(df_file_details_with_pdf)

    with st.expander("Download Results"):
        pdf_combined_df = pd.concat([df_file_details, df_file_details_with_pdf])
        st.dataframe(pdf_combined_df)
        file_download(pdf_combined_df)