Spaces:
Build error
Build error
More improvements
Browse files
app.py
CHANGED
|
@@ -2,10 +2,10 @@ from datetime import datetime
|
|
| 2 |
|
| 3 |
import streamlit as st
|
| 4 |
import pandas as pd
|
|
|
|
| 5 |
import matplotlib.pyplot as plt
|
| 6 |
|
| 7 |
# from load_dataframe import get_data
|
| 8 |
-
from urllib.parse import quote
|
| 9 |
|
| 10 |
|
| 11 |
def aggregated_data(df, aggregation_level="week"):
|
|
@@ -25,10 +25,17 @@ def aggregated_data(df, aggregation_level="week"):
|
|
| 25 |
|
| 26 |
# Calculate the growth rate
|
| 27 |
growth_rate = percentage_papers_with_artifacts.pct_change() * 100
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
|
|
|
|
|
|
| 32 |
|
| 33 |
# Create the plot
|
| 34 |
plt.figure(figsize=(12, 6))
|
|
@@ -53,8 +60,9 @@ def display_data(df):
|
|
| 53 |
percentage_of_at_least_one_artifact = num_artifacts / df.shape[0] if df.shape[0] > 0 else 0
|
| 54 |
percentage_of_at_least_one_artifact = round(percentage_of_at_least_one_artifact * 100, 2)
|
| 55 |
|
| 56 |
-
# add reached out
|
| 57 |
df['reached_out'] = [False for _ in range(df.shape[0])]
|
|
|
|
| 58 |
|
| 59 |
st.markdown(f"""
|
| 60 |
## {percentage_of_at_least_one_artifact}% papers with at least one 🤗 artifact
|
|
@@ -67,7 +75,7 @@ def display_data(df):
|
|
| 67 |
st.write("Papers with at least one artifact")
|
| 68 |
st.data_editor(df[df['has_artifact']],
|
| 69 |
hide_index=True,
|
| 70 |
-
column_order=("reached_out", "paper_page", "title", "github", "num_models", "num_datasets", "num_spaces"),
|
| 71 |
column_config={"github": st.column_config.LinkColumn(),
|
| 72 |
"paper_page": st.column_config.LinkColumn(),
|
| 73 |
"paper_page_with_title": st.column_config.LinkColumn(display_text=r'\|(.*)')},
|
|
@@ -77,7 +85,7 @@ def display_data(df):
|
|
| 77 |
st.write("Papers without artifacts")
|
| 78 |
st.data_editor(df[~df['has_artifact']],
|
| 79 |
hide_index=True,
|
| 80 |
-
column_order=("reached_out", "paper_page", "title", "github", "num_models", "num_datasets", "num_spaces"),
|
| 81 |
column_config={"github": st.column_config.LinkColumn(),
|
| 82 |
"paper_page": st.column_config.LinkColumn()},
|
| 83 |
width=2000,
|
|
@@ -86,7 +94,7 @@ def display_data(df):
|
|
| 86 |
st.write("Papers with a HF mention in README but no artifacts")
|
| 87 |
st.data_editor(df[(df['hf_mention'] == 1) & (~df['has_artifact'])],
|
| 88 |
hide_index=True,
|
| 89 |
-
column_order=("reached_out", "paper_page", "title", "github", "num_models", "num_datasets", "num_spaces"),
|
| 90 |
column_config={"github": st.column_config.LinkColumn(),
|
| 91 |
"paper_page": st.column_config.LinkColumn()},
|
| 92 |
width=2000,
|
|
@@ -109,9 +117,6 @@ def main():
|
|
| 109 |
df.index = pd.to_datetime(df.index)
|
| 110 |
df = df.sort_index()
|
| 111 |
|
| 112 |
-
# hack: include title in URL column
|
| 113 |
-
df['updated_url'] = df.apply(lambda row: f'{row["paper_page"]}/title/{quote(row["title"])}', axis=1)
|
| 114 |
-
|
| 115 |
if selection == "Daily/weekly/monthly data":
|
| 116 |
# Button to select day, month or week
|
| 117 |
# Add streamlit selectbox.
|
|
@@ -123,8 +128,6 @@ def main():
|
|
| 123 |
# convert to the day of a Pandas Timestamp
|
| 124 |
day = pd.Timestamp(day)
|
| 125 |
|
| 126 |
-
print("Day:", day)
|
| 127 |
-
|
| 128 |
df = df[df.index.date == day.date()]
|
| 129 |
|
| 130 |
st.write(f"Showing data for {day.day_name()} {day.strftime('%d/%m/%Y')}")
|
|
@@ -173,9 +176,6 @@ def main():
|
|
| 173 |
else:
|
| 174 |
st.write("Error: selection not recognized")
|
| 175 |
|
| 176 |
-
# Display data based on aggregation level
|
| 177 |
-
|
| 178 |
-
|
| 179 |
|
| 180 |
if __name__ == "__main__":
|
| 181 |
main()
|
|
|
|
| 2 |
|
| 3 |
import streamlit as st
|
| 4 |
import pandas as pd
|
| 5 |
+
import numpy as np
|
| 6 |
import matplotlib.pyplot as plt
|
| 7 |
|
| 8 |
# from load_dataframe import get_data
|
|
|
|
| 9 |
|
| 10 |
|
| 11 |
def aggregated_data(df, aggregation_level="week"):
|
|
|
|
| 25 |
|
| 26 |
# Calculate the growth rate
|
| 27 |
growth_rate = percentage_papers_with_artifacts.pct_change() * 100
|
| 28 |
+
|
| 29 |
+
print("Type of growth rate:", growth_rate)
|
| 30 |
+
print("Growth rate:", type(growth_rate))
|
| 31 |
+
|
| 32 |
+
# growth_rate = growth_rate.dropna()
|
| 33 |
|
| 34 |
+
print("Growht rate after removing nan:", growth_rate)
|
| 35 |
+
|
| 36 |
+
# Display the average growth rate as a big number
|
| 37 |
+
average_growth_rate = growth_rate.mean()
|
| 38 |
+
st.metric(label=f"{aggregation_level.capitalize()}ly average Growth Rate", value=f"{average_growth_rate:.2f}%")
|
| 39 |
|
| 40 |
# Create the plot
|
| 41 |
plt.figure(figsize=(12, 6))
|
|
|
|
| 60 |
percentage_of_at_least_one_artifact = num_artifacts / df.shape[0] if df.shape[0] > 0 else 0
|
| 61 |
percentage_of_at_least_one_artifact = round(percentage_of_at_least_one_artifact * 100, 2)
|
| 62 |
|
| 63 |
+
# add reached out and reached out link columns
|
| 64 |
df['reached_out'] = [False for _ in range(df.shape[0])]
|
| 65 |
+
df["reached_out_link"] = ["" for _ in range(df.shape[0])]
|
| 66 |
|
| 67 |
st.markdown(f"""
|
| 68 |
## {percentage_of_at_least_one_artifact}% papers with at least one 🤗 artifact
|
|
|
|
| 75 |
st.write("Papers with at least one artifact")
|
| 76 |
st.data_editor(df[df['has_artifact']],
|
| 77 |
hide_index=True,
|
| 78 |
+
column_order=("reached_out", "reached_out_link", "paper_page", "title", "github", "num_models", "num_datasets", "num_spaces"),
|
| 79 |
column_config={"github": st.column_config.LinkColumn(),
|
| 80 |
"paper_page": st.column_config.LinkColumn(),
|
| 81 |
"paper_page_with_title": st.column_config.LinkColumn(display_text=r'\|(.*)')},
|
|
|
|
| 85 |
st.write("Papers without artifacts")
|
| 86 |
st.data_editor(df[~df['has_artifact']],
|
| 87 |
hide_index=True,
|
| 88 |
+
column_order=("reached_out", "reached_out_link", "paper_page", "title", "github", "num_models", "num_datasets", "num_spaces"),
|
| 89 |
column_config={"github": st.column_config.LinkColumn(),
|
| 90 |
"paper_page": st.column_config.LinkColumn()},
|
| 91 |
width=2000,
|
|
|
|
| 94 |
st.write("Papers with a HF mention in README but no artifacts")
|
| 95 |
st.data_editor(df[(df['hf_mention'] == 1) & (~df['has_artifact'])],
|
| 96 |
hide_index=True,
|
| 97 |
+
column_order=("reached_out", "reached_out_link", "paper_page", "title", "github", "num_models", "num_datasets", "num_spaces"),
|
| 98 |
column_config={"github": st.column_config.LinkColumn(),
|
| 99 |
"paper_page": st.column_config.LinkColumn()},
|
| 100 |
width=2000,
|
|
|
|
| 117 |
df.index = pd.to_datetime(df.index)
|
| 118 |
df = df.sort_index()
|
| 119 |
|
|
|
|
|
|
|
|
|
|
| 120 |
if selection == "Daily/weekly/monthly data":
|
| 121 |
# Button to select day, month or week
|
| 122 |
# Add streamlit selectbox.
|
|
|
|
| 128 |
# convert to the day of a Pandas Timestamp
|
| 129 |
day = pd.Timestamp(day)
|
| 130 |
|
|
|
|
|
|
|
| 131 |
df = df[df.index.date == day.date()]
|
| 132 |
|
| 133 |
st.write(f"Showing data for {day.day_name()} {day.strftime('%d/%m/%Y')}")
|
|
|
|
| 176 |
else:
|
| 177 |
st.write("Error: selection not recognized")
|
| 178 |
|
|
|
|
|
|
|
|
|
|
| 179 |
|
| 180 |
if __name__ == "__main__":
|
| 181 |
main()
|