Spaces:
Runtime error
Runtime error
added offline files
Browse files
app.py
CHANGED
|
@@ -202,6 +202,25 @@ def topic_distribution(weights, smoothing=0.01):
|
|
| 202 |
# for category in categories_sorted:
|
| 203 |
# return(topic_frequencies[category], topic_frequencies_spotlight[category], topic_ratios[category], category)
|
| 204 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 205 |
|
| 206 |
if __name__ == "__main__":
|
| 207 |
### STREAMLIT APP CONFIG ###
|
|
@@ -224,26 +243,10 @@ if __name__ == "__main__":
|
|
| 224 |
["distilbert-base-uncased-finetuned-sst-2-english",
|
| 225 |
"albert-base-v2-yelp-polarity"],
|
| 226 |
)
|
| 227 |
-
### LOAD DATA AND SESSION VARIABLES ###
|
| 228 |
-
data_df = pd.read_parquet('./assets/data/'+dataset+ '_'+ model+'.parquet')
|
| 229 |
-
if model == 'albert-base-v2-yelp-polarity':
|
| 230 |
-
tokenizer = AutoTokenizer.from_pretrained('textattack/'+model)
|
| 231 |
-
else:
|
| 232 |
-
tokenizer = AutoTokenizer.from_pretrained(model)
|
| 233 |
-
if "user_data" not in st.session_state:
|
| 234 |
-
st.session_state["user_data"] = data_df
|
| 235 |
-
if "selected_slice" not in st.session_state:
|
| 236 |
-
st.session_state["selected_slice"] = None
|
| 237 |
-
|
| 238 |
-
loss_quantile = st.sidebar.slider(
|
| 239 |
-
"Loss Quantile", min_value=0.5, max_value=1.0,step=0.01,value=0.95
|
| 240 |
-
)
|
| 241 |
-
data_df['loss'] = data_df['loss'].astype(float)
|
| 242 |
-
losses = data_df['loss']
|
| 243 |
-
high_loss = losses.quantile(loss_quantile)
|
| 244 |
-
data_df['slice'] = 'high-loss'
|
| 245 |
-
data_df['slice'] = data_df['slice'].where(data_df['loss'] > high_loss, 'low-loss')
|
| 246 |
|
|
|
|
|
|
|
|
|
|
| 247 |
with rcol:
|
| 248 |
with st.spinner(text='loading...'):
|
| 249 |
st.markdown('<h3>Word Distribution in Error Slice</h3>', unsafe_allow_html=True)
|
|
|
|
| 202 |
# for category in categories_sorted:
|
| 203 |
# return(topic_frequencies[category], topic_frequencies_spotlight[category], topic_ratios[category], category)
|
| 204 |
|
| 205 |
+
def populate_session(dataset,model):
    """Load the stored results for a dataset/model pair and prepare the session.

    Reads './assets/data/<dataset>_<model>.parquet' into a DataFrame, loads the
    matching tokenizer, seeds the "user_data" and "selected_slice" entries of
    ``st.session_state`` when they are missing, renders the "Loss Quantile"
    slider in the sidebar, and labels every row as 'high-loss' or 'low-loss'
    in a new 'slice' column.

    Args:
        dataset: Dataset name used as the first part of the parquet file name.
        model: Model identifier used as the second part of the parquet file
            name and as the tokenizer name.

    Returns:
        None. The DataFrame, tokenizer and slider value stay local to this
        function; only the session-state entries are visible to the caller.
    """
    parquet_path = ''.join(('./assets/data/', dataset, '_', model, '.parquet'))
    data_df = pd.read_parquet(parquet_path)

    # The ALBERT checkpoint is requested under the 'textattack/' prefix;
    # every other model identifier is passed through unchanged.
    tokenizer_name = 'textattack/'+model if model == 'albert-base-v2-yelp-polarity' else model
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)

    # Only fill in session entries that do not exist yet, so values already
    # stored in the session are never overwritten here.
    if "user_data" not in st.session_state:
        st.session_state["user_data"] = data_df
    if "selected_slice" not in st.session_state:
        st.session_state["selected_slice"] = None

    loss_quantile = st.sidebar.slider(
        "Loss Quantile",
        min_value=0.5,
        max_value=1.0,
        step=0.01,
        value=0.95,
    )

    # Make sure the loss column is numeric before computing the quantile.
    data_df['loss'] = data_df['loss'].astype(float)
    high_loss = data_df['loss'].quantile(loss_quantile)

    # Rows whose loss is strictly above the chosen quantile keep 'high-loss';
    # all remaining rows are relabelled 'low-loss'.
    above_threshold = data_df['loss'] > high_loss
    data_df['slice'] = 'high-loss'
    data_df['slice'] = data_df['slice'].where(above_threshold, 'low-loss')
|
| 224 |
|
| 225 |
if __name__ == "__main__":
|
| 226 |
### STREAMLIT APP CONFIG ###
|
|
|
|
| 243 |
["distilbert-base-uncased-finetuned-sst-2-english",
|
| 244 |
"albert-base-v2-yelp-polarity"],
|
| 245 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 246 |
|
| 247 |
+
### LOAD DATA AND SESSION VARIABLES ###
|
| 248 |
+
## Uncomment the next line to run dynamically instead of from file
|
| 249 |
+
#populate_session(dataset, model)
|
| 250 |
with rcol:
|
| 251 |
with st.spinner(text='loading...'):
|
| 252 |
st.markdown('<h3>Word Distribution in Error Slice</h3>', unsafe_allow_html=True)
|