Update app.py
app.py CHANGED
@@ -27,6 +27,39 @@ tokenizer_ner = AutoTokenizer.from_pretrained(model_name)
model_ner = AutoModelForTokenClassification.from_pretrained(model_name)
predict_ner = TokenClassificationPipeline(model=model_ner, tokenizer=tokenizer_ner)

+def transcribe(audio_path):
+    # Load the audio clip and resample it to 16 kHz before feeding the ASR processor
+    speech_array, sampling_rate = librosa.load(audio_path, sr=16_000)
+
+    inputs = processor_asr(speech_array, sampling_rate=16_000, return_tensors="pt", padding=True)
+
+    with torch.no_grad():
+        logits = model_asr(inputs.input_values, attention_mask=inputs.attention_mask).logits
+
+    # Greedy decoding: take the most likely token id per frame, then map the ids back to text
+    predicted_ids = torch.argmax(logits, dim=-1)
+    return processor_asr.batch_decode(predicted_ids)[0]
+
+def getUniform(text):
+    # Group the NER pipeline's B-/I- token predictions into (label, [words]) spans
+    idx = 0
+    res = {}
+
+    for t in text:
+        raw = t["entity"].replace("B-", "").replace("I-", "")
+        word = t["word"].replace("▁", "")
+
+        if "B-" in t["entity"]:
+            # A new span starts: advance the index before creating the key so that
+            # the following I- tokens append to this entry rather than to a missing one
+            idx += 1
+            res[f"{raw}|{idx}"] = [word]
+        else:
+            res[f"{raw}|{idx}"].append(word)
+
+    res = [(r.split("|")[0], res[r]) for r in res]
+    return res
+
def greet(name):
    return "Hello " + name + "!!"
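
For reference, a rough sketch of what getUniform does with the pipeline output: the NER pipeline emits one dict per sub-token with "entity" and "word" keys, and getUniform folds consecutive B-/I- predictions into (label, [words]) pairs. The sample records below are hand-written for illustration, not real model output, and the import assumes app.py is on the path (importing it will also load its models).

# Illustrative only: hand-written pipeline-style records, not real model output.
from app import getUniform  # assumption: app.py is importable (this also loads its models)

sample = [
    {"entity": "B-PER", "word": "▁Jean"},
    {"entity": "I-PER", "word": "▁Dupont"},
    {"entity": "B-LOC", "word": "▁Paris"},
]

print(getUniform(sample))
# Expected: [('PER', ['Jean', 'Dupont']), ('LOC', ['Paris'])]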
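
The hunk does not show how these helpers are exposed in the Space's interface, so the wiring below is a hypothetical sketch, not part of this commit: it chains transcribe, the existing predict_ner pipeline, and getUniform behind a Gradio audio input. The audio_to_entities name and the JSON output component are assumptions.

import gradio as gr

# Hypothetical wiring (not from this commit): run ASR, then NER, on an uploaded clip.
def audio_to_entities(audio_path):
    text = transcribe(audio_path)       # speech -> raw transcript
    tokens = predict_ner(text)          # transcript -> per-token B-/I- predictions
    return {"transcript": text, "entities": getUniform(tokens)}

demo = gr.Interface(
    fn=audio_to_entities,
    inputs=gr.Audio(type="filepath"),   # hand transcribe() a file path, as it expects
    outputs=gr.JSON(),
)

if __name__ == "__main__":
    demo.launch()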