# cc_audio_8/examples/download_wav/step_1_download_wav.py
# HoneyTian's picture
# update
# 85abe14
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import argparse
from datetime import datetime
from pathlib import Path
import pandas as pd
import requests
from tqdm import tqdm
from project_settings import project_path
def get_args():
    """Parse the command-line options of this script.

    Returns:
        argparse.Namespace: carries a single attribute, ``output_dir`` (str),
        the directory the downloaded ``.wav`` files are written to. It
        defaults to a dated folder below ``project_path``.
    """
    default_output_dir = project_path / "data/make_analysis_excel/download_wav/20251204/"

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--output_dir", type=str, default=default_output_dir.as_posix())
    return arg_parser.parse_args()
# Newline-separated list of call ids (UUID-formatted), one id per line.
# main() strips this text, splits it on "\n", and builds the early-media and
# active-media recording URLs from each id.
call_id_str = """
c6840978-207e-43e0-b62e-4ad1a4269917
a6961fd4-19a2-4403-ac8a-8c1e2ebe714b
921a94e3-292a-4092-b532-a25aac195ed5
898d85e2-9d5a-450c-b1bd-e02b88d3f703
20a52fe5-b4f9-47b9-9af5-5670bbc77ec2
e4a3e15c-97e0-4cf5-858a-8cbd728d71e6
59cc7e56-f956-49f4-8004-6bdbf306161e
79e8d43f-3f17-484f-966b-681557a19dd9
ffba67b6-5ad5-437d-be71-23f2786b9364
4ce36ae6-b5d0-4108-ab6f-bf08ea6ba50c
3690e91b-282d-4a40-82bb-f7b4dad7bf32
0764293a-95ae-492f-b08e-9873450e417f
7f16a22d-2194-4656-bce3-ad6d39c7bee3
c7fb4444-f669-490a-898a-ddec20d02318
70e1c32f-0aaa-404d-a70e-4a19931174fb
9a03a1ee-5cce-4cfd-b479-096067484b13
a8934418-5626-4bf4-9525-7924d3413dc3
7ad6154a-5191-4ef0-a72e-717b62c5fd1f
204abc19-ff7d-4495-8969-2faa431a5efa
6adfceb3-d625-4036-bb3a-ef9db5f761bb
9aa34ed8-98ed-49bc-b8e6-8f98c7574d50
82817c32-6dfa-4622-aae3-71712fa2159b
13958016-250b-4c60-9b31-5c85df365cb6
aa560c1c-58d7-4e63-95b7-4a0045962dbc
e20c2660-d291-46fd-94a9-d9b554f2c305
cf63816f-3fe3-4b61-a6cd-7d640bb02372
e84f5e8b-4fff-4a22-bf3b-371ebb956156
73659b92-5fc2-4a73-8aeb-572ab492abea
ac9ab878-9f27-48e9-b2d5-1b212c85a8d4
70de5e15-2cbc-4371-9f08-fa3f46339254
11ae324f-8c43-4f65-bf68-761de7097099
0d5d7635-0c6f-48f9-beb7-509806f783ce
f84ca27e-cc70-4557-a9a0-39df493b5807
c065e5f6-0a59-417a-8a7a-df1deb769559
8a9d1668-a5c2-440f-820d-bf792ce3cd8b
de6450f6-9e92-48fe-8080-640cdffc4e00
8e31e1b7-fe5a-4aa3-abba-4ad42ddaf9f6
eb124d81-c0b5-4ddb-ad42-42163374d037
69b1f442-0b43-4b70-af47-585a2a1383bf
ea64fc33-32be-46eb-b211-8cd5e8b142a2
7c670953-bd32-4a1b-9fc8-e35b8d26229b
a47120e1-0c4f-419b-babe-ed8e0c89fb0f
7fba0c62-3bf5-4db8-8e8c-63cc15227d19
a0e00c7e-c35e-45db-9864-35588b89193f
6b0ff409-a3c9-45d3-9900-853e34d5dccb
ff288628-ac15-4039-8819-e69dbe4596cd
19d63f42-05e3-479a-8292-caccd26d32c4
af386af3-373a-4f6e-a093-a3d15b6afe77
14c78d26-5080-40d3-b1f6-3461e75f6598
95d85c9b-b7de-4b01-8c39-03102fa3248f
ec03d767-8286-42de-b481-f8105c5ad298
c22b61f2-ccae-4cd9-babd-650aabd86c59
01d931b2-5bcb-4b5d-ad4d-d441329fb79b
ce38a1b8-6b6c-4aae-886b-9c04e528527f
a3f8a853-c8fd-4d35-a2c6-b15ca1cb3ea5
56ec6fb2-29e5-4148-af7c-5a9b38f4d407
702a39b8-ae30-4d54-97c0-50158d2ab848
dff637d4-0862-4034-b552-a118ec57290a
d05aee2a-e8c2-4a00-8929-7dba26464339
939c6a18-e606-4af8-ab88-01e4e25664de
4a532921-e886-4f61-a2b6-46c0b0cbbde3
6317de3a-95b0-4ac3-83a5-2d70d445b0cc
0334372b-7af8-46b4-84db-3977f41520ba
0370b4af-ffcd-43b6-a852-7207fa1a992d
9b272724-c624-4972-91b8-54fade919640
cfd1b906-e977-4706-8a7a-183992ffe025
63cbb310-dddd-4c97-8f92-d8e5056b8550
61ba0fe6-cbb8-47f1-a020-4719807d7992
9251e484-c76f-4c33-b331-c9b88b6e7f4e
b6c7c47f-bebd-4efa-ae25-a1dc5cd99f30
1870396b-c4ba-4d83-9ecd-aabecc8ed203
af11ed20-9f70-49c6-93c9-c3dc5066f90f
2c14d303-8f1e-4663-8e56-96299bd06bc8
e6f8c638-07cb-4d30-b6f1-66f950e74c92
ec8ac7da-e090-40c0-a93c-cef10f96b6d6
7d0225ac-03c6-43dc-9e2e-b6203f40cd7c
c6b5c8a8-4339-420b-a643-79e1487a5d9e
e5c4411b-1294-475f-9d4a-2434e7ac14c4
867f37cb-a7a2-4caa-89d3-95557b58d8a4
532813ac-037f-4c2d-ad55-a16f24564157
9e66794d-43b1-41a1-97ef-42b1bb2a01b3
7678c113-56b0-4c5e-b14b-67b05b9b38a2
ce7c6a54-2d7c-4c02-8721-2c875d1fd062
49c6a88d-4913-4351-a2d3-4090e512819b
9e02d2c8-89f9-4721-b504-f29fd44d878a
f3d19980-ea20-4c2d-88ed-3b4712222998
3ba69f36-df6b-4e52-98ae-a652df403c4f
82743f14-26bb-4019-85d1-3ef5edc90454
05d10d13-69e8-438d-b65c-7cfbdafaca17
7eefc24b-673a-4b45-89d7-444f12846c93
0199dd38-de6b-4be3-80f7-cf4f170ef2f2
271494db-8a44-4f0f-9c2e-2dede59e03bf
54dcf802-6d3a-431e-b958-bfc8af7afb30
bdf48a65-3cad-4b3c-92f1-94c977486d98
b7336c2f-7aca-4a88-bfc5-4d188a6add83
010542f1-2767-4d7e-9969-79216a8d799d
18e644ee-6ca2-40bf-8b41-ca68f94c5fcb
9ef629ac-c190-45e7-bf69-b83a213b356c
313f22a9-afa8-4464-87c5-abcb5b2f32d7
49a17069-dfdc-4c9a-b84a-2079d04d833e
fdc64c2b-d233-4817-8792-d1caaf2c591a
741a6203-ac89-4061-9799-a51c5e4cfc49
8fba001c-59da-4b63-8a3d-6d8f56c26e0a
87719779-8355-43ce-9c78-746b060f0ca2
68fec1e9-fbb1-44fd-abaf-5659a5464fa6
ab26e325-9c7d-4aeb-88c0-898647896ef8
4d0f4d96-4c79-47a3-a8af-6481b463f5a6
85db8889-ffe8-4c1c-a879-83faf1e878e8
4114acf2-e508-420c-a868-26ab9aae2250
229b5d6f-4542-40f2-817a-d8218b073967
4ef0dd76-fb7a-4340-b44d-5347178df527
4e223d73-1f32-4a0b-b607-cea8e407c0d5
42bed8d9-4833-4c3c-8e43-50ec2ef6bdf2
99e18f68-4e84-48c2-991b-282992f51570
"""
def main():
    """Download the early-media and active-media recording of every call id.

    For each id listed in ``call_id_str`` two URLs are requested
    (``<id>_early_media.wav`` and ``<id>_active_media.wav``) and each response
    body is stored in the output directory as ``<media_type>_<id>.wav``.
    Files already present in the output directory are skipped, so the script
    can be re-run to resume an interrupted batch.

    Connection problems, timeouts and HTTP 404 responses are reported on
    stdout and the affected recording is skipped.

    Raises:
        AssertionError: if the server answers with a status code other than
            200 or 404.
    """
    args = get_args()
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Stems of the recordings that are already on disk.
    finished = {wav_path.stem for wav_path in output_dir.glob("*.wav")}
    print(f"finished count: {len(finished)}")

    base_url = "https://record-prod.obs.la-south-2.myhuaweicloud.com/audio_corpus/callbot/es-MX/20251201"
    # (connect, read) timeouts in seconds; without them a stalled connection
    # would block the whole batch indefinitely.
    timeout = (10, 60)

    for line in call_id_str.strip().split("\n"):
        call_id = line.strip()
        if not call_id:
            # A blank line in the id list would otherwise yield a bogus URL.
            continue

        for media_type in ("early_media", "active_media"):
            name = f"{media_type}_{call_id}"
            if name in finished:
                continue

            record_url = f"{base_url}/{call_id}_{media_type}.wav"
            print(f"record_url: {record_url}")
            try:
                resp = requests.get(url=record_url, timeout=timeout)
            except (
                TimeoutError,
                requests.exceptions.Timeout,
                requests.exceptions.ConnectionError,
            ):
                print(f"record_url timeout: {record_url}")
                continue
            except Exception as e:
                # Deliberate best-effort: one failing URL must not abort the batch.
                print(e)
                continue

            if resp.status_code == 404:
                print(f"record_url not found: {record_url}")
                continue
            if resp.status_code != 200:
                raise AssertionError("status_code: {}; text: {}".format(resp.status_code, resp.text))

            filename = output_dir / f"{name}.wav"
            # Write to a temporary name first and rename afterwards: an
            # interrupted run must not leave a truncated "*.wav" behind that
            # the next run would count as finished.
            tmp_filename = output_dir / f"{name}.wav.part"
            with open(tmp_filename.as_posix(), "wb") as f:
                f.write(resp.content)
            tmp_filename.replace(filename)

            # Prevents a second download when the id list contains duplicates.
            finished.add(name)
    return
# Start the download only when this file is executed as a script.
if __name__ == "__main__":
    main()