#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""Download the early-media and active-media wav recordings for a list of call ids.

For every call id in ``call_id_str`` the script requests
``<call_id>_early_media.wav`` and ``<call_id>_active_media.wav`` from the
recording bucket and stores them in ``--output_dir`` as
``<media_type>_<call_id>.wav``. Files that already exist in the output
directory are skipped, so the script can be re-run to resume.
"""
import argparse
from datetime import datetime
from pathlib import Path

import pandas as pd
import requests
from tqdm import tqdm

from project_settings import project_path


# URL of one recording; filled in with the call id and the media type.
_RECORD_URL_TEMPLATE = (
    "https://record-prod.obs.la-south-2.myhuaweicloud.com"
    "/audio_corpus/callbot/es-MX/20251215/{call_id}_{media_type}.wav"
)

# Each call id is looked up once per media type, in this order.
_MEDIA_TYPES = ("early_media", "active_media")

# (connect, read) timeout in seconds, so a stalled connection cannot hang
# the whole batch. The values are a pragmatic choice, not a server contract.
_REQUEST_TIMEOUT = (10, 120)


def get_args() -> argparse.Namespace:
    """Parse the command line.

    Returns:
        Namespace with ``output_dir``: directory the wav files are written to.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--output_dir",
        default=(project_path / "data/make_analysis_excel/download_wav/20251215/").as_posix(),
        type=str
    )
    args = parser.parse_args()
    return args


# Whitespace-separated call ids whose recordings should be downloaded.
call_id_str = """
f54cb4af-79f4-418a-b6bc-8dab274a1508
5dc6b2d4-f764-4a8e-8e69-95602f8df873
888c236d-a948-4170-b7cb-1799f1f5e19c
7a7748c4-7b3c-4c85-85d0-709006ec8bde
91a97064-28ff-4158-9757-1e552005a1bc
9f3b57fe-61d3-427d-8e8f-a4eb78274cd6
1270eea1-f227-436f-9090-ea433bbdebe0
5e04b0a0-d411-4189-bd28-e1f064d8adfa
08de6a4a-907c-4b01-832e-3bd7b480c5cc
f0406a5f-1fa5-4cfd-9fc7-7709e7892c95
518ceecd-c55d-4bd4-97ed-150c44526baf
7f71b548-4603-4e21-a481-0624e04eabcc
341788b8-1fe5-43ce-93fb-d76af0a5b5f2
b0680e2b-218e-4596-942b-705dcee762af
546165f0-7ce8-43c6-8a3d-021445ffe5fc
ce86b7b1-3165-4213-8841-180c23d0a4e1
19c4fbb3-6dec-426a-9f60-a3d68f0e7de2
88ba5c51-69dd-4b10-a051-9f465ab4856f
27d66db7-58ee-4c6d-b15e-e0fec3ff2b3b
2582627e-eeb7-4ae3-af58-d68479b3f4da
ae500a11-2ae0-4111-b509-525303ccdc53
dd7c7c7a-418d-48c6-8f5e-3fd36933d74d
3b56f4b2-3d5f-4097-9b17-0afd43c573c2
4bc00fb2-90c5-462f-ad1d-57124592b5b7
17d2f119-580d-483c-9a85-49d6e5b4beef
5aac0734-0b17-475a-b183-6fdff6c14139
ca5038f2-12e2-4cfb-96b1-91c54e9335f2
aba8750d-70fb-4883-a5ac-9a94906a4a08
a66a6550-7140-46ee-96f3-6e81fdf54c94
5ec615c6-2dce-4594-98dd-d35be9216e5d
f2759f0f-079d-44b6-b4f2-9aa133740bf5
7848ae03-6e26-43f8-b27e-ad3fa983577a
0afb48e1-de17-4a45-87cd-af8ef0f3d2bd
0b49f875-1f4d-47d9-b58e-6c489a50bde3
e5025696-e07d-4ce2-937f-b16108df1bc7
9ebc8681-fa2a-49a0-a25f-e688dae6675b
1784f666-c6b5-4da2-8943-cb849da81e86
cb4067c6-586b-4601-afe2-d93b76dce364
8c04ddf8-c744-4c7d-9b36-2d16971dfcad
0b8063d0-3afb-45b5-8287-a8e46ab444d3
c68c2e49-c6a0-4834-8c1b-7767760d2496
af037ee2-aa00-44de-bea7-137458220a6f
bcdd7960-5ef1-4d59-97f1-fccf3ca4ad0f
a4dbeb2c-03b5-4b95-9af2-62446e65a5c7
aea52ba4-b313-4992-a4f3-cf257272f22e
f42f26f7-c5ae-460b-bae6-8a34383b259c
26bd9584-4906-4aaf-8088-ba478a3e2fd6
99de2f01-b136-4e35-a47a-e5c6481a24cc
397efe24-821f-48bf-a80a-e98ddd0e0ad5
19e4b642-d155-47ad-a8e9-da80cbb42dad
c0a473a7-d2e3-48d8-9d68-688e36f663cf
a81da68d-5f05-4200-9934-0ba89d52b206
9dc5424d-28db-440d-9c1b-d399333fdd9d
00bdc9f3-43e2-4554-aed1-537e5992266e
16276b10-20fc-4bbf-9d9a-2d79effa8e8d
e48e4af4-ac4e-4d9f-b8e1-1dd02b042c42
66979e24-ca94-41b1-b7c6-28203a15a063
49f6cc50-e378-4e43-8995-5217a9db9960
10e4a354-9e7a-4865-9d52-f10e035ffa6f
2061f58f-1d80-42c3-9948-260643e2f696
9d09ed54-68fa-4890-bf0b-4cde40215521
7a689faf-c536-4054-9403-f3280fe0edee
c23b380c-bb01-42ca-bb2a-e4ce23453025
1b4b30b3-5b47-4e61-8c0d-baf73e3b14b4
0c90182a-bc16-4fb0-8548-6b04ef99ef3e
5b3f4684-dc12-4ee7-a642-9993a37c53b7
5bfa33d3-0f4b-4766-b590-5fac6d385d04
ae812760-7bb5-4dbf-ab82-53c612214510
57de9387-a732-485c-9cd3-40f649d51427
dcf4d47d-4565-4719-b930-ce329c5e314c
42081340-b1b9-40a0-ace4-726b6d2b5013
31fd412c-c423-4f88-873b-1e6963237bdf
0b4afe1f-6ee0-4b9b-9cd0-654c0d9092c5
33c964e8-60d8-4c15-b9b7-1cf079600f6f
4764430a-582a-4dec-aefd-c52a41f7bf63
5a557802-aaa6-4e68-b3fa-2a758b6df02c
307173ac-53c6-439c-a1a9-b209ecf0510c
40b606d9-999d-40c6-b54a-b43ce537befa
d870c0e9-fe4f-457e-84c0-ed928cd2d08c
acf22708-db3c-4b9a-8618-2b598659ff80
2920e680-b570-4e53-9835-72a48e36df85
76fc09cc-7a3f-4c86-b0ef-414514785683
430dddcc-59e3-4372-882a-99fabb89bde1
f30a13de-2615-45d4-8cc0-00c4a31a2bf0
6040cc90-a3bd-4886-938e-8f16c8b3888b
afc3d8b9-80d0-425c-b35f-54afbde82f0f
877360d2-cbf5-4918-841f-f69e9612f309
04b4bf0e-a68f-40bb-83f9-37238a582a9e
cc180a01-7b35-4fe7-9e50-add7743fb08a
19ca3431-e4a3-4c4e-a618-f5a2e15f3d72
17a58d61-c141-4cb1-9b74-a603206ff296
9465561c-60c0-4044-94a1-bc098c66a351
b9d4a8c6-f21b-4d6d-9d9e-a5cec3a65355
b5161d01-2fe4-495d-8064-1232c3f06281
d5cc3baa-dc30-4557-81da-051aabd570d7
"""


def main():
    """Download every recording that is not yet present in the output directory.

    Connection failures, timeouts and 404 responses are reported on stdout
    and the affected recording is skipped.

    Raises:
        AssertionError: if the server answers with a status code other than
            200 or 404.
    """
    args = get_args()

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Stems of the wav files already on disk; these are not fetched again.
    finished = {path.stem for path in output_dir.glob("*.wav")}
    print(f"finished count: {len(finished)}")

    # split() without arguments accepts any whitespace between ids and never
    # yields empty entries, so blank lines or space-separated ids are harmless.
    for call_id in call_id_str.split():
        for media_type in _MEDIA_TYPES:
            name = f"{media_type}_{call_id}"
            if name in finished:
                continue

            record_url = _RECORD_URL_TEMPLATE.format(call_id=call_id, media_type=media_type)
            print(f"record_url: {record_url}")
            try:
                resp = requests.get(
                    url=record_url,
                    timeout=_REQUEST_TIMEOUT,
                )
            except (
                TimeoutError,
                requests.exceptions.Timeout,
                requests.exceptions.ConnectionError,
            ):
                print(f"record_url timeout: {record_url}")
                continue
            except Exception as e:
                # Best effort: report the failure and move on to the next file.
                print(e)
                continue

            if resp.status_code == 404:
                print(f"record_url not found: {record_url}")
                continue
            if resp.status_code != 200:
                raise AssertionError(f"status_code: {resp.status_code}; text: {resp.text}")

            filename = output_dir / f"{name}.wav"
            # Write to a side file and rename it afterwards, so an interrupted
            # run never leaves a truncated .wav that a later run would treat
            # as finished.
            tmp_filename = filename.with_name(f"{filename.name}.part")
            tmp_filename.write_bytes(resp.content)
            tmp_filename.replace(filename)

            # Guards against downloading the same recording twice when an id
            # is listed more than once.
            finished.add(name)


if __name__ == "__main__":
    main()