Spaces:
Sleeping
Sleeping
| #!/usr/bin/python3 | |
| # -*- coding: utf-8 -*- | |
| import argparse | |
| from datetime import datetime | |
| from pathlib import Path | |
| import pandas as pd | |
| import requests | |
| from tqdm import tqdm | |
| from project_settings import project_path | |
def get_args():
    """Parse the command-line options of the recording downloader.

    Returns:
        argparse.Namespace: namespace with one attribute, ``output_dir``
        (str): the directory the downloaded ``.wav`` files are written to.
        When the option is omitted it defaults to the
        ``data/make_analysis_excel/download_wav/20251204/`` folder resolved
        under ``project_path``.
    """
    parser = argparse.ArgumentParser(
        description="Download call recordings (.wav) for a fixed list of call ids.",
    )
    parser.add_argument(
        "--output_dir",
        # project_path comes from project_settings; it is joined with "/" and
        # converted with as_posix(), so the default is always a plain string.
        default=(project_path / "data/make_analysis_excel/download_wav/20251204/").as_posix(),
        type=str,
        help="directory the downloaded .wav files are saved to",
    )
    args = parser.parse_args()
    return args
# Ids of the calls whose recordings are downloaded, one id per line.
# Kept as a single newline-separated string; consumers strip it and split on
# newlines, so the leading/trailing newline of the literal is harmless.
call_id_str = """
c6840978-207e-43e0-b62e-4ad1a4269917
a6961fd4-19a2-4403-ac8a-8c1e2ebe714b
921a94e3-292a-4092-b532-a25aac195ed5
898d85e2-9d5a-450c-b1bd-e02b88d3f703
20a52fe5-b4f9-47b9-9af5-5670bbc77ec2
e4a3e15c-97e0-4cf5-858a-8cbd728d71e6
59cc7e56-f956-49f4-8004-6bdbf306161e
79e8d43f-3f17-484f-966b-681557a19dd9
ffba67b6-5ad5-437d-be71-23f2786b9364
4ce36ae6-b5d0-4108-ab6f-bf08ea6ba50c
3690e91b-282d-4a40-82bb-f7b4dad7bf32
0764293a-95ae-492f-b08e-9873450e417f
7f16a22d-2194-4656-bce3-ad6d39c7bee3
c7fb4444-f669-490a-898a-ddec20d02318
70e1c32f-0aaa-404d-a70e-4a19931174fb
9a03a1ee-5cce-4cfd-b479-096067484b13
a8934418-5626-4bf4-9525-7924d3413dc3
7ad6154a-5191-4ef0-a72e-717b62c5fd1f
204abc19-ff7d-4495-8969-2faa431a5efa
6adfceb3-d625-4036-bb3a-ef9db5f761bb
9aa34ed8-98ed-49bc-b8e6-8f98c7574d50
82817c32-6dfa-4622-aae3-71712fa2159b
13958016-250b-4c60-9b31-5c85df365cb6
aa560c1c-58d7-4e63-95b7-4a0045962dbc
e20c2660-d291-46fd-94a9-d9b554f2c305
cf63816f-3fe3-4b61-a6cd-7d640bb02372
e84f5e8b-4fff-4a22-bf3b-371ebb956156
73659b92-5fc2-4a73-8aeb-572ab492abea
ac9ab878-9f27-48e9-b2d5-1b212c85a8d4
70de5e15-2cbc-4371-9f08-fa3f46339254
11ae324f-8c43-4f65-bf68-761de7097099
0d5d7635-0c6f-48f9-beb7-509806f783ce
f84ca27e-cc70-4557-a9a0-39df493b5807
c065e5f6-0a59-417a-8a7a-df1deb769559
8a9d1668-a5c2-440f-820d-bf792ce3cd8b
de6450f6-9e92-48fe-8080-640cdffc4e00
8e31e1b7-fe5a-4aa3-abba-4ad42ddaf9f6
eb124d81-c0b5-4ddb-ad42-42163374d037
69b1f442-0b43-4b70-af47-585a2a1383bf
ea64fc33-32be-46eb-b211-8cd5e8b142a2
7c670953-bd32-4a1b-9fc8-e35b8d26229b
a47120e1-0c4f-419b-babe-ed8e0c89fb0f
7fba0c62-3bf5-4db8-8e8c-63cc15227d19
a0e00c7e-c35e-45db-9864-35588b89193f
6b0ff409-a3c9-45d3-9900-853e34d5dccb
ff288628-ac15-4039-8819-e69dbe4596cd
19d63f42-05e3-479a-8292-caccd26d32c4
af386af3-373a-4f6e-a093-a3d15b6afe77
14c78d26-5080-40d3-b1f6-3461e75f6598
95d85c9b-b7de-4b01-8c39-03102fa3248f
ec03d767-8286-42de-b481-f8105c5ad298
c22b61f2-ccae-4cd9-babd-650aabd86c59
01d931b2-5bcb-4b5d-ad4d-d441329fb79b
ce38a1b8-6b6c-4aae-886b-9c04e528527f
a3f8a853-c8fd-4d35-a2c6-b15ca1cb3ea5
56ec6fb2-29e5-4148-af7c-5a9b38f4d407
702a39b8-ae30-4d54-97c0-50158d2ab848
dff637d4-0862-4034-b552-a118ec57290a
d05aee2a-e8c2-4a00-8929-7dba26464339
939c6a18-e606-4af8-ab88-01e4e25664de
4a532921-e886-4f61-a2b6-46c0b0cbbde3
6317de3a-95b0-4ac3-83a5-2d70d445b0cc
0334372b-7af8-46b4-84db-3977f41520ba
0370b4af-ffcd-43b6-a852-7207fa1a992d
9b272724-c624-4972-91b8-54fade919640
cfd1b906-e977-4706-8a7a-183992ffe025
63cbb310-dddd-4c97-8f92-d8e5056b8550
61ba0fe6-cbb8-47f1-a020-4719807d7992
9251e484-c76f-4c33-b331-c9b88b6e7f4e
b6c7c47f-bebd-4efa-ae25-a1dc5cd99f30
1870396b-c4ba-4d83-9ecd-aabecc8ed203
af11ed20-9f70-49c6-93c9-c3dc5066f90f
2c14d303-8f1e-4663-8e56-96299bd06bc8
e6f8c638-07cb-4d30-b6f1-66f950e74c92
ec8ac7da-e090-40c0-a93c-cef10f96b6d6
7d0225ac-03c6-43dc-9e2e-b6203f40cd7c
c6b5c8a8-4339-420b-a643-79e1487a5d9e
e5c4411b-1294-475f-9d4a-2434e7ac14c4
867f37cb-a7a2-4caa-89d3-95557b58d8a4
532813ac-037f-4c2d-ad55-a16f24564157
9e66794d-43b1-41a1-97ef-42b1bb2a01b3
7678c113-56b0-4c5e-b14b-67b05b9b38a2
ce7c6a54-2d7c-4c02-8721-2c875d1fd062
49c6a88d-4913-4351-a2d3-4090e512819b
9e02d2c8-89f9-4721-b504-f29fd44d878a
f3d19980-ea20-4c2d-88ed-3b4712222998
3ba69f36-df6b-4e52-98ae-a652df403c4f
82743f14-26bb-4019-85d1-3ef5edc90454
05d10d13-69e8-438d-b65c-7cfbdafaca17
7eefc24b-673a-4b45-89d7-444f12846c93
0199dd38-de6b-4be3-80f7-cf4f170ef2f2
271494db-8a44-4f0f-9c2e-2dede59e03bf
54dcf802-6d3a-431e-b958-bfc8af7afb30
bdf48a65-3cad-4b3c-92f1-94c977486d98
b7336c2f-7aca-4a88-bfc5-4d188a6add83
010542f1-2767-4d7e-9969-79216a8d799d
18e644ee-6ca2-40bf-8b41-ca68f94c5fcb
9ef629ac-c190-45e7-bf69-b83a213b356c
313f22a9-afa8-4464-87c5-abcb5b2f32d7
49a17069-dfdc-4c9a-b84a-2079d04d833e
fdc64c2b-d233-4817-8792-d1caaf2c591a
741a6203-ac89-4061-9799-a51c5e4cfc49
8fba001c-59da-4b63-8a3d-6d8f56c26e0a
87719779-8355-43ce-9c78-746b060f0ca2
68fec1e9-fbb1-44fd-abaf-5659a5464fa6
ab26e325-9c7d-4aeb-88c0-898647896ef8
4d0f4d96-4c79-47a3-a8af-6481b463f5a6
85db8889-ffe8-4c1c-a879-83faf1e878e8
4114acf2-e508-420c-a868-26ab9aae2250
229b5d6f-4542-40f2-817a-d8218b073967
4ef0dd76-fb7a-4340-b44d-5347178df527
4e223d73-1f32-4a0b-b607-cea8e407c0d5
42bed8d9-4833-4c3c-8e43-50ec2ef6bdf2
99e18f68-4e84-48c2-991b-282992f51570
"""
def main():
    """Download the early-media and active-media recording of every call id.

    For each id listed in ``call_id_str`` two URLs are tried
    (``<call_id>_early_media.wav`` and ``<call_id>_active_media.wav``); every
    recording that exists is saved as ``<media_type>_<call_id>.wav`` inside the
    ``--output_dir`` directory. Files already present in that directory are
    skipped, so the script can be re-run to resume an interrupted download.

    Connection problems, timeouts and 404 responses are reported on stdout
    and the recording is skipped.

    Raises:
        AssertionError: if the server answers with a status code other than
            200 or 404.
    """
    args = get_args()

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Recordings saved by a previous run; stems look like "<media_type>_<call_id>".
    finished = {wav_path.stem for wav_path in output_dir.glob("*.wav")}
    print(f"finished count: {len(finished)}")

    base_url = "https://record-prod.obs.la-south-2.myhuaweicloud.com/audio_corpus/callbot/es-MX/20251201"
    # (connect, read) timeouts in seconds: without them a stalled server
    # would block the whole batch forever.
    request_timeout = (10, 120)

    # One session for the whole batch so the connection to the host is reused.
    with requests.Session() as session:
        for raw_line in call_id_str.strip().split("\n"):
            call_id = raw_line.strip()
            if not call_id:
                # A blank line in the id list would otherwise build a bogus URL.
                continue

            for media_type in ("early_media", "active_media"):
                name = f"{media_type}_{call_id}"
                if name in finished:
                    continue

                record_url = f"{base_url}/{call_id}_{media_type}.wav"
                print(f"record_url: {record_url}")
                try:
                    resp = session.get(url=record_url, timeout=request_timeout)
                except (
                    TimeoutError,
                    requests.exceptions.Timeout,
                    requests.exceptions.ConnectionError,
                ):
                    print(f"record_url timeout: {record_url}")
                    continue
                except Exception as e:
                    # Best effort: one failing request must not abort the batch.
                    print(e)
                    continue

                if resp.status_code == 404:
                    print(f"record_url not found: {record_url}")
                    continue
                if resp.status_code != 200:
                    raise AssertionError(f"status_code: {resp.status_code}; text: {resp.text}")

                filename = output_dir / f"{name}.wav"
                # Write to a ".part" file and rename afterwards: an interrupted
                # run then never leaves a truncated "*.wav" that the next run
                # would count as finished.
                partial = output_dir / f"{name}.wav.part"
                partial.write_bytes(resp.content)
                partial.replace(filename)
                # Guards against the same id being listed twice.
                finished.add(name)
# Run the downloader only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()