#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""Download call recordings listed in CSV exports into a local directory.

Each CSV named in ``excel_file_str`` is read from ``--excel_file_dir``; the
URL found in its ``record_file`` column is fetched and stored in
``--output_dir`` under the URL's own file name.
"""
import argparse
from datetime import datetime
from pathlib import Path

import pandas as pd
import requests
from tqdm import tqdm

from project_settings import project_path


# Format shared by the --start_date / --end_date options and the call date.
DATE_FORMAT = "%Y-%m-%d %H:%M:%S"

# The CSVs read here are not asked for a per-call timestamp, so every row is
# assigned this fixed date before the date-window filter is applied.
# NOTE(review): an earlier variant read "Attempt time" / "Call ID" /
# "Recording file" columns instead - confirm which export schema is in use.
DEFAULT_CALL_DATE = "2025-10-12 00:00:00"


def get_args():
    """Parse the command-line options.

    Returns:
        argparse.Namespace with ``excel_file_dir``, ``start_date``,
        ``end_date``, ``output_dir`` and ``timeout``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--excel_file_dir",
        default=(project_path / "examples/download_wav").as_posix(),
        type=str
    )
    parser.add_argument(
        "--start_date",
        default="2022-04-10 00:00:00",
        type=str
    )
    parser.add_argument(
        "--end_date",
        default="2026-04-21 00:00:00",
        type=str
    )
    parser.add_argument(
        "--output_dir",
        default=(project_path / "data/calling/63/wav_2ch").as_posix(),
        type=str
    )
    parser.add_argument(
        "--timeout",
        default=60.0,
        type=float,
        help="seconds to wait for the server before giving up on one recording"
    )
    args = parser.parse_args()
    return args


# One CSV file name per line; blank lines are ignored.
excel_file_str = """
record_1110.csv
record_1104.csv
"""


def main():
    """Download every not-yet-present recording referenced by the CSV files.

    Rows are skipped when the URL cell is empty, when the call id (the URL's
    file stem) was already seen or already exists as ``<call_id>.wav`` in the
    output directory, when the call date is outside the open interval
    ``(start_date, end_date)``, when the request fails, or when the server
    answers 404.

    Raises:
        AssertionError: the server answered with a status other than 200/404.
    """
    args = get_args()

    start_date = datetime.strptime(args.start_date, DATE_FORMAT)
    end_date = datetime.strptime(args.end_date, DATE_FORMAT)

    excel_file_dir = Path(args.excel_file_dir)
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    print(f"start_date: {start_date}")
    print(f"end_date: {end_date}")

    # Call ids that are already on disk (or were already handled in this run).
    finished = {wav_file.stem for wav_file in output_dir.glob("*.wav")}

    # The call date is the same for every row, so parse it once.
    call_date = datetime.strptime(DEFAULT_CALL_DATE, DATE_FORMAT)

    for line in excel_file_str.split("\n"):
        name = line.strip()
        if not name:
            continue
        excel_file = excel_file_dir / name

        # df = pd.read_excel(excel_file.as_posix())
        df = pd.read_csv(excel_file.as_posix())

        for _, record in tqdm(df.iterrows(), total=len(df)):
            record_url = record["record_file"]
            # Must be checked before building a Path: Path(nan) raises
            # TypeError, which would abort the whole run on one empty cell.
            if pd.isna(record_url):
                continue

            record_path = Path(record_url)
            call_id = record_path.stem
            record_name = record_path.name

            if call_id in finished:
                continue
            # Marked before the download so duplicates of the same call id in
            # later rows/files are not requested again in this run.
            finished.add(call_id)

            if not start_date < call_date < end_date:
                continue

            # NOTE: earlier variants rebuilt the URL from the call date and id:
            # call_date_str = call_date.strftime("%Y%m%d")
            # record_url = f"https://phl-01.obs.ap-southeast-3.myhuaweicloud.com/{call_date_str}/21964/{call_id}.wav"
            # record_url = f"https://nxai-hk-1259196162.cos.ap-hongkong.myqcloud.com/{call_date_str}/3101/{call_id}.wav"

            try:
                # Without a timeout a stalled server blocks the batch forever.
                resp = requests.get(
                    url=record_url,
                    timeout=args.timeout,
                )
            except (
                TimeoutError,
                requests.exceptions.ConnectionError,
                requests.exceptions.Timeout,
            ):
                # Transient network problems: skip this recording quietly.
                continue
            except Exception as e:
                # Best-effort batch job: report and move on to the next row.
                print(e)
                continue

            if resp.status_code == 404:
                continue
            if resp.status_code != 200:
                raise AssertionError("status_code: {}; text: {}".format(resp.status_code, resp.text))

            filename = output_dir / f"{record_name}"
            with open(filename.as_posix(), "wb") as f:
                f.write(resp.content)


if __name__ == "__main__":
    main()