File size: 3,426 Bytes
66bfb7e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import argparse
from datetime import datetime
from pathlib import Path

import pandas as pd
import requests
from tqdm import tqdm

from project_settings import project_path


def get_args():
    """Parse the command-line options of the recording download script.

    Options (all strings):
        --excel_file_dir: directory holding the input files; defaults to
            ``examples/download_wav`` under ``project_path``.
        --start_date: lower date bound, ``YYYY-MM-DD HH:MM:SS``.
        --end_date: upper date bound, ``YYYY-MM-DD HH:MM:SS``.
        --output_dir: directory the recordings are written to; defaults to
            ``data/calling/63/wav_2ch`` under ``project_path``.

    Returns:
        The ``argparse.Namespace`` produced by parsing ``sys.argv``.
    """
    # (flag, default) pairs; every option is registered as a plain string.
    option_defaults = (
        ("--excel_file_dir", (project_path / "examples/download_wav").as_posix()),
        ("--start_date", "2022-04-10 00:00:00"),
        ("--end_date", "2026-04-21 00:00:00"),
        ("--output_dir", (project_path / "data/calling/63/wav_2ch").as_posix()),
    )

    cli = argparse.ArgumentParser()
    for flag, fallback in option_defaults:
        cli.add_argument(flag, default=fallback, type=str)
    return cli.parse_args()


# Newline-separated names of the CSV files to process. main() strips each
# line, skips blank ones and resolves the rest against --excel_file_dir.
excel_file_str = """
record_1110.csv
record_1104.csv
"""


def main():
    """Download the recordings referenced by the CSV files in ``excel_file_str``.

    Every non-empty line of ``excel_file_str`` names a CSV file inside
    ``--excel_file_dir``. Each value of that file's ``record_file`` column is
    treated as a URL, fetched over HTTP and written to ``--output_dir`` under
    the URL's final path component.

    A row is skipped when:
      * its ``record_file`` value is missing (NaN);
      * a ``.wav`` file with the same stem already exists in the output
        directory, or the same stem was already seen earlier in this run;
      * the call date is not strictly between ``--start_date`` and
        ``--end_date``;
      * the request fails (connection error, timeout, or any other
        exception, the latter being printed);
      * the server answers 404.

    Raises:
        AssertionError: if the server answers with a status code other than
            200 or 404.
    """
    args = get_args()

    format_str = "%Y-%m-%d %H:%M:%S"
    # Seconds to wait for the server before giving up on one recording;
    # without a timeout a single stalled connection blocks the whole batch.
    request_timeout = 60

    start_date = datetime.strptime(args.start_date, format_str)
    end_date = datetime.strptime(args.end_date, format_str)

    excel_file_dir = Path(args.excel_file_dir)
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    print(f"start_date: {start_date}")
    print(f"end_date: {end_date}")

    # NOTE(review): the call date is a fixed placeholder rather than a per-row
    # value, so the date window accepts or rejects every row alike.
    # Parsed once here because it does not change between rows.
    call_date = datetime.strptime("2025-10-12 00:00:00", format_str)

    # Stems of recordings already on disk, so reruns do not download them again.
    finished = {wav_file.stem for wav_file in output_dir.glob("*.wav")}

    for line in excel_file_str.split("\n"):
        name = line.strip()
        if not name:
            continue
        excel_file = excel_file_dir / name

        df = pd.read_csv(excel_file.as_posix())
        for _, record in tqdm(df.iterrows(), total=len(df)):
            record_url = record["record_file"]
            # Check for a missing URL before building a Path from it:
            # Path(nan) raises TypeError.
            if pd.isna(record_url):
                continue

            url_path = Path(record_url)
            call_id = url_path.stem
            record_name = url_path.name

            if call_id in finished:
                continue
            # Marked before the download so duplicate rows are attempted only
            # once per run, even when the attempt fails.
            finished.add(call_id)

            if not start_date < call_date < end_date:
                continue

            try:
                resp = requests.get(
                    url=record_url,
                    timeout=request_timeout,
                )
            except (TimeoutError, requests.exceptions.ConnectionError):
                continue
            except Exception as e:
                # Best effort: report anything else (including read timeouts)
                # and move on to the next recording.
                print(e)
                continue

            if resp.status_code == 404:
                continue
            if resp.status_code != 200:
                raise AssertionError("status_code: {}; text: {}".format(resp.status_code, resp.text))

            filename = output_dir / f"{record_name}"
            with open(filename.as_posix(), "wb") as f:
                f.write(resp.content)

    return


# Run the downloader only when this file is executed as a script.
if __name__ == "__main__":
    main()