HoneyTian committed on
Commit 85abe14 · 1 Parent(s): 164db19
examples/download_wav/step_1_download_wav.py CHANGED
@@ -16,7 +16,7 @@ def get_args():
16
 
17
  parser.add_argument(
18
  "--output_dir",
19
- default=(project_path / "temp/").as_posix(),
20
  type=str
21
  )
22
  args = parser.parse_args()
@@ -24,27 +24,120 @@ def get_args():
24
 
25
 
26
  call_id_str = """
27
  79e8d43f-3f17-484f-966b-681557a19dd9
28
  a8934418-5626-4bf4-9525-7924d3413dc3
 
29
  204abc19-ff7d-4495-8969-2faa431a5efa
30
  82817c32-6dfa-4622-aae3-71712fa2159b
 
31
  aa560c1c-58d7-4e63-95b7-4a0045962dbc
 
32
  cf63816f-3fe3-4b61-a6cd-7d640bb02372
33
  70de5e15-2cbc-4371-9f08-fa3f46339254
34
  ea64fc33-32be-46eb-b211-8cd5e8b142a2
35
  a0e00c7e-c35e-45db-9864-35588b89193f
36
  95d85c9b-b7de-4b01-8c39-03102fa3248f
37
  ce38a1b8-6b6c-4aae-886b-9c04e528527f
38
  939c6a18-e606-4af8-ab88-01e4e25664de
 
39
  6317de3a-95b0-4ac3-83a5-2d70d445b0cc
40
  cfd1b906-e977-4706-8a7a-183992ffe025
41
  49c6a88d-4913-4351-a2d3-4090e512819b
42
  271494db-8a44-4f0f-9c2e-2dede59e03bf
 
43
  bdf48a65-3cad-4b3c-92f1-94c977486d98
44
  9ef629ac-c190-45e7-bf69-b83a213b356c
45
  313f22a9-afa8-4464-87c5-abcb5b2f32d7
46
  87719779-8355-43ce-9c78-746b060f0ca2
47
  229b5d6f-4542-40f2-817a-d8218b073967
48
  """
49
 
50
 
@@ -57,36 +150,43 @@ def main():
57
  # finished
58
  finished = set()
59
  for filename in output_dir.glob("*.wav"):
60
- call_id = filename.stem
61
- finished.add(call_id)
 
62
 
63
  splits = call_id_str.strip().split("\n")
64
  for call_id in splits:
65
  call_id = str(call_id).strip()
66
- record_url = f"https://record-prod.obs.la-south-2.myhuaweicloud.com/audio_corpus/callbot/es-MX/20251201/{call_id}_active_media.wav"
67
-
68
- if call_id in finished:
69
- continue
70
-
71
- try:
72
- print(f"record_url: {record_url}")
73
- resp = requests.get(
74
- url=record_url,
75
- )
76
- except (TimeoutError, requests.exceptions.ConnectionError):
77
- continue
78
- except Exception as e:
79
- print(e)
80
- continue
81
-
82
- if resp.status_code == 404:
83
- continue
84
- if resp.status_code != 200:
85
- raise AssertionError("status_code: {}; text: {}".format(resp.status_code, resp.text))
86
-
87
- filename = output_dir / f"{call_id}.wav"
88
- with open(filename.as_posix(), "wb") as f:
89
- f.write(resp.content)
90
 
91
  return
92
 
 
16
 
17
  parser.add_argument(
18
  "--output_dir",
19
+ default=(project_path / "data/make_analysis_excel/download_wav/20251204/").as_posix(),
20
  type=str
21
  )
22
  args = parser.parse_args()
 
24
 
25
 
26
  call_id_str = """
27
+ c6840978-207e-43e0-b62e-4ad1a4269917
28
+ a6961fd4-19a2-4403-ac8a-8c1e2ebe714b
29
+ 921a94e3-292a-4092-b532-a25aac195ed5
30
+ 898d85e2-9d5a-450c-b1bd-e02b88d3f703
31
+ 20a52fe5-b4f9-47b9-9af5-5670bbc77ec2
32
+ e4a3e15c-97e0-4cf5-858a-8cbd728d71e6
33
+ 59cc7e56-f956-49f4-8004-6bdbf306161e
34
  79e8d43f-3f17-484f-966b-681557a19dd9
35
+ ffba67b6-5ad5-437d-be71-23f2786b9364
36
+ 4ce36ae6-b5d0-4108-ab6f-bf08ea6ba50c
37
+ 3690e91b-282d-4a40-82bb-f7b4dad7bf32
38
+ 0764293a-95ae-492f-b08e-9873450e417f
39
+ 7f16a22d-2194-4656-bce3-ad6d39c7bee3
40
+ c7fb4444-f669-490a-898a-ddec20d02318
41
+ 70e1c32f-0aaa-404d-a70e-4a19931174fb
42
+ 9a03a1ee-5cce-4cfd-b479-096067484b13
43
  a8934418-5626-4bf4-9525-7924d3413dc3
44
+ 7ad6154a-5191-4ef0-a72e-717b62c5fd1f
45
  204abc19-ff7d-4495-8969-2faa431a5efa
46
+ 6adfceb3-d625-4036-bb3a-ef9db5f761bb
47
+ 9aa34ed8-98ed-49bc-b8e6-8f98c7574d50
48
  82817c32-6dfa-4622-aae3-71712fa2159b
49
+ 13958016-250b-4c60-9b31-5c85df365cb6
50
  aa560c1c-58d7-4e63-95b7-4a0045962dbc
51
+ e20c2660-d291-46fd-94a9-d9b554f2c305
52
  cf63816f-3fe3-4b61-a6cd-7d640bb02372
53
+ e84f5e8b-4fff-4a22-bf3b-371ebb956156
54
+ 73659b92-5fc2-4a73-8aeb-572ab492abea
55
+ ac9ab878-9f27-48e9-b2d5-1b212c85a8d4
56
  70de5e15-2cbc-4371-9f08-fa3f46339254
57
+ 11ae324f-8c43-4f65-bf68-761de7097099
58
+ 0d5d7635-0c6f-48f9-beb7-509806f783ce
59
+ f84ca27e-cc70-4557-a9a0-39df493b5807
60
+ c065e5f6-0a59-417a-8a7a-df1deb769559
61
+ 8a9d1668-a5c2-440f-820d-bf792ce3cd8b
62
+ de6450f6-9e92-48fe-8080-640cdffc4e00
63
+ 8e31e1b7-fe5a-4aa3-abba-4ad42ddaf9f6
64
+ eb124d81-c0b5-4ddb-ad42-42163374d037
65
+ 69b1f442-0b43-4b70-af47-585a2a1383bf
66
  ea64fc33-32be-46eb-b211-8cd5e8b142a2
67
+ 7c670953-bd32-4a1b-9fc8-e35b8d26229b
68
+ a47120e1-0c4f-419b-babe-ed8e0c89fb0f
69
+ 7fba0c62-3bf5-4db8-8e8c-63cc15227d19
70
  a0e00c7e-c35e-45db-9864-35588b89193f
71
+ 6b0ff409-a3c9-45d3-9900-853e34d5dccb
72
+ ff288628-ac15-4039-8819-e69dbe4596cd
73
+ 19d63f42-05e3-479a-8292-caccd26d32c4
74
+ af386af3-373a-4f6e-a093-a3d15b6afe77
75
+ 14c78d26-5080-40d3-b1f6-3461e75f6598
76
  95d85c9b-b7de-4b01-8c39-03102fa3248f
77
+ ec03d767-8286-42de-b481-f8105c5ad298
78
+ c22b61f2-ccae-4cd9-babd-650aabd86c59
79
+ 01d931b2-5bcb-4b5d-ad4d-d441329fb79b
80
  ce38a1b8-6b6c-4aae-886b-9c04e528527f
81
+ a3f8a853-c8fd-4d35-a2c6-b15ca1cb3ea5
82
+ 56ec6fb2-29e5-4148-af7c-5a9b38f4d407
83
+ 702a39b8-ae30-4d54-97c0-50158d2ab848
84
+ dff637d4-0862-4034-b552-a118ec57290a
85
+ d05aee2a-e8c2-4a00-8929-7dba26464339
86
  939c6a18-e606-4af8-ab88-01e4e25664de
87
+ 4a532921-e886-4f61-a2b6-46c0b0cbbde3
88
  6317de3a-95b0-4ac3-83a5-2d70d445b0cc
89
+ 0334372b-7af8-46b4-84db-3977f41520ba
90
+ 0370b4af-ffcd-43b6-a852-7207fa1a992d
91
+ 9b272724-c624-4972-91b8-54fade919640
92
  cfd1b906-e977-4706-8a7a-183992ffe025
93
+ 63cbb310-dddd-4c97-8f92-d8e5056b8550
94
+ 61ba0fe6-cbb8-47f1-a020-4719807d7992
95
+ 9251e484-c76f-4c33-b331-c9b88b6e7f4e
96
+ b6c7c47f-bebd-4efa-ae25-a1dc5cd99f30
97
+ 1870396b-c4ba-4d83-9ecd-aabecc8ed203
98
+ af11ed20-9f70-49c6-93c9-c3dc5066f90f
99
+ 2c14d303-8f1e-4663-8e56-96299bd06bc8
100
+ e6f8c638-07cb-4d30-b6f1-66f950e74c92
101
+ ec8ac7da-e090-40c0-a93c-cef10f96b6d6
102
+ 7d0225ac-03c6-43dc-9e2e-b6203f40cd7c
103
+ c6b5c8a8-4339-420b-a643-79e1487a5d9e
104
+ e5c4411b-1294-475f-9d4a-2434e7ac14c4
105
+ 867f37cb-a7a2-4caa-89d3-95557b58d8a4
106
+ 532813ac-037f-4c2d-ad55-a16f24564157
107
+ 9e66794d-43b1-41a1-97ef-42b1bb2a01b3
108
+ 7678c113-56b0-4c5e-b14b-67b05b9b38a2
109
+ ce7c6a54-2d7c-4c02-8721-2c875d1fd062
110
  49c6a88d-4913-4351-a2d3-4090e512819b
111
+ 9e02d2c8-89f9-4721-b504-f29fd44d878a
112
+ f3d19980-ea20-4c2d-88ed-3b4712222998
113
+ 3ba69f36-df6b-4e52-98ae-a652df403c4f
114
+ 82743f14-26bb-4019-85d1-3ef5edc90454
115
+ 05d10d13-69e8-438d-b65c-7cfbdafaca17
116
+ 7eefc24b-673a-4b45-89d7-444f12846c93
117
+ 0199dd38-de6b-4be3-80f7-cf4f170ef2f2
118
  271494db-8a44-4f0f-9c2e-2dede59e03bf
119
+ 54dcf802-6d3a-431e-b958-bfc8af7afb30
120
  bdf48a65-3cad-4b3c-92f1-94c977486d98
121
+ b7336c2f-7aca-4a88-bfc5-4d188a6add83
122
+ 010542f1-2767-4d7e-9969-79216a8d799d
123
+ 18e644ee-6ca2-40bf-8b41-ca68f94c5fcb
124
  9ef629ac-c190-45e7-bf69-b83a213b356c
125
  313f22a9-afa8-4464-87c5-abcb5b2f32d7
126
+ 49a17069-dfdc-4c9a-b84a-2079d04d833e
127
+ fdc64c2b-d233-4817-8792-d1caaf2c591a
128
+ 741a6203-ac89-4061-9799-a51c5e4cfc49
129
+ 8fba001c-59da-4b63-8a3d-6d8f56c26e0a
130
  87719779-8355-43ce-9c78-746b060f0ca2
131
+ 68fec1e9-fbb1-44fd-abaf-5659a5464fa6
132
+ ab26e325-9c7d-4aeb-88c0-898647896ef8
133
+ 4d0f4d96-4c79-47a3-a8af-6481b463f5a6
134
+ 85db8889-ffe8-4c1c-a879-83faf1e878e8
135
+ 4114acf2-e508-420c-a868-26ab9aae2250
136
  229b5d6f-4542-40f2-817a-d8218b073967
137
+ 4ef0dd76-fb7a-4340-b44d-5347178df527
138
+ 4e223d73-1f32-4a0b-b607-cea8e407c0d5
139
+ 42bed8d9-4833-4c3c-8e43-50ec2ef6bdf2
140
+ 99e18f68-4e84-48c2-991b-282992f51570
141
  """
142
 
143
 
 
150
  # finished
151
  finished = set()
152
  for filename in output_dir.glob("*.wav"):
153
+ name = filename.stem
154
+ finished.add(name)
155
+ print(f"finished count: {len(finished)}")
156
 
157
  splits = call_id_str.strip().split("\n")
158
  for call_id in splits:
159
  call_id = str(call_id).strip()
160
+
161
+ record_url_early_media = f"https://record-prod.obs.la-south-2.myhuaweicloud.com/audio_corpus/callbot/es-MX/20251201/{call_id}_early_media.wav"
162
+ record_url_active_media = f"https://record-prod.obs.la-south-2.myhuaweicloud.com/audio_corpus/callbot/es-MX/20251201/{call_id}_active_media.wav"
163
+
164
+ for media_type, record_url in [("early_media", record_url_early_media), ("active_media", record_url_active_media)]:
165
+ name = f"{media_type}_{call_id}"
166
+ if name in finished:
167
+ continue
168
+
169
+ try:
170
+ print(f"record_url: {record_url}")
171
+ resp = requests.get(
172
+ url=record_url,
173
+ )
174
+ except (TimeoutError, requests.exceptions.ConnectionError):
175
+ print(f"record_url timeout: {record_url}")
176
+ continue
177
+ except Exception as e:
178
+ print(e)
179
+ continue
180
+
181
+ if resp.status_code == 404:
182
+ print(f"record_url not found: {record_url}")
183
+ continue
184
+ if resp.status_code != 200:
185
+ raise AssertionError("status_code: {}; text: {}".format(resp.status_code, resp.text))
186
+
187
+ filename = output_dir / f"{name}.wav"
188
+ with open(filename.as_posix(), "wb") as f:
189
+ f.write(resp.content)
190
 
191
  return
192
 
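Note: the revised step_1_download_wav.py now builds a set of already-downloaded stems, then fetches both the early-media and the active-media recording for every call ID and skips anything that returns 404. A minimal standalone sketch of that pattern (the base URL is the one used in the script; the helper name and the timeout are assumptions):

#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Sketch of the resumable two-media download loop used above.
from pathlib import Path

import requests

BASE_URL = "https://record-prod.obs.la-south-2.myhuaweicloud.com/audio_corpus/callbot/es-MX/20251201"


def download_call(call_id: str, output_dir: Path) -> None:
    # stems already on disk count as finished, so reruns only fetch what is missing
    finished = {f.stem for f in output_dir.glob("*.wav")}
    for media_type in ("early_media", "active_media"):
        name = f"{media_type}_{call_id}"
        if name in finished:
            continue
        url = f"{BASE_URL}/{call_id}_{media_type}.wav"
        resp = requests.get(url, timeout=30)
        if resp.status_code == 404:
            continue  # this leg was never recorded
        resp.raise_for_status()
        (output_dir / f"{name}.wav").write_bytes(resp.content)
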
examples/download_wav/step_3_split_two_second_wav.py CHANGED
@@ -16,24 +16,19 @@ def get_args():
16
  parser.add_argument(
17
  "--audio_dir",
18
  # default=(project_path / "data/calling/63/wav_1ch").as_posix(),
19
- # default=(project_path / "data/calling/358/wav_1ch/finished/voicemail_annotation").as_posix(),
20
- # default=(project_path / "data/calling/358/wav_1ch/finished/voicemail_annotation").as_posix(),
21
- # default=r"D:\Users\tianx\HuggingSpaces\template_match_asr\data\wav\early_media\60\music",
22
- # default=r"D:\Users\tianx\HuggingDatasets\calling_analysis\data\analysis\es-MX-2\bell_and_machine_voice",
23
- default=r"D:\Users\tianx\HuggingSpaces\cc_audio_8\temp",
24
  type=str
25
  )
26
  parser.add_argument(
27
  "--output_dir",
28
- # default=(project_path / "data/calling/63/wav_segmented").as_posix(),
29
- # default=r"D:\Users\tianx\HuggingSpaces\template_match_asr\data\wav\early_media\60\music\wav_segmented",
30
- # default=r"D:\Users\tianx\HuggingDatasets\calling_analysis\data\analysis\es-MX-2\bell_and_machine_voice\wav_segmented",
31
- default=r"D:\Users\tianx\HuggingSpaces\cc_audio_8\temp\wav_segmented",
32
  type=str
33
  )
34
  parser.add_argument(
35
  "--first_n_seconds",
36
- default=8,
37
  type=int
38
  )
39
  args = parser.parse_args()
@@ -72,7 +67,8 @@ def main():
72
 
73
  # to_filename = output_dir / f"active_media_r_{call_id}_{language}_{scene_id}_{begin}_from_mp3.wav"
74
  # to_filename = output_dir / f"active_media_r_{call_id}_{language}_{scene_id}_{begin}.wav"
75
- to_filename = output_dir / f"active_media_r_{filename.stem}_{begin}.wav"
 
76
  wavfile.write(
77
  to_filename.as_posix(),
78
  sample_rate,
 
16
  parser.add_argument(
17
  "--audio_dir",
18
  # default=(project_path / "data/calling/63/wav_1ch").as_posix(),
19
+ default=r"D:\Users\tianx\HuggingSpaces\template_match_asr\data\wav\early_media\63\bell",
20
+ # default=r"D:\Users\tianx\HuggingSpaces\cc_audio_8\temp",
21
  type=str
22
  )
23
  parser.add_argument(
24
  "--output_dir",
25
+ default=r"D:\Users\tianx\HuggingSpaces\template_match_asr\data\wav\early_media\63\bell\wav_segmented",
26
+ # default=r"D:\Users\tianx\HuggingSpaces\cc_audio_8\temp\wav_segmented",
27
  type=str
28
  )
29
  parser.add_argument(
30
  "--first_n_seconds",
31
+ default=100,
32
  type=int
33
  )
34
  args = parser.parse_args()
 
67
 
68
  # to_filename = output_dir / f"active_media_r_{call_id}_{language}_{scene_id}_{begin}_from_mp3.wav"
69
  # to_filename = output_dir / f"active_media_r_{call_id}_{language}_{scene_id}_{begin}.wav"
70
+ # to_filename = output_dir / f"active_media_r_{filename.stem}_{begin}.wav"
71
+ to_filename = output_dir / f"{filename.stem}_{begin}.wav"
72
  wavfile.write(
73
  to_filename.as_posix(),
74
  sample_rate,
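
Note: step_3_split_two_second_wav.py cuts each input wav into fixed-length windows over the first N seconds and writes them with a "{stem}_{begin}" suffix, as the new filename line above shows. A rough sketch of that segmentation, assuming 2-second windows and scipy-readable PCM wavs (function name and defaults are illustrative, not the script's exact code):

# Sketch of the fixed-window segmentation this script performs.
from pathlib import Path

from scipy.io import wavfile


def split_wav(wav_file: Path, output_dir: Path, first_n_seconds: int = 8, win_seconds: int = 2) -> None:
    sample_rate, signal = wavfile.read(wav_file.as_posix())
    win = win_seconds * sample_rate
    limit = min(len(signal), first_n_seconds * sample_rate)
    for begin in range(0, limit, win):
        sub_signal = signal[begin: begin + win]
        if len(sub_signal) < win:
            break  # drop the trailing partial window
        to_filename = output_dir / f"{wav_file.stem}_{begin}.wav"
        wavfile.write(to_filename.as_posix(), sample_rate, sub_signal)
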
examples/nx_spider_transferred_bad_case_filter/step_1_bad_case_filter.py ADDED
@@ -0,0 +1,166 @@
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import argparse
4
+ import json
5
+ import os
6
+ from pathlib import Path
7
+ import shutil
8
+ import logging
9
+ import tempfile
10
+ from urllib.parse import urlparse
11
+
12
+ import requests
13
+ from gradio_client import Client, handle_file
14
+ from tqdm import tqdm
15
+ from tenacity import before_sleep_log, retry, retry_if_exception_type, stop_after_attempt, wait_fixed
16
+
17
+ import log
18
+ from project_settings import project_path, log_directory, temp_directory
19
+
20
+ log.setup_size_rotating(log_directory=log_directory, tz_info="Asia/Shanghai")
21
+
22
+
23
+ logger = logging.getLogger("main")
24
+
25
+
26
+ def get_args():
27
+ parser = argparse.ArgumentParser()
28
+ parser.add_argument(
29
+ "--task_file",
30
+ default=r"D:\Users\tianx\intelli-zen\nx_spider\data\transferred_bad_case_filter\transferred_2025-11-20_2025-11-26.jsonl",
31
+ type=str
32
+ )
33
+ parser.add_argument(
34
+ "--output_dir",
35
+ default=(project_path / "data/nx_spider_transferred_bad_case_filter/20251203").as_posix(),
36
+ type=str
37
+ )
38
+ parser.add_argument(
39
+ "--output_file",
40
+ default=(project_path / "data/nx_spider_transferred_bad_case_filter/20251203/bad_case.jsonl").as_posix(),
41
+ type=str
42
+ )
43
+ args = parser.parse_args()
44
+ return args
45
+
46
+
47
+ def retry_call(fn, *args, **kwargs):
48
+ @retry(
49
+ wait=wait_fixed(10),
50
+ stop=stop_after_attempt(3),
51
+ before_sleep=before_sleep_log(logger, logging.ERROR),
52
+ )
53
+ def wrapped():
54
+ return fn(*args, **kwargs)
55
+ return wrapped()
56
+
57
+
58
+ @retry(
59
+ wait=wait_fixed(10),
60
+ stop=stop_after_attempt(3),
61
+ before_sleep=before_sleep_log(logger, logging.ERROR),
62
+ )
63
+ def download_wav_by_url(url: str, output_dir: str):
64
+ p = urlparse(url)
65
+ path = Path(p.path)
66
+ filename = os.path.join(output_dir, path.name)
67
+
68
+ resp = requests.get(
69
+ url=url,
70
+ )
71
+ if resp.status_code == 404:
72
+ return None, "not found; 404;"
73
+
74
+ with open(filename, "wb") as f:
75
+ f.write(resp.content)
76
+ return filename, "success"
77
+
78
+
79
+ def main():
80
+ args = get_args()
81
+
82
+ task_file = Path(args.task_file)
83
+ output_dir = Path(args.output_dir)
84
+ output_dir.mkdir(parents=True, exist_ok=True)
85
+ output_file = Path(args.output_file)
86
+ output_file.parent.mkdir(parents=True, exist_ok=True)
87
+
88
+ client = Client("http://127.0.0.1:7864/")
89
+ model_name = f"voicemail-es-mx-2-l3-ch4-cnn"
90
+ labels = client.predict(
91
+ model_name=model_name,
92
+ api_name="/partial"
93
+ )
94
+ target_label = labels["value"]
95
+
96
+ # finished
97
+ finished = set()
98
+ if output_file.exists():
99
+ with open(output_file.as_posix(), "r", encoding="utf-8") as f:
100
+ for row in f:
101
+ row = json.loads(row)
102
+ call_id = row["call_id"]
103
+ finished.add(call_id)
104
+ logger.info(f"finished count: {len(finished)}")
105
+
106
+ with open(task_file.as_posix(), "r", encoding="utf-8") as fin, open(output_file.as_posix(), "a+", encoding="utf-8") as fout:
107
+ for row in fin:
108
+ row = json.loads(row)
109
+ call_id = row["call_id"]
110
+ call_elapsed = row["call_elapsed"]
111
+ early_media_record_url = row["early_media_record_url"]
112
+ active_media_record_url = row["active_media_record_url"]
113
+ if call_id in finished:
114
+ continue
115
+ if call_elapsed < 45:
116
+ continue
117
+
118
+ logger.info(f"process call_id: {call_id}; url: {active_media_record_url}")
119
+
120
+ tmp, message = download_wav_by_url(active_media_record_url, temp_directory.as_posix())
121
+ if tmp is None:
122
+ logger.error(f"download wav failed; url: {active_media_record_url}, message: {message}")
123
+ continue
124
+
125
+ max_duration = min(call_elapsed // 2, 15)
126
+
127
+ try:
128
+ outputs = retry_call(
129
+ client.predict,
130
+ audio_t=handle_file(tmp),
131
+ model_name=model_name,
132
+ # target_label=target_label,
133
+ target_label="voice",
134
+ win_size=2,
135
+ win_step=2,
136
+ max_duration=max_duration,
137
+ api_name="/when_click_event_button"
138
+ )
139
+ except Exception as error:
140
+ continue
141
+ outputs = json.loads(outputs)
142
+ row_ = {
143
+ "call_id": call_id,
144
+ "call_elapsed": call_elapsed,
145
+ "early_media_record_url": early_media_record_url,
146
+ "active_media_record_url": active_media_record_url,
147
+ "labels": outputs,
148
+ }
149
+ row_ = json.dumps(row_, ensure_ascii=False)
150
+ fout.write(f"{row_}\n")
151
+ fout.flush()
152
+
153
+ if len(outputs) == 0:
154
+ continue
155
+ labels = [row["label"] for row in outputs]
156
+ if any([label in ("voicemail",) for label in labels]):
157
+ shutil.copy(
158
+ tmp,
159
+ output_dir.as_posix()
160
+ )
161
+
162
+ return
163
+
164
+
165
+ if __name__ == "__main__":
166
+ main()
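
Note on the retry handling in the new filter script: retry_call wraps an arbitrary callable in the same tenacity policy that decorates download_wav_by_url (wait 10 s between attempts, stop after 3, log before each sleep). A minimal sketch of that policy on its own, with a hypothetical fetch function standing in for the wrapped call:

import logging

import requests
from tenacity import before_sleep_log, retry, stop_after_attempt, wait_fixed

logger = logging.getLogger("main")


@retry(
    wait=wait_fixed(10),               # pause 10 s between attempts
    stop=stop_after_attempt(3),        # give up after the third failure
    before_sleep=before_sleep_log(logger, logging.ERROR),
)
def fetch(url: str) -> bytes:
    resp = requests.get(url, timeout=30)
    resp.raise_for_status()  # raising is what triggers the next retry
    return resp.content
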
examples/online_model_test/step_1_predict.py DELETED
@@ -1,218 +0,0 @@
1
- #!/usr/bin/python3
2
- # -*- coding: utf-8 -*-
3
- import argparse
4
- import glob
5
- import json
6
- import os
7
- from pathlib import Path
8
- from tqdm import tqdm
9
-
10
- import librosa
11
- import numpy as np
12
- import onnxruntime as ort
13
- import pandas as pd
14
- import torch
15
- import torchaudio
16
-
17
-
18
- def get_args():
19
- parser = argparse.ArgumentParser()
20
- parser.add_argument(
21
- "--audio_dir",
22
- default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\audio_lib_hkg_1\th-TH\th-TH\early_media_no_voice",
23
- type=str,
24
- )
25
- parser.add_argument("--onnx_model_file", default="models/th-TH.onnx", type=str)
26
- parser.add_argument("--target_duration", default=8.0, type=float)
27
-
28
- parser.add_argument("--output_file", default="th-TH_predict.xlsx", type=str)
29
-
30
- args = parser.parse_args()
31
- return args
32
-
33
-
34
- class OnlineModelConfig(object):
35
- def __init__(self,
36
- sample_rate: int = 8000,
37
- n_fft: int = 1024,
38
- hop_size: int = 512,
39
- n_mels: int = 80,
40
- f_min: float = 10.0,
41
- f_max: float = 3800.0,
42
- ):
43
- self.sample_rate = sample_rate
44
- self.n_fft = n_fft
45
- self.hop_size = hop_size
46
- self.n_mels = n_mels
47
- self.f_min = f_min
48
- self.f_max = f_max
49
-
50
-
51
- class OnlineModelInference(object):
52
- def __init__(self,
53
- model_path: str,
54
- ):
55
- self.model_path = model_path
56
-
57
- providers = [
58
- "CUDAExecutionProvider", "CPUExecutionProvider"
59
- ] if torch.cuda.is_available() else [
60
- "CPUExecutionProvider"
61
- ]
62
- self.session = ort.InferenceSession(self.model_path, providers=providers)
63
-
64
- self.config = OnlineModelConfig()
65
-
66
- self.mel_transform = torchaudio.transforms.MelSpectrogram(
67
- sample_rate=self.config.sample_rate,
68
- n_fft=self.config.n_fft,
69
- hop_length=self.config.hop_size,
70
- n_mels=self.config.n_mels,
71
- f_min=self.config.f_min,
72
- f_max=self.config.f_max,
73
- window_fn=torch.hamming_window
74
- )
75
-
76
- def predict_by_ndarray(self,
77
- sub_signal: np.ndarray,
78
- h: np.ndarray = None,
79
- c: np.ndarray = None,
80
- ):
81
- # sub_signal, shape: [num_samples,]
82
- sub_signal = torch.tensor(sub_signal, dtype=torch.float32)
83
-
84
- sub_signal = sub_signal.unsqueeze(0)
85
- # sub_signal, shape: [1, num_samples]
86
- mel_spec = self.mel_transform.forward(sub_signal)
87
- # mel_spec, shape: [1, n_mels, n_frames]
88
- mel_spec = torch.transpose(mel_spec, dim0=1, dim1=2)
89
- # mel_spec, shape: [1, n_frames, n_mels]
90
-
91
- h = torch.tensor(h) if h is not None else None
92
- c = torch.tensor(c) if h is not None else None
93
- label, prob, h, c = self.predict_by_mel_spec(mel_spec, h=h, c=c)
94
- # h, c: torch.Tensor
95
- h = h.numpy()
96
- c = c.numpy()
97
- return label, prob, h, c
98
-
99
- def predict_by_mel_spec(self,
100
- mel_spec: torch.Tensor,
101
- h: torch.Tensor = None,
102
- c: torch.Tensor = None,
103
- ):
104
- # mel_spec, shape: [1, n_frames, n_mels]
105
-
106
- if h is None:
107
- h = np.zeros((3, 1, 64), dtype=np.float32) # 3-layer LSTM, batch size 1, hidden size 64
108
- else:
109
- h = h.numpy()
110
- if c is None:
111
- c = np.zeros((3, 1, 64), dtype=np.float32) # 3-layer LSTM, batch size 1, hidden size 64
112
- else:
113
- c = c.numpy()
114
-
115
- mel_spec_np = mel_spec.numpy()
116
- outputs = self.session.run(
117
- input_feed={
118
- "input": mel_spec_np,
119
- "h": h,
120
- "c": c
121
- },
122
- output_names=[
123
- "output", "h_out", "c_out"
124
- ],
125
- )
126
- logits, h, c = outputs
127
- # logits, np.ndarray, shape: [b, num_labels]
128
- # h, c: np.ndarray
129
- h = torch.tensor(h)
130
- c = torch.tensor(c)
131
-
132
- probs = torch.softmax(torch.tensor(logits), dim=1)
133
- max_prob, predicted_label_index = torch.max(probs, dim=1)
134
-
135
- label = self.get_label_by_index(predicted_label_index.item())
136
- prob = max_prob.item()
137
- return label, prob, h, c
138
-
139
- @staticmethod
140
- def get_label_by_index(index: int):
141
- label_map = {
142
- 0: "voice",
143
- 1: "voicemail",
144
- 2: "mute",
145
- 3: "noise"
146
- }
147
- result = label_map[index]
148
- return result
149
-
150
-
151
- def main():
152
- args = get_args()
153
-
154
- audio_dir = Path(args.audio_dir)
155
-
156
- model = OnlineModelInference(model_path=args.onnx_model_file)
157
-
158
- result = list()
159
- for filename in tqdm(audio_dir.glob("**/active_media_r_*.wav")):
160
- splits = filename.stem.split("_")
161
- call_id = splits[3]
162
- language = splits[4]
163
- scene_id = splits[5]
164
-
165
- signal, sample_rate = librosa.load(filename.as_posix(), sr=8000)
166
- duration = librosa.get_duration(y=signal, sr=sample_rate)
167
- signal_length = len(signal)
168
- if signal_length == 0:
169
- continue
170
-
171
- target_duration = args.target_duration * sample_rate
172
- target_duration = int(target_duration)
173
-
174
- predict_result = list()
175
- h = None
176
- c = None
177
- for begin in range(0, target_duration, sample_rate*2):
178
- end = begin + sample_rate*2
179
- sub_signal = signal[begin: end]
180
- if len(sub_signal) < 0.5 * sample_rate:
181
- break
182
- label, prob, h, c = model.predict_by_ndarray(sub_signal, h=h, c=c)
183
- predict_result.append({
184
- "label": label,
185
- "prob": prob,
186
- })
187
- if len(predict_result) == 0:
188
- continue
189
- label_list = [p["label"] for p in predict_result]
190
- predict_result_ = json.dumps(predict_result, ensure_ascii=False, indent=4)
191
- label2 = predict_result[0]["label"]
192
- prob2 = predict_result[0]["prob"]
193
-
194
- ground_truth_ = "voicemail" if any([l == "voicemail" for l in label_list]) else "else"
195
- flag = 1 if label2 == "voicemail" else 0
196
-
197
- row = {
198
- "call_id": call_id,
199
- "language": language,
200
- "scene_id": scene_id,
201
- "filename": filename.as_posix(),
202
- "duration": duration,
203
- "predict_result": predict_result_,
204
- "label2": label2,
205
- "prob2": prob2,
206
- "ground_truth_": ground_truth_,
207
- "flag": flag,
208
- }
209
- result.append(row)
210
-
211
- result = pd.DataFrame(result)
212
- result.to_excel(args.output_file, index=False)
213
-
214
- return
215
-
216
-
217
- if __name__ == "__main__":
218
- main()
 
 
examples/online_model_test/step_2_audio_filter.py DELETED
@@ -1,47 +0,0 @@
1
- #!/usr/bin/python3
2
- # -*- coding: utf-8 -*-
3
- import argparse
4
- from pathlib import Path
5
- import shutil
6
-
7
- import pandas as pd
8
-
9
-
10
- def get_args():
11
- parser = argparse.ArgumentParser()
12
-
13
- parser.add_argument("--predict_file", default="th-TH_predict.xlsx", type=str)
14
- parser.add_argument(
15
- "--output_dir",
16
- default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\audio_lib_hkg_1\th-TH\th-TH\early_media_no_voice\bad_case",
17
- type=str,
18
- )
19
- args = parser.parse_args()
20
- return args
21
-
22
-
23
- def main():
24
- args = get_args()
25
-
26
- output_dir = Path(args.output_dir)
27
- output_dir.mkdir(parents=True, exist_ok=True)
28
-
29
- df = pd.read_excel(args.predict_file)
30
- for i, row in df.iterrows():
31
- filename = row["filename"]
32
- ground_truth_ = row["ground_truth_"]
33
- flag = row["flag"]
34
-
35
- if flag == 1:
36
- continue
37
- if ground_truth_ == "voicemail":
38
- shutil.copy(
39
- filename,
40
- output_dir.as_posix()
41
- )
42
-
43
- return
44
-
45
-
46
- if __name__ == "__main__":
47
- main()
 
 
examples/online_model_test/step_3_make_test.py DELETED
@@ -1,75 +0,0 @@
1
- #!/usr/bin/python3
2
- # -*- coding: utf-8 -*-
3
- import argparse
4
- from pathlib import Path
5
- import shutil
6
-
7
- from gradio_client import Client, handle_file
8
- import librosa
9
- import pandas as pd
10
- from tqdm import tqdm
11
-
12
-
13
- def get_args():
14
- parser = argparse.ArgumentParser()
15
-
16
- parser.add_argument(
17
- "--src_dir",
18
- default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\calling\63\voicemail",
19
- type=str,
20
- )
21
- parser.add_argument(
22
- "--tgt_dir",
23
- default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\voice_test_examples\63\96",
24
- type=str,
25
- )
26
- parser.add_argument(
27
- "--early_media_file",
28
- default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\voice_test_examples\886\97\early_media_ba95fafd-8e2f-488f-8e5a-9bada95e24fb.wav",
29
- type=str,
30
- )
31
- args = parser.parse_args()
32
- return args
33
-
34
-
35
- def main():
36
- args = get_args()
37
-
38
- src_dir = Path(args.src_dir)
39
- tgt_dir = Path(args.tgt_dir)
40
- tgt_dir.mkdir(parents=True, exist_ok=True)
41
-
42
- client = Client("http://10.75.27.247:7861/")
43
-
44
- for filename in tqdm(src_dir.glob("*.wav")):
45
- splits = filename.stem.split("_")
46
- call_id = splits[3]
47
-
48
- filename_ = filename.as_posix()
49
- y, sr = librosa.load(filename_)
50
- duration = librosa.get_duration(y=y, sr=sr)
51
- if duration < 20:
52
- filename_, _ = client.predict(
53
- audio_t=handle_file(filename_),
54
- pad_seconds=20,
55
- pad_mode="repeat",
56
- api_name="/when_click_pad_audio"
57
- )
58
-
59
- active_media_file = tgt_dir / f"active_media_{call_id}.wav"
60
- early_media_file = tgt_dir / f"early_media_{call_id}.wav"
61
-
62
- shutil.copy(
63
- filename_,
64
- active_media_file.as_posix(),
65
- )
66
- shutil.copy(
67
- args.early_media_file,
68
- early_media_file.as_posix(),
69
- )
70
-
71
- return
72
-
73
-
74
- if __name__ == "__main__":
75
- main()
 
 
examples/online_model_test/test.py DELETED
@@ -1,84 +0,0 @@
1
- #!/usr/bin/python3
2
- # -*- coding: utf-8 -*-
3
- import argparse
4
- from collections import defaultdict
5
- from pathlib import Path
6
- import shutil
7
-
8
- from gradio_client import Client, handle_file
9
- import librosa
10
- import pandas as pd
11
- from tqdm import tqdm
12
-
13
-
14
- def get_args():
15
- parser = argparse.ArgumentParser()
16
- parser.add_argument(
17
- "--finished_dir",
18
- default=r"D:\Users\tianx\HuggingSpaces\cc_audio_8\data\calling\66\wav_1ch",
19
- type=str,
20
- )
21
- parser.add_argument(
22
- "--src_dir",
23
- default=r"D:/Users/tianx/HuggingDatasets/international_voice/data/sea-idn/audio_lib_hkg_1/audio_lib_hkg_1/th-TH/th-TH/",
24
- type=str,
25
- )
26
- parser.add_argument(
27
- "--tgt_dir",
28
- default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\audio_lib_hkg_1\th-TH\bad_case",
29
- type=str,
30
- )
31
- args = parser.parse_args()
32
- return args
33
-
34
-
35
- def main():
36
- args = get_args()
37
-
38
- finished_dir = Path(args.finished_dir)
39
- src_dir = Path(args.src_dir)
40
- tgt_dir = Path(args.tgt_dir)
41
- tgt_dir.mkdir(parents=True, exist_ok=True)
42
-
43
- # finished
44
- finished = set()
45
- for filename in finished_dir.glob("*.wav"):
46
- splits = filename.stem.split("_")
47
- call_id = splits[3]
48
- if call_id in ("27521940-feef-4bfa-ba55-b1f00a10c64d",):
49
- print(f"call_id: {call_id}")
50
-
51
- finished.add(call_id)
52
- print(f"finished count: {len(finished)}")
53
-
54
- # call_id_to_wav_file_list
55
- call_id_to_wav_file_list = defaultdict(list)
56
- for filename in src_dir.glob("**/*.wav"):
57
- splits = filename.stem.split("_")
58
- call_id = splits[3]
59
- language = splits[4]
60
- scene_id = splits[5]
61
- if call_id in ("27521940-feef-4bfa-ba55-b1f00a10c64d",):
62
- print(f"call_id: {call_id}")
63
-
64
- call_id_to_wav_file_list[call_id].append(filename.as_posix())
65
- print(f"src count: {len(call_id_to_wav_file_list)}")
66
-
67
- for filename in tqdm(src_dir.glob("**/active_media_r_*.wav")):
68
- splits = filename.stem.split("_")
69
- call_id = splits[3]
70
- if call_id in ("27521940-feef-4bfa-ba55-b1f00a10c64d",):
71
- print(f"call_id: {call_id}")
72
-
73
- if call_id in finished:
74
- wav_file_list = call_id_to_wav_file_list[call_id]
75
- for wav_file in wav_file_list:
76
- shutil.move(
77
- wav_file,
78
- tgt_dir.as_posix(),
79
- )
80
- return
81
-
82
-
83
- if __name__ == "__main__":
84
- main()
 
 
examples/sample_filter/bad_case_find.py CHANGED
@@ -16,28 +16,28 @@ def get_args():
16
  parser.add_argument(
17
  "--data_dir",
18
  # default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\es-MX-backup",
19
- default=r"D:\Users\tianx\HuggingSpaces\wav_segmented",
20
  # default=(project_path / "data/calling/63/wav_segmented"),
21
  type=str
22
  )
23
  parser.add_argument(
24
  "--keep_dir1",
25
  # default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\es-MX-backup\keep1",
26
- default=r"D:\Users\tianx\HuggingSpaces\wav_segmented\keep1",
27
  # default=(project_path / "data/calling/63/wav_segmented/keep1"),
28
  type=str
29
  )
30
  parser.add_argument(
31
  "--keep_dir2",
32
  # default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\es-MX-backup\keep2",
33
- default=r"D:\Users\tianx\HuggingSpaces\wav_segmented\keep2",
34
  # default=(project_path / "data/calling/63/wav_segmented/keep2"),
35
  type=str
36
  )
37
  parser.add_argument(
38
  "--trash_dir",
39
  # default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\es-MX-backup\trash",
40
- default=r"D:\Users\tianx\HuggingSpaces\wav_segmented\trash",
41
  # default=(project_path / "data/calling/63/wav_segmented/trash"),
42
  type=str
43
  )
@@ -64,19 +64,22 @@ def main():
64
  # continue
65
  filename = filename.as_posix()
66
 
67
- outputs1 = client.predict(
68
- audio_t=handle_file(filename),
69
- model_name="voicemail-en-ph-2-ch4-cnn",
70
- ground_true="Hello!!",
71
- api_name="/when_click_cls_button"
72
- )
73
  outputs1 = json.loads(outputs1)
74
  label1 = outputs1["label"]
75
  prob1 = outputs1["prob"]
76
 
77
  outputs2 = client.predict(
78
  audio_t=handle_file(filename),
79
- model_name="sound-8-ch32-cnn",
80
  ground_true="Hello!!",
81
  api_name="/when_click_cls_button"
82
  )
 
16
  parser.add_argument(
17
  "--data_dir",
18
  # default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\es-MX-backup",
19
+ default=r"D:\Users\tianx\HuggingSpaces\cc_audio_8\data\temp",
20
  # default=(project_path / "data/calling/63/wav_segmented"),
21
  type=str
22
  )
23
  parser.add_argument(
24
  "--keep_dir1",
25
  # default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\es-MX-backup\keep1",
26
+ default=r"D:\Users\tianx\HuggingSpaces\cc_audio_8\data\temp\keep1",
27
  # default=(project_path / "data/calling/63/wav_segmented/keep1"),
28
  type=str
29
  )
30
  parser.add_argument(
31
  "--keep_dir2",
32
  # default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\es-MX-backup\keep2",
33
+ default=r"D:\Users\tianx\HuggingSpaces\cc_audio_8\data\temp\keep2",
34
  # default=(project_path / "data/calling/63/wav_segmented/keep2"),
35
  type=str
36
  )
37
  parser.add_argument(
38
  "--trash_dir",
39
  # default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\es-MX-backup\trash",
40
+ default=r"D:\Users\tianx\HuggingSpaces\cc_audio_8\data\temp\trash",
41
  # default=(project_path / "data/calling/63/wav_segmented/trash"),
42
  type=str
43
  )
 
64
  # continue
65
  filename = filename.as_posix()
66
 
67
+ try:
68
+ outputs1 = client.predict(
69
+ audio_t=handle_file(filename),
70
+ model_name="voicemail-en-ph-2-l3-ch4-cnn",
71
+ ground_true="Hello!!",
72
+ api_name="/when_click_cls_button"
73
+ )
74
+ except Exception:
75
+ continue
76
  outputs1 = json.loads(outputs1)
77
  label1 = outputs1["label"]
78
  prob1 = outputs1["prob"]
79
 
80
  outputs2 = client.predict(
81
  audio_t=handle_file(filename),
82
+ model_name="sound-8-l3-ch32-cnn",
83
  ground_true="Hello!!",
84
  api_name="/when_click_cls_button"
85
  )
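
Note: the change above wraps the first classifier call in a try/except so a single unreadable clip no longer aborts the whole pass, and switches both model names to their -l3- variants. A condensed sketch of the guarded call (the server URL is assumed to match the Client created earlier in the script, which is outside this hunk):

import json

from gradio_client import Client, handle_file

client = Client("http://127.0.0.1:7864/")  # assumed endpoint; not shown in this hunk


def classify(filename: str):
    try:
        raw = client.predict(
            audio_t=handle_file(filename),
            model_name="voicemail-en-ph-2-l3-ch4-cnn",
            ground_true="Hello!!",
            api_name="/when_click_cls_button",
        )
    except Exception:
        return None  # skip clips the service cannot handle, as the diff does
    outputs = json.loads(raw)
    return outputs["label"], outputs["prob"]
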
examples/sound_classification_by_lstm/run_batch.sh CHANGED
@@ -3,25 +3,25 @@
3
 
4
  # sound ch64
5
 
6
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-2-l3-ch64-lstm \
7
- --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
8
- --label_plan 2 \
9
- --config_file "yaml/lstm-classifier-2-l3-ch64.yaml"
10
-
11
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-l3-ch64-lstm \
12
- --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
13
- --label_plan 3 \
14
- --config_file "yaml/lstm-classifier-3-l3-ch64.yaml"
15
-
16
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-4-l3-ch64-lstm \
17
- --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
18
- --label_plan 4 \
19
- --config_file "yaml/lstm-classifier-4-l3-ch64.yaml"
20
-
21
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-8-l3-ch64-lstm \
22
- --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
23
- --label_plan 8 \
24
- --config_file "yaml/lstm-classifier-8-l3-ch64.yaml"
25
 
26
 
27
  # pretrained voicemail
@@ -34,23 +34,23 @@ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name fi
34
 
35
  # voicemail ch64
36
 
37
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-ph-2-l3-ch64-lstm \
38
- --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-PH/wav_finished/*/*.wav" \
39
- --label_plan 2-voicemail \
40
- --config_file "yaml/lstm-classifier-2-l3-ch64.yaml" \
41
- --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-l3-ch64-lstm.zip"
42
-
43
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-sg-2-l3-ch64-lstm \
44
- --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-SG/wav_finished/*/*.wav" \
45
- --label_plan 2-voicemail \
46
- --config_file "yaml/lstm-classifier-2-l3-ch64.yaml" \
47
- --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-l3-ch64-lstm.zip"
48
-
49
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-us-2-l3-ch64-lstm \
50
- --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-US/wav_finished/*/*.wav" \
51
- --label_plan 2-voicemail \
52
- --config_file "yaml/lstm-classifier-2-l3-ch64.yaml" \
53
- --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-l3-ch64-lstm.zip"
54
 
55
  sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-es-mx-2-l3-ch64-lstm \
56
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/es-MX/wav_finished/*/*.wav" \
@@ -58,57 +58,57 @@ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name fi
58
  --config_file "yaml/lstm-classifier-2-l3-ch64.yaml" \
59
  --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-l3-ch64-lstm.zip"
60
 
61
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-es-pe-2-l3-ch64-lstm \
62
- --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/es-PE/wav_finished/*/*.wav" \
63
- --label_plan 2-voicemail \
64
- --config_file "yaml/lstm-classifier-2-l3-ch64.yaml" \
65
- --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-l3-ch64-lstm.zip"
66
-
67
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-fi-fi-2-l3-ch64-lstm \
68
- --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/fi-FI/wav_finished/*/*.wav" \
69
- --label_plan 2-voicemail \
70
- --config_file "yaml/lstm-classifier-2-l3-ch64.yaml" \
71
- --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-l3-ch64-lstm.zip"
72
-
73
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-id-id-2-l3-ch64-lstm \
74
- --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/id-ID/wav_finished/*/*.wav" \
75
- --label_plan 2-voicemail \
76
- --config_file "yaml/lstm-classifier-2-l3-ch64.yaml" \
77
- --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-l3-ch64-lstm.zip"
78
-
79
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ja-jp-2-l3-ch64-lstm \
80
- --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ja-JP/wav_finished/*/*.wav" \
81
- --label_plan 2-voicemail \
82
- --config_file "yaml/lstm-classifier-2-l3-ch64.yaml" \
83
- --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-l3-ch64-lstm.zip"
84
-
85
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ko-kr-2-l3-ch64-lstm \
86
- --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ko-KR/wav_finished/*/*.wav" \
87
- --label_plan 2-voicemail \
88
- --config_file "yaml/lstm-classifier-2-l3-ch64.yaml" \
89
- --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-l3-ch64-lstm.zip"
90
-
91
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ms-my-2-l3-ch64-lstm \
92
- --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ms-MY/wav_finished/*/*.wav" \
93
- --label_plan 2-voicemail \
94
- --config_file "yaml/lstm-classifier-2-l3-ch64.yaml" \
95
- --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-l3-ch64-lstm.zip"
96
-
97
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-pt-br-2-l3-ch64-lstm \
98
- --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/pt-BR/wav_finished/*/*.wav" \
99
- --label_plan 2-voicemail \
100
- --config_file "yaml/lstm-classifier-2-l3-ch64.yaml" \
101
- --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-l3-ch64-lstm.zip"
102
-
103
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-th-th-2-l3-ch64-lstm \
104
- --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/th-TH/wav_finished/*/*.wav" \
105
- --label_plan 2-voicemail \
106
- --config_file "yaml/lstm-classifier-2-l3-ch64.yaml" \
107
- --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-l3-ch64-lstm.zip"
108
-
109
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-zh-tw-2-l3-ch64-lstm \
110
- --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/zh-TW/wav_finished/*/*.wav" \
111
- --label_plan 2-voicemail \
112
- --config_file "yaml/lstm-classifier-2-l3-ch64.yaml" \
113
- --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-l3-ch64-lstm.zip"
114
 
 
3
 
4
  # sound ch64
5
 
6
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-2-l3-ch64-lstm \
7
+ #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
8
+ #--label_plan 2 \
9
+ #--config_file "yaml/lstm-classifier-2-l3-ch64.yaml"
10
+ #
11
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-l3-ch64-lstm \
12
+ #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
13
+ #--label_plan 3 \
14
+ #--config_file "yaml/lstm-classifier-3-l3-ch64.yaml"
15
+ #
16
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-4-l3-ch64-lstm \
17
+ #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
18
+ #--label_plan 4 \
19
+ #--config_file "yaml/lstm-classifier-4-l3-ch64.yaml"
20
+ #
21
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-8-l3-ch64-lstm \
22
+ #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
23
+ #--label_plan 8 \
24
+ #--config_file "yaml/lstm-classifier-8-l3-ch64.yaml"
25
 
26
 
27
  # pretrained voicemail
 
34
 
35
  # voicemail ch64
36
 
37
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-ph-2-l3-ch64-lstm \
38
+ #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-PH/wav_finished/*/*.wav" \
39
+ #--label_plan 2-voicemail \
40
+ #--config_file "yaml/lstm-classifier-2-l3-ch64.yaml" \
41
+ #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-l3-ch64-lstm.zip"
42
+ #
43
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-sg-2-l3-ch64-lstm \
44
+ #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-SG/wav_finished/*/*.wav" \
45
+ #--label_plan 2-voicemail \
46
+ #--config_file "yaml/lstm-classifier-2-l3-ch64.yaml" \
47
+ #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-l3-ch64-lstm.zip"
48
+ #
49
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-us-2-l3-ch64-lstm \
50
+ #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-US/wav_finished/*/*.wav" \
51
+ #--label_plan 2-voicemail \
52
+ #--config_file "yaml/lstm-classifier-2-l3-ch64.yaml" \
53
+ #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-l3-ch64-lstm.zip"
54
 
55
  sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-es-mx-2-l3-ch64-lstm \
56
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/es-MX/wav_finished/*/*.wav" \
 
58
  --config_file "yaml/lstm-classifier-2-l3-ch64.yaml" \
59
  --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-l3-ch64-lstm.zip"
60
 
61
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-es-pe-2-l3-ch64-lstm \
62
+ #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/es-PE/wav_finished/*/*.wav" \
63
+ #--label_plan 2-voicemail \
64
+ #--config_file "yaml/lstm-classifier-2-l3-ch64.yaml" \
65
+ #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-l3-ch64-lstm.zip"
66
+ #
67
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-fi-fi-2-l3-ch64-lstm \
68
+ #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/fi-FI/wav_finished/*/*.wav" \
69
+ #--label_plan 2-voicemail \
70
+ #--config_file "yaml/lstm-classifier-2-l3-ch64.yaml" \
71
+ #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-l3-ch64-lstm.zip"
72
+ #
73
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-id-id-2-l3-ch64-lstm \
74
+ #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/id-ID/wav_finished/*/*.wav" \
75
+ #--label_plan 2-voicemail \
76
+ #--config_file "yaml/lstm-classifier-2-l3-ch64.yaml" \
77
+ #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-l3-ch64-lstm.zip"
78
+ #
79
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ja-jp-2-l3-ch64-lstm \
80
+ #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ja-JP/wav_finished/*/*.wav" \
81
+ #--label_plan 2-voicemail \
82
+ #--config_file "yaml/lstm-classifier-2-l3-ch64.yaml" \
83
+ #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-l3-ch64-lstm.zip"
84
+ #
85
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ko-kr-2-l3-ch64-lstm \
86
+ #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ko-KR/wav_finished/*/*.wav" \
87
+ #--label_plan 2-voicemail \
88
+ #--config_file "yaml/lstm-classifier-2-l3-ch64.yaml" \
89
+ #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-l3-ch64-lstm.zip"
90
+ #
91
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ms-my-2-l3-ch64-lstm \
92
+ #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ms-MY/wav_finished/*/*.wav" \
93
+ #--label_plan 2-voicemail \
94
+ #--config_file "yaml/lstm-classifier-2-l3-ch64.yaml" \
95
+ #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-l3-ch64-lstm.zip"
96
+ #
97
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-pt-br-2-l3-ch64-lstm \
98
+ #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/pt-BR/wav_finished/*/*.wav" \
99
+ #--label_plan 2-voicemail \
100
+ #--config_file "yaml/lstm-classifier-2-l3-ch64.yaml" \
101
+ #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-l3-ch64-lstm.zip"
102
+ #
103
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-th-th-2-l3-ch64-lstm \
104
+ #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/th-TH/wav_finished/*/*.wav" \
105
+ #--label_plan 2-voicemail \
106
+ #--config_file "yaml/lstm-classifier-2-l3-ch64.yaml" \
107
+ #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-l3-ch64-lstm.zip"
108
+ #
109
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-zh-tw-2-l3-ch64-lstm \
110
+ #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/zh-TW/wav_finished/*/*.wav" \
111
+ #--label_plan 2-voicemail \
112
+ #--config_file "yaml/lstm-classifier-2-l3-ch64.yaml" \
113
+ #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-l3-ch64-lstm.zip"
114
 
examples/sound_classification_by_lstm/step_9_evaluation_onnx_model.py CHANGED
@@ -93,6 +93,12 @@ def main():
93
  filename = row["filename"]
94
  ground_true = row["labels"]
95
 
 
96
  # infer
97
  sample_rate, waveform = wavfile.read(filename)
98
  waveform = waveform[:16000]
@@ -101,56 +107,56 @@ def main():
101
  waveform = torch.unsqueeze(waveform, dim=0)
102
  waveform = waveform.to(device)
103
 
104
- spec = wave_to_mel_spectrogram(waveform) + 1e-6
105
- spec = spec.log()
106
- # shape = [b, f, t]
107
- spec = spec.transpose(1, 2)
108
- # shape = [b, t, f]
109
- inputs = spec
110
-
111
- lstm_layer_param = config.lstm_layer_param
112
- num_layers = lstm_layer_param["num_layers"]
113
- hidden_size = lstm_layer_param["hidden_size"]
114
- h = torch.zeros(size=(num_layers, 1, hidden_size), dtype=torch.float32)
115
- c = torch.zeros(size=(num_layers, 1, hidden_size), dtype=torch.float32)
116
-
117
- input_feed = {
118
- "inputs": inputs.numpy(),
119
- "h": h.numpy(),
120
- "c": c.numpy(),
121
- }
122
- output_names = [
123
- "logits", "new_h", "new_c"
124
- ]
125
- logits, new_h, new_c = ort_session.run(output_names, input_feed)
126
- # print(f"logits: {logits.shape}")
127
- # print(f"new_h: {new_h.shape}")
128
- # print(f"new_c: {new_c.shape}")
129
-
130
- logits = torch.tensor(logits, dtype=torch.float32)
131
- probs = torch.nn.functional.softmax(logits, dim=-1)
132
- label_idx = torch.argmax(probs, dim=-1)
133
-
134
- label_idx = label_idx.cpu()
135
- probs = probs.cpu()
136
-
137
- label_idx = label_idx.numpy()[0]
138
- prob = probs.numpy()[0][label_idx]
139
-
140
- label_str = vocabulary.get_token_from_index(label_idx, namespace="labels")
141
-
142
- correct = 1 if label_str == ground_true else 0
143
-
144
  row_ = {
145
  **row,
146
- "onnx_predict": label_str,
147
- "onnx_prob": prob,
148
- "onnx_correct": correct,
149
  }
 
 
150
  result.append(row_)
151
 
152
  total_examples += 1
153
- total_correct += correct
154
  accuracy = total_correct / total_examples
155
 
156
  progress_bar.update(1)
 
93
  filename = row["filename"]
94
  ground_true = row["labels"]
95
 
96
+ lstm_layer_param = config.lstm_layer_param
97
+ num_layers = lstm_layer_param["num_layers"]
98
+ hidden_size = lstm_layer_param["hidden_size"]
99
+ h = torch.zeros(size=(num_layers, 1, hidden_size), dtype=torch.float32)
100
+ c = torch.zeros(size=(num_layers, 1, hidden_size), dtype=torch.float32)
101
+
102
  # infer
103
  sample_rate, waveform = wavfile.read(filename)
104
  waveform = waveform[:16000]
 
107
  waveform = torch.unsqueeze(waveform, dim=0)
108
  waveform = waveform.to(device)
109
 
 
 
110
  row_ = {
111
  **row,
 
 
 
112
  }
113
+ for idx in range(4):
114
+ begin = idx * 4000
115
+ end = begin + 4000
116
+
117
+ waveform_ = waveform[:, begin: end]
118
+ spec = wave_to_mel_spectrogram(waveform_) + 1e-6
119
+ spec = spec.log()
120
+ # shape = [b, f, t]
121
+ spec = spec.transpose(1, 2)
122
+ # shape = [b, t, f]
123
+ inputs = spec
124
+
125
+ input_feed = {
126
+ "inputs": inputs.numpy(),
127
+ "h": h.numpy(),
128
+ "c": c.numpy(),
129
+ }
130
+ output_names = [
131
+ "logits", "new_h", "new_c"
132
+ ]
133
+ logits, new_h, new_c = ort_session.run(output_names, input_feed)
134
+ # print(f"logits: {logits.shape}")
135
+ # print(f"new_h: {new_h.shape}")
136
+ # print(f"new_c: {new_c.shape}")
137
+
138
+ logits = torch.tensor(logits, dtype=torch.float32)
139
+ probs = torch.nn.functional.softmax(logits, dim=-1)
140
+ label_idx = torch.argmax(probs, dim=-1)
141
+
142
+ label_idx = label_idx.cpu()
143
+ probs = probs.cpu()
144
+
145
+ label_idx = label_idx.numpy()[0]
146
+ prob = probs.numpy()[0][label_idx]
147
+
148
+ label_str = vocabulary.get_token_from_index(label_idx, namespace="labels")
149
+
150
+ correct = 1 if label_str == ground_true else 0
151
+
152
+ row_[f"onnx_predict_{idx}"] = label_str
153
+ row_[f"onnx_prob_{idx}"] = prob
154
+ row_[f"onnx_correct_{idx}"] = correct
155
+
156
  result.append(row_)
157
 
158
  total_examples += 1
159
+ total_correct += row_["onnx_correct_3"]
160
  accuracy = total_correct / total_examples
161
 
162
  progress_bar.update(1)
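Note: the evaluation script now streams each clip through the ONNX LSTM in four consecutive sub-windows of 4000 samples, reusing the hidden and cell state between windows and recording a per-window prediction; accuracy is scored on the last window. A condensed sketch of that stateful loop (the input/output tensor names match the diff; the chunking and state sizes come from the model config):

import numpy as np
import onnxruntime as ort
import torch


def stream_predict(session: ort.InferenceSession, mel_chunks, num_layers: int = 3, hidden_size: int = 64):
    # mel_chunks: iterable of float32 arrays shaped [1, n_frames, n_mels]
    h = np.zeros((num_layers, 1, hidden_size), dtype=np.float32)
    c = np.zeros((num_layers, 1, hidden_size), dtype=np.float32)
    per_chunk = []
    for mel in mel_chunks:
        logits, h, c = session.run(
            ["logits", "new_h", "new_c"],
            {"inputs": mel, "h": h, "c": c},
        )
        probs = torch.softmax(torch.tensor(logits), dim=-1)
        prob, label_idx = torch.max(probs, dim=-1)
        per_chunk.append((int(label_idx[0]), float(prob[0])))
    return per_chunk
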
log.py ADDED
@@ -0,0 +1,257 @@
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ from datetime import datetime
4
+ import logging
5
+ from logging.handlers import RotatingFileHandler, TimedRotatingFileHandler
6
+ import os
7
+ from zoneinfo import ZoneInfo # bundled with Python 3.9+, no extra install needed
8
+
9
+
10
+ def get_converter(tz_info: str = "Asia/Shanghai"):
11
+ def converter(timestamp):
12
+ dt = datetime.fromtimestamp(timestamp, ZoneInfo(tz_info))
13
+ result = dt.timetuple()
14
+ return result
15
+ return converter
16
+
17
+
18
+ def setup_stream(tz_info: str = "Asia/Shanghai"):
19
+ fmt = "%(asctime)s|%(name)s|%(levelname)s|%(filename)s|%(lineno)d|%(message)s"
20
+
21
+ formatter = logging.Formatter(
22
+ fmt=fmt,
23
+ datefmt="%Y-%m-%d %H:%M:%S %z"
24
+ )
25
+ formatter.converter = get_converter(tz_info)
26
+
27
+ stream_handler = logging.StreamHandler()
28
+ stream_handler.setLevel(logging.INFO)
29
+ stream_handler.setFormatter(formatter)
30
+
31
+ # main
32
+ main_logger = logging.getLogger("main")
33
+ main_logger.addHandler(stream_handler)
34
+
35
+ # http
36
+ http_logger = logging.getLogger("http")
37
+ http_logger.addHandler(stream_handler)
38
+
39
+ # api
40
+ api_logger = logging.getLogger("api")
41
+ api_logger.addHandler(stream_handler)
42
+
43
+ logging.basicConfig(
44
+ level=logging.DEBUG,
45
+ datefmt="%a, %d %b %Y %H:%M:%S",
46
+ handlers=[
47
+
48
+ ]
49
+ )
50
+ return
51
+
52
+
53
+ def setup_size_rotating(log_directory: str, tz_info: str = "Asia/Shanghai"):
54
+ fmt = "%(asctime)s|%(name)s|%(levelname)s|%(filename)s|%(lineno)d|%(message)s"
55
+
56
+ formatter = logging.Formatter(
57
+ fmt=fmt,
58
+ datefmt="%Y-%m-%d %H:%M:%S %z"
59
+ )
60
+ formatter.converter = get_converter(tz_info)
61
+
62
+ stream_handler = logging.StreamHandler()
63
+ stream_handler.setLevel(logging.INFO)
64
+ stream_handler.setFormatter(formatter)
65
+
66
+ # main
67
+ main_logger = logging.getLogger("main")
68
+ main_logger.addHandler(stream_handler)
69
+ main_info_file_handler = RotatingFileHandler(
70
+ filename=os.path.join(log_directory, "main.log"),
71
+ maxBytes=100*1024*1024, # 100MB
72
+ encoding="utf-8",
73
+ backupCount=2,
74
+ )
75
+ main_info_file_handler.setLevel(logging.INFO)
76
+ main_info_file_handler.setFormatter(formatter)
77
+ main_logger.addHandler(main_info_file_handler)
78
+
79
+ # http
80
+ http_logger = logging.getLogger("http")
81
+ http_logger.addHandler(stream_handler)
82
+ http_file_handler = RotatingFileHandler(
83
+ filename=os.path.join(log_directory, "http.log"),
84
+ maxBytes=100*1024*1024, # 100MB
85
+ encoding="utf-8",
86
+ backupCount=2,
87
+ )
88
+ http_file_handler.setLevel(logging.DEBUG)
89
+ http_file_handler.setFormatter(formatter)
90
+ http_logger.addHandler(http_file_handler)
91
+
92
+ # api
93
+ api_logger = logging.getLogger("api")
94
+ api_logger.addHandler(stream_handler)
95
+ api_file_handler = RotatingFileHandler(
96
+ filename=os.path.join(log_directory, "api.log"),
97
+ maxBytes=10*1024*1024, # 10MB
98
+ encoding="utf-8",
99
+ backupCount=2,
100
+ )
101
+ api_file_handler.setLevel(logging.DEBUG)
102
+ api_file_handler.setFormatter(formatter)
103
+ api_logger.addHandler(api_file_handler)
104
+
105
+ # alarm
106
+ alarm_logger = logging.getLogger("alarm")
107
+ alarm_file_handler = RotatingFileHandler(
108
+ filename=os.path.join(log_directory, "alarm.log"),
109
+ maxBytes=1*1024*1024, # 1MB
110
+ encoding="utf-8",
111
+ backupCount=2,
112
+ )
113
+ alarm_file_handler.setLevel(logging.DEBUG)
114
+ alarm_file_handler.setFormatter(formatter)
115
+ alarm_logger.addHandler(alarm_file_handler)
116
+
117
+ debug_file_handler = RotatingFileHandler(
118
+ filename=os.path.join(log_directory, "debug.log"),
119
+ maxBytes=1*1024*1024, # 1MB
120
+ encoding="utf-8",
121
+ backupCount=2,
122
+ )
123
+ debug_file_handler.setLevel(logging.DEBUG)
124
+ debug_file_handler.setFormatter(formatter)
125
+
126
+ info_file_handler = RotatingFileHandler(
127
+ filename=os.path.join(log_directory, "info.log"),
128
+ maxBytes=1*1024*1024, # 1MB
129
+ encoding="utf-8",
130
+ backupCount=2,
131
+ )
132
+ info_file_handler.setLevel(logging.INFO)
133
+ info_file_handler.setFormatter(formatter)
134
+
135
+ error_file_handler = RotatingFileHandler(
136
+ filename=os.path.join(log_directory, "error.log"),
137
+ maxBytes=1*1024*1024, # 1MB
138
+ encoding="utf-8",
139
+ backupCount=2,
140
+ )
141
+ error_file_handler.setLevel(logging.ERROR)
142
+ error_file_handler.setFormatter(formatter)
143
+
144
+ logging.basicConfig(
145
+ level=logging.DEBUG,
146
+ datefmt="%a, %d %b %Y %H:%M:%S",
147
+ handlers=[
148
+ debug_file_handler,
149
+ info_file_handler,
150
+ error_file_handler,
151
+ ]
152
+ )
153
+
154
+
155
+ def setup_time_rotating(log_directory: str):
156
+ fmt = "%(asctime)s - %(name)s - %(levelname)s %(filename)s:%(lineno)d > %(message)s"
157
+
158
+ stream_handler = logging.StreamHandler()
159
+ stream_handler.setLevel(logging.INFO)
160
+ stream_handler.setFormatter(logging.Formatter(fmt))
161
+
162
+ # main
163
+ main_logger = logging.getLogger("main")
164
+ main_logger.addHandler(stream_handler)
165
+ main_info_file_handler = TimedRotatingFileHandler(
166
+ filename=os.path.join(log_directory, "main.log"),
167
+ encoding="utf-8",
168
+ when="midnight",
169
+ interval=1,
170
+ backupCount=7
171
+ )
172
+ main_info_file_handler.setLevel(logging.INFO)
173
+ main_info_file_handler.setFormatter(logging.Formatter(fmt))
174
+ main_logger.addHandler(main_info_file_handler)
175
+
176
+ # http
177
+ http_logger = logging.getLogger("http")
178
+ http_file_handler = TimedRotatingFileHandler(
179
+ filename=os.path.join(log_directory, "http.log"),
180
+ encoding='utf-8',
181
+ when="midnight",
182
+ interval=1,
183
+ backupCount=7
184
+ )
185
+ http_file_handler.setLevel(logging.DEBUG)
186
+ http_file_handler.setFormatter(logging.Formatter(fmt))
187
+ http_logger.addHandler(http_file_handler)
188
+
189
+ # api
190
+ api_logger = logging.getLogger("api")
191
+ api_file_handler = TimedRotatingFileHandler(
192
+ filename=os.path.join(log_directory, "api.log"),
193
+ encoding='utf-8',
194
+ when="midnight",
195
+ interval=1,
196
+ backupCount=7
197
+ )
198
+ api_file_handler.setLevel(logging.DEBUG)
199
+ api_file_handler.setFormatter(logging.Formatter(fmt))
200
+ api_logger.addHandler(api_file_handler)
201
+
202
+ # alarm
203
+ alarm_logger = logging.getLogger("alarm")
204
+ alarm_file_handler = TimedRotatingFileHandler(
205
+ filename=os.path.join(log_directory, "alarm.log"),
206
+ encoding="utf-8",
207
+ when="midnight",
208
+ interval=1,
209
+ backupCount=7
210
+ )
211
+ alarm_file_handler.setLevel(logging.DEBUG)
212
+ alarm_file_handler.setFormatter(logging.Formatter(fmt))
213
+ alarm_logger.addHandler(alarm_file_handler)
214
+
215
+ debug_file_handler = TimedRotatingFileHandler(
216
+ filename=os.path.join(log_directory, "debug.log"),
217
+ encoding="utf-8",
218
+ when="D",
219
+ interval=1,
220
+ backupCount=7
221
+ )
222
+ debug_file_handler.setLevel(logging.DEBUG)
223
+ debug_file_handler.setFormatter(logging.Formatter(fmt))
224
+
225
+ info_file_handler = TimedRotatingFileHandler(
226
+ filename=os.path.join(log_directory, "info.log"),
227
+ encoding="utf-8",
228
+ when="D",
229
+ interval=1,
230
+ backupCount=7
231
+ )
232
+ info_file_handler.setLevel(logging.INFO)
233
+ info_file_handler.setFormatter(logging.Formatter(fmt))
234
+
235
+ error_file_handler = TimedRotatingFileHandler(
236
+ filename=os.path.join(log_directory, "error.log"),
237
+ encoding="utf-8",
238
+ when="D",
239
+ interval=1,
240
+ backupCount=7
241
+ )
242
+ error_file_handler.setLevel(logging.ERROR)
243
+ error_file_handler.setFormatter(logging.Formatter(fmt))
244
+
245
+ logging.basicConfig(
246
+ level=logging.DEBUG,
247
+ datefmt="%a, %d %b %Y %H:%M:%S",
248
+ handlers=[
249
+ debug_file_handler,
250
+ info_file_handler,
251
+ error_file_handler,
252
+ ]
253
+ )
254
+
255
+
256
+ if __name__ == "__main__":
257
+ pass
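
Note: the new log.py module is what step_1_bad_case_filter.py imports; setup_size_rotating attaches a stream handler plus size-rotated files (main.log at 100 MB with two backups) under the logs/ directory added in project_settings.py, with timestamps rendered in the given time zone. Typical wiring, mirroring that script's import block:

import logging

import log
from project_settings import log_directory

log.setup_size_rotating(log_directory=log_directory, tz_info="Asia/Shanghai")
logger = logging.getLogger("main")
logger.info("written to stdout and to logs/main.log (rotated at 100 MB)")
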
main.py CHANGED
@@ -151,7 +151,8 @@ def main():
151
  blocks.queue().launch(
152
  share=False if platform.system() == "Windows" else False,
153
  server_name="127.0.0.1" if platform.system() == "Windows" else "0.0.0.0",
154
- server_port=args.server_port
 
155
  )
156
  return
157
 
 
151
  blocks.queue().launch(
152
  share=False if platform.system() == "Windows" else False,
153
  server_name="127.0.0.1" if platform.system() == "Windows" else "0.0.0.0",
154
+ server_port=args.server_port,
155
+ show_error=True if platform.system() == "Windows" else False,
156
  )
157
  return
158
 
project_settings.py CHANGED
@@ -9,11 +9,17 @@ from toolbox.os.environment import EnvironmentManager
9
  project_path = os.path.abspath(os.path.dirname(__file__))
10
  project_path = Path(project_path)
11
 
 
12
  environment = EnvironmentManager(
13
  path=os.path.join(project_path, "dotenv"),
14
  env=os.environ.get("environment", "dev"),
15
  )
16
 
17
 
18
- if __name__ == '__main__':
19
  pass
 
9
  project_path = os.path.abspath(os.path.dirname(__file__))
10
  project_path = Path(project_path)
11
 
12
+ log_directory = project_path / "logs"
13
+ log_directory.mkdir(parents=True, exist_ok=True)
14
+
15
+ temp_directory = project_path / "temp"
16
+ temp_directory.mkdir(parents=True, exist_ok=True)
17
+
18
  environment = EnvironmentManager(
19
  path=os.path.join(project_path, "dotenv"),
20
  env=os.environ.get("environment", "dev"),
21
  )
22
 
23
 
24
+ if __name__ == "__main__":
25
  pass
requirements.txt CHANGED
@@ -16,3 +16,4 @@ onnxruntime
16
  scipy
17
  onnx
18
  onnxruntime
 
 
16
  scipy
17
  onnx
18
  onnxruntime
19
+ tenacity