HoneyTian committed on
Commit
58d9724
·
1 Parent(s): a57e1ce
examples/sample_filter/music_bad_case_find.py CHANGED
@@ -15,19 +15,18 @@ def get_args():
15
  parser = argparse.ArgumentParser()
16
  parser.add_argument(
17
  "--data_dir",
18
- default=r"D:\Users\tianx\HuggingSpaces\template_match_asr\data\wav\early_media\60\music\wav_segmented",
19
  type=str
20
  )
21
  parser.add_argument(
22
  "--keep_dir",
23
- # default=r"D:\Users\tianx\HuggingSpaces\wav_segmented\keep",
24
- default=r"D:\Users\tianx\HuggingSpaces\template_match_asr\data\wav\early_media\60\music\wav_segmented\keep",
25
  type=str
26
  )
27
  parser.add_argument(
28
  "--trash_dir",
29
  # default=r"D:\Users\tianx\HuggingSpaces\wav_segmented\trash",
30
- default=r"D:\Users\tianx\HuggingSpaces\template_match_asr\data\wav\early_media\60\music\wav_segmented\trash",
31
  type=str
32
  )
33
  args = parser.parse_args()
@@ -49,12 +48,15 @@ def main():
49
  for idx, filename in tqdm(enumerate(data_dir.glob("*.wav"))):
50
  filename = filename.as_posix()
51
 
52
- outputs = client.predict(
53
- audio_t=handle_file(filename),
54
- model_name="sound-8-ch32-cnn",
55
- ground_true="Hello!!",
56
- api_name="/when_click_cls_button"
57
- )
 
 
 
58
  outputs = json.loads(outputs)
59
  label = outputs["label"]
60
  prob = outputs["prob"]
 
15
  parser = argparse.ArgumentParser()
16
  parser.add_argument(
17
  "--data_dir",
18
+ default=r"D:\Users\tianx\HuggingSpaces\template_match_asr\data\wav\early_media\66\temp",
19
  type=str
20
  )
21
  parser.add_argument(
22
  "--keep_dir",
23
+ default=r"D:\Users\tianx\HuggingSpaces\template_match_asr\data\wav\early_media\66\temp\keep",
 
24
  type=str
25
  )
26
  parser.add_argument(
27
  "--trash_dir",
28
  # default=r"D:\Users\tianx\HuggingSpaces\wav_segmented\trash",
29
+ default=r"D:\Users\tianx\HuggingSpaces\template_match_asr\data\wav\early_media\66\temp\trash",
30
  type=str
31
  )
32
  args = parser.parse_args()
 
48
  for idx, filename in tqdm(enumerate(data_dir.glob("*.wav"))):
49
  filename = filename.as_posix()
50
 
51
+ try:
52
+ outputs = client.predict(
53
+ audio_t=handle_file(filename),
54
+ model_name="sound-8-ch32-cnn",
55
+ ground_true="Hello!!",
56
+ api_name="/when_click_cls_button"
57
+ )
58
+ except Exception as error:
59
+ continue
60
  outputs = json.loads(outputs)
61
  label = outputs["label"]
62
  prob = outputs["prob"]
examples/sound_classification_by_lstm/run_batch.sh CHANGED
@@ -1,91 +1,114 @@
1
  #!/usr/bin/env bash
2
 
3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  # pretrained voicemail
5
 
6
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-2-ch64-lstm \
7
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
8
  --label_plan 2-voicemail \
9
- --config_file "yaml/lstm-classifier-2-ch64.yaml"
10
 
11
 
12
  # voicemail ch64
13
 
14
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-ph-2-ch64-lstm \
15
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-PH/wav_finished/*/*.wav" \
16
  --label_plan 2-voicemail \
17
- --config_file "yaml/lstm-classifier-2-ch64.yaml" \
18
- --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch64-lstm.zip"
19
 
20
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-sg-2-ch64-lstm \
21
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-SG/wav_finished/*/*.wav" \
22
  --label_plan 2-voicemail \
23
- --config_file "yaml/lstm-classifier-2-ch64.yaml" \
24
- --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch64-lstm.zip"
25
 
26
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-us-2-ch64-lstm \
27
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-US/wav_finished/*/*.wav" \
28
  --label_plan 2-voicemail \
29
- --config_file "yaml/lstm-classifier-2-ch64.yaml" \
30
- --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch64-lstm.zip"
31
 
32
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-es-mx-2-ch64-lstm \
33
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/es-MX/wav_finished/*/*.wav" \
34
  --label_plan 2-voicemail \
35
- --config_file "yaml/lstm-classifier-2-ch64.yaml" \
36
- --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch64-lstm.zip"
37
 
38
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-es-pe-2-ch64-lstm \
39
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/es-PE/wav_finished/*/*.wav" \
40
  --label_plan 2-voicemail \
41
- --config_file "yaml/lstm-classifier-2-ch64.yaml" \
42
- --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch64-lstm.zip"
43
 
44
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-fi-fi-2-ch64-lstm \
45
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/fi-FI/wav_finished/*/*.wav" \
46
  --label_plan 2-voicemail \
47
- --config_file "yaml/lstm-classifier-2-ch64.yaml" \
48
- --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch64-lstm.zip"
49
 
50
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-id-id-2-ch64-lstm \
51
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/id-ID/wav_finished/*/*.wav" \
52
  --label_plan 2-voicemail \
53
- --config_file "yaml/lstm-classifier-2-ch64.yaml" \
54
- --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch64-lstm.zip"
55
 
56
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ja-jp-2-ch64-lstm \
57
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ja-JP/wav_finished/*/*.wav" \
58
  --label_plan 2-voicemail \
59
- --config_file "yaml/lstm-classifier-2-ch64.yaml" \
60
- --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch64-lstm.zip"
61
 
62
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ko-kr-2-ch64-lstm \
63
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ko-KR/wav_finished/*/*.wav" \
64
  --label_plan 2-voicemail \
65
- --config_file "yaml/lstm-classifier-2-ch64.yaml" \
66
- --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch64-lstm.zip"
67
 
68
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ms-my-2-ch64-lstm \
69
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ms-MY/wav_finished/*/*.wav" \
70
  --label_plan 2-voicemail \
71
- --config_file "yaml/lstm-classifier-2-ch64.yaml" \
72
- --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch64-lstm.zip"
73
 
74
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-pt-br-2-ch64-lstm \
75
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/pt-BR/wav_finished/*/*.wav" \
76
  --label_plan 2-voicemail \
77
- --config_file "yaml/lstm-classifier-2-ch64.yaml" \
78
- --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch64-lstm.zip"
79
 
80
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-th-th-2-ch64-lstm \
81
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/th-TH/wav_finished/*/*.wav" \
82
  --label_plan 2-voicemail \
83
- --config_file "yaml/lstm-classifier-2-ch64.yaml" \
84
- --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch64-lstm.zip"
85
 
86
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-zh-tw-2-ch64-lstm \
87
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/zh-TW/wav_finished/*/*.wav" \
88
  --label_plan 2-voicemail \
89
- --config_file "yaml/lstm-classifier-2-ch64.yaml" \
90
- --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch64-lstm.zip"
91
 
 
1
  #!/usr/bin/env bash
2
 
3
 
4
+ # sound ch64
5
+
6
+ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-2-l3-ch64-lstm \
7
+ --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
8
+ --label_plan 2 \
9
+ --config_file "yaml/lstm-classifier-2-l3-ch64.yaml"
10
+
11
+ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-l3-ch64-lstm \
12
+ --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
13
+ --label_plan 3 \
14
+ --config_file "yaml/lstm-classifier-3-l3-ch64.yaml"
15
+
16
+ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-4-l3-ch64-lstm \
17
+ --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
18
+ --label_plan 4 \
19
+ --config_file "yaml/lstm-classifier-4-l3-ch64.yaml"
20
+
21
+ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-8-l3-ch64-lstm \
22
+ --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
23
+ --label_plan 8 \
24
+ --config_file "yaml/lstm-classifier-8-l3-ch64.yaml"
25
+
26
+
27
  # pretrained voicemail
28
 
29
+ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-2-l3-ch64-lstm \
30
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
31
  --label_plan 2-voicemail \
32
+ --config_file "yaml/lstm-classifier-2-l3-ch64.yaml"
33
 
34
 
35
  # voicemail ch64
36
 
37
+ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-ph-2-l3-ch64-lstm \
38
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-PH/wav_finished/*/*.wav" \
39
  --label_plan 2-voicemail \
40
+ --config_file "yaml/lstm-classifier-2-l3-ch64.yaml" \
41
+ --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-l3-ch64-lstm.zip"
42
 
43
+ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-sg-2-l3-ch64-lstm \
44
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-SG/wav_finished/*/*.wav" \
45
  --label_plan 2-voicemail \
46
+ --config_file "yaml/lstm-classifier-2-l3-ch64.yaml" \
47
+ --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-l3-ch64-lstm.zip"
48
 
49
+ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-us-2-l3-ch64-lstm \
50
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-US/wav_finished/*/*.wav" \
51
  --label_plan 2-voicemail \
52
+ --config_file "yaml/lstm-classifier-2-l3-ch64.yaml" \
53
+ --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-l3-ch64-lstm.zip"
54
 
55
+ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-es-mx-2-l3-ch64-lstm \
56
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/es-MX/wav_finished/*/*.wav" \
57
  --label_plan 2-voicemail \
58
+ --config_file "yaml/lstm-classifier-2-l3-ch64.yaml" \
59
+ --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-l3-ch64-lstm.zip"
60
 
61
+ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-es-pe-2-l3-ch64-lstm \
62
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/es-PE/wav_finished/*/*.wav" \
63
  --label_plan 2-voicemail \
64
+ --config_file "yaml/lstm-classifier-2-l3-ch64.yaml" \
65
+ --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-l3-ch64-lstm.zip"
66
 
67
+ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-fi-fi-2-l3-ch64-lstm \
68
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/fi-FI/wav_finished/*/*.wav" \
69
  --label_plan 2-voicemail \
70
+ --config_file "yaml/lstm-classifier-2-l3-ch64.yaml" \
71
+ --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-l3-ch64-lstm.zip"
72
 
73
+ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-id-id-2-l3-ch64-lstm \
74
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/id-ID/wav_finished/*/*.wav" \
75
  --label_plan 2-voicemail \
76
+ --config_file "yaml/lstm-classifier-2-l3-ch64.yaml" \
77
+ --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-l3-ch64-lstm.zip"
78
 
79
+ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ja-jp-2-l3-ch64-lstm \
80
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ja-JP/wav_finished/*/*.wav" \
81
  --label_plan 2-voicemail \
82
+ --config_file "yaml/lstm-classifier-2-l3-ch64.yaml" \
83
+ --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-l3-ch64-lstm.zip"
84
 
85
+ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ko-kr-2-l3-ch64-lstm \
86
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ko-KR/wav_finished/*/*.wav" \
87
  --label_plan 2-voicemail \
88
+ --config_file "yaml/lstm-classifier-2-l3-ch64.yaml" \
89
+ --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-l3-ch64-lstm.zip"
90
 
91
+ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ms-my-2-l3-ch64-lstm \
92
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ms-MY/wav_finished/*/*.wav" \
93
  --label_plan 2-voicemail \
94
+ --config_file "yaml/lstm-classifier-2-l3-ch64.yaml" \
95
+ --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-l3-ch64-lstm.zip"
96
 
97
+ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-pt-br-2-l3-ch64-lstm \
98
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/pt-BR/wav_finished/*/*.wav" \
99
  --label_plan 2-voicemail \
100
+ --config_file "yaml/lstm-classifier-2-l3-ch64.yaml" \
101
+ --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-l3-ch64-lstm.zip"
102
 
103
+ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-th-th-2-l3-ch64-lstm \
104
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/th-TH/wav_finished/*/*.wav" \
105
  --label_plan 2-voicemail \
106
+ --config_file "yaml/lstm-classifier-2-l3-ch64.yaml" \
107
+ --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-l3-ch64-lstm.zip"
108
 
109
+ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-zh-tw-2-l3-ch64-lstm \
110
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/zh-TW/wav_finished/*/*.wav" \
111
  --label_plan 2-voicemail \
112
+ --config_file "yaml/lstm-classifier-2-l3-ch64.yaml" \
113
+ --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-l3-ch64-lstm.zip"
114
 
examples/sound_classification_by_lstm/yaml/{lstm-classifier-2-ch64.yaml → lstm-classifier-2-l3-ch64.yaml} RENAMED
File without changes
examples/sound_classification_by_lstm/yaml/lstm-classifier-3-l3-ch64.yaml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name: "lstm_audio_classifier"
2
+
3
+ mel_spectrogram_param:
4
+ sample_rate: 8000
5
+ n_fft: 512
6
+ win_length: 200
7
+ hop_length: 80
8
+ f_min: 10
9
+ f_max: 3800
10
+ window_fn: hamming
11
+ n_mels: 80
12
+
13
+ lstm_layer_param:
14
+ input_size: 80
15
+ hidden_size: 64
16
+ num_layers: 3
17
+ dropout: 0.2
18
+
19
+ pooling_layer_param:
20
+ pool_layer: last
21
+
22
+ cls_head_param:
23
+ input_dim: 64
24
+ num_layers: 1
25
+ hidden_dims:
26
+ - 32
27
+ activations: relu
28
+ dropout: 0.1
29
+ num_labels: 3
examples/sound_classification_by_lstm/yaml/{lstm-classifier-4-ch64.yaml → lstm-classifier-4-l3-ch64.yaml} RENAMED
File without changes
examples/sound_classification_by_lstm/yaml/lstm-classifier-8-l3-ch64.yaml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name: "lstm_audio_classifier"
2
+
3
+ mel_spectrogram_param:
4
+ sample_rate: 8000
5
+ n_fft: 512
6
+ win_length: 200
7
+ hop_length: 80
8
+ f_min: 10
9
+ f_max: 3800
10
+ window_fn: hamming
11
+ n_mels: 80
12
+
13
+ lstm_layer_param:
14
+ input_size: 80
15
+ hidden_size: 64
16
+ num_layers: 3
17
+ dropout: 0.2
18
+
19
+ pooling_layer_param:
20
+ pool_layer: last
21
+
22
+ cls_head_param:
23
+ input_dim: 64
24
+ num_layers: 1
25
+ hidden_dims:
26
+ - 32
27
+ activations: relu
28
+ dropout: 0.1
29
+ num_labels: 8