submission_sync

Sleeping

App Files Community

AB739 committed on Jan 16, 2025

Commit

deeac22

verified ·

1 Parent(s): bd4f556

Update tasks/audio.py

Browse files

Files changed (1) hide show

tasks/audio.py +4 -136

tasks/audio.py CHANGED Viewed

@@ -10,6 +10,7 @@ from torch.utils.data import DataLoader, TensorDataset
 from torchaudio import transforms
 from torchvision import models
 from .utils.evaluation import AudioEvaluationRequest
 from .utils.emissions import tracker, clean_emissions_data, get_space_info
@@ -87,145 +88,12 @@ async def evaluate_audio(request: AudioEvaluationRequest):
         'spectrogram_length': 64,
         'dct_coefficient_count': 481,
         'label_count': 2
-    }
-    # Create model
-    #model = BlazeFaceModel(input_channels=1, label_count=model_settings['label_count'], use_double_block=False, activation='relu', use_optional_block=False)
-    from torch.quantization import QuantStub, DeQuantStub
-    class BlazeFace(nn.Module):
-        def __init__(self, input_channels=1, use_double_block=False, activation="relu", use_optional_block=True):
-            super(BlazeFace, self).__init__()
-            self.activation = activation
-            self.use_double_block = use_double_block
-            self.use_optional_block = use_optional_block
-            def conv_block(in_channels, out_channels, kernel_size, stride, padding):
-                return nn.Sequential(
-                    nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding),
-                    nn.BatchNorm2d(out_channels),
-                    nn.ReLU() if activation == "relu" else nn.Sigmoid()  # Apply ReLU activation (default) or Sigmoid
-                )
-            def depthwise_separable_block(in_channels, out_channels, stride):
-                return nn.Sequential(
-                    nn.Conv2d(in_channels, in_channels, kernel_size=5, stride=stride, padding=2, groups=in_channels, bias=False),
-                    nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0),
-                    nn.BatchNorm2d(out_channels),
-                    nn.ReLU() if activation == "relu" else nn.Sigmoid()
-                )
-            def double_block(in_channels, filters_1, filters_2, stride):
-                return nn.Sequential(
-                    depthwise_separable_block(in_channels, filters_1, stride),
-                    depthwise_separable_block(filters_1, filters_2, 1)
-                )
-            # Define layers (first part: conv layers)
-            self.conv1 = conv_block(input_channels, 24, kernel_size=5, stride=2, padding=2)
-            # Define single blocks (subsequent conv blocks)
-            self.single_blocks = nn.ModuleList([
-                depthwise_separable_block(24, 24, stride=1),
-                depthwise_separable_block(24, 24, stride=1),
-                depthwise_separable_block(24, 48, stride=2),
-                depthwise_separable_block(48, 48, stride=1),
-                depthwise_separable_block(48, 48, stride=1)
-            ])
-            # Define double blocks if `use_double_block` is True
-            if self.use_double_block:
-                self.double_blocks = nn.ModuleList([
-                    double_block(48, 24, 96, stride=2),
-                    double_block(96, 24, 96, stride=1),
-                    double_block(96, 24, 96, stride=2),
-                    double_block(96, 24, 96, stride=1),
-                    double_block(96, 24, 96, stride=2)
-                ])
-            else:
-                self.double_blocks = nn.ModuleList([
-                    depthwise_separable_block(48, 96, stride=2),
-                    depthwise_separable_block(96, 96, stride=1),
-                    depthwise_separable_block(96, 96, stride=2),
-                    depthwise_separable_block(96, 96, stride=1),
-                    depthwise_separable_block(96, 96, stride=2)
-                ])
-            # Final convolutional head
-            self.conv_head = nn.Conv2d(96, 64, kernel_size=1, stride=1)
-            self.bn_head = nn.BatchNorm2d(64)
-            # Global Average Pooling
-            self.global_avg_pooling = nn.AdaptiveAvgPool2d(1)
-        def forward(self, x):
-            # First conv layer
-            x = self.conv1(x)
-            # Apply single blocks
-            for block in self.single_blocks:
-                x = block(x)
-            # Apply double blocks
-            for block in self.double_blocks:
-                x = block(x)
-            # Final head
-            x = self.conv_head(x)
-            x = self.bn_head(x)
-            x = F.relu(x)
-            # Global Average Pooling and Flatten
-            x = self.global_avg_pooling(x)
-            x = torch.flatten(x, 1)
-            return x
-    class BlazeFaceModel(nn.Module):
-        def __init__(self, input_channels, label_count, use_double_block=False, activation="relu", use_optional_block=True):
-            super(BlazeFaceModel, self).__init__()
-            self.blazeface_backbone = BlazeFace(input_channels=input_channels, use_double_block=use_double_block, activation=activation, use_optional_block=use_optional_block)
-            self.fc = nn.Linear(64, label_count)
-        def forward(self, x):
-            features = self.blazeface_backbone(x)
-            output = self.fc(features)
-            return output
-    # Example Usage
-    model_settings = {
-        'spectrogram_length': 64,
-        'dct_coefficient_count': 481,
-        'label_count': 2
-    }
-    # Define a quantized BlazeFace model
-    class QuantizedBlazeFaceModel(nn.Module):
-        def __init__(self, model_fp32):
-            super(QuantizedBlazeFaceModel, self).__init__()
-            self.quant = QuantStub()
-            self.dequant = DeQuantStub()
-            self.backbone = model_fp32.blazeface_backbone
-            self.fc = model_fp32.fc
-        def forward(self, x):
-            x = self.quant(x)
-            x = self.backbone(x)
-            x = self.fc(x)
-            x = self.dequant(x)
-            return x
-    # Load the trained model
-    model_settings = {
-        'label_count': 2
-    }
-    model_fp32 = BlazeFaceModel(input_channels=1, label_count=2)  # Assume label_count is 2
     quantized_model_path = "./qat_int8_blazeface_model.pth"
     int8_model = QuantizedBlazeFaceModel(model_fp32)
-    int8_model.qconfig = torch.quantization.get_default_qat_qconfig('fbgemm')  # Optional if not defined in saved model
     # Load the state dictionary
     int8_model.load_state_dict(torch.load(quantized_model_path, map_location=torch.device('cpu'), weights_only=True))

 from torchaudio import transforms
 from torchvision import models
+from .model import BlazeFaceModel, QuantizedBlazeFaceModel
 from .utils.evaluation import AudioEvaluationRequest
 from .utils.emissions import tracker, clean_emissions_data, get_space_info
         'spectrogram_length': 64,
         'dct_coefficient_count': 481,
         'label_count': 2
+    }
+    model = BlazeFaceModel(input_channels=1, label_count=model_settings['label_count'], use_double_block=False, activation='relu', use_optional_block=False)
     quantized_model_path = "./qat_int8_blazeface_model.pth"
     int8_model = QuantizedBlazeFaceModel(model_fp32)
+    int8_model.qconfig = torch.quantization.get_default_qat_qconfig('fbgemm')
     # Load the state dictionary
     int8_model.load_state_dict(torch.load(quantized_model_path, map_location=torch.device('cpu'), weights_only=True))