add some code

This commit is contained in:
2025-09-05 13:25:11 +08:00
parent 9ff0a99e7a
commit 3cf1229a85
8911 changed files with 2535396 additions and 0 deletions

View File

@@ -0,0 +1,27 @@
# Packet loss simulator
This code is an attempt at simulating better packet loss scenarios. The most common way of simulating
packet loss is to use a random sequence where each packet loss event is uncorrelated with previous events.
That is a simplistic model since we know that losses often occur in bursts. This model uses real data
to build a generative model for packet loss.
We use the training data provided for the Audio Deep Packet Loss Concealment Challenge, which is available at:
http://plcchallenge2022pub.blob.core.windows.net/plcchallengearchive/test_train.tar.gz
To create the training data, run:
`./process_data.sh /<path>/test_train/train/lossy_signals/`
That will create an ascii loss_sorted.txt file with all loss data sorted in increasing packet loss
percentage. Then just run:
`python ./train_lossgen.py`
to train a model
To generate a sequence, run
`python3 ./test_lossgen.py <checkpoint> <percentage> output.txt --length 10000`
where <checkpoint> is the .pth model file and <percentage> is the amount of loss (e.g. 0.2 for 20% loss).

View File

@@ -0,0 +1,101 @@
"""
/* Copyright (c) 2022 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
"""
import os
import argparse
import sys

# Make the sibling weight-exchange package importable without installation.
sys.path.append(os.path.join(os.path.dirname(__file__), '../weight-exchange'))

parser = argparse.ArgumentParser()
parser.add_argument('checkpoint', type=str, help='model checkpoint')
parser.add_argument('output_dir', type=str, help='output folder')
args = parser.parse_args()

# Heavy imports are deliberately placed after argument parsing so that
# `--help` and usage errors do not pay the torch import cost.
import torch
import numpy as np
import lossgen
from wexchange.torch import dump_torch_weights
from wexchange.c_export import CWriter, print_vector
def c_export(args, model):
    """Export the LossGen model weights as C source files.

    Creates a `lossgen_data` source/header pair in ``args.output_dir`` via the
    weight-exchange ``CWriter``: the two dense layers are dumped unquantized,
    the two GRU layers are dumped quantized, and the widest GRU is recorded as
    ``LOSSGEN_MAX_RNN_UNITS`` so the C side can size its buffers.
    """
    note = f"Auto generated from checkpoint {os.path.basename(args.checkpoint)}"
    writer = CWriter(os.path.join(args.output_dir, "lossgen_data"),
                     message=note,
                     model_struct_name='LossGen',
                     enable_binary_blob=False,
                     add_typedef=True)

    # Generated code only needs the basic Opus integer typedefs.
    writer.header.write(
f"""
#include "opus_types.h"
"""
    )

    # Fully connected layers: exported as plain floats.
    for torch_name, c_name in [('dense_in', "lossgen_dense_in"),
                               ('dense_out', "lossgen_dense_out")]:
        dump_torch_weights(writer, model.get_submodule(torch_name),
                           name=c_name, verbose=True, quantize=False, scale=None)

    # Recurrent layers: exported quantized; remember the widest one.
    gru_units = []
    for torch_name, c_name in [("gru1", "lossgen_gru1"),
                               ("gru2", "lossgen_gru2")]:
        units = dump_torch_weights(writer, model.get_submodule(torch_name),
                                   c_name, verbose=True, input_sparse=False,
                                   quantize=True, scale=None,
                                   recurrent_scale=None)
        gru_units.append(units)
    max_rnn_units = max(gru_units)

    writer.header.write(
f"""
#define LOSSGEN_MAX_RNN_UNITS {max_rnn_units}
"""
    )
    writer.close()
if __name__ == "__main__":
    os.makedirs(args.output_dir, exist_ok=True)

    # Rebuild the model exactly as trained: constructor args are stored in the
    # checkpoint alongside the weights (see train_lossgen.py).
    checkpoint = torch.load(args.checkpoint, map_location='cpu')
    model = lossgen.LossGen(*checkpoint['model_args'], **checkpoint['model_kwargs'])
    # NOTE(review): strict=False tolerates missing/unexpected keys, which can
    # silently skip weights if checkpoint and architecture disagree — confirm
    # this leniency is intended.
    model.load_state_dict(checkpoint['state_dict'], strict=False)

    c_export(args, model)

View File

@@ -0,0 +1,29 @@
import torch
from torch import nn
import torch.nn.functional as F


class LossGen(nn.Module):
    """Generative model of bursty packet loss.

    Two stacked GRUs driven by the previous loss indicator and a running loss
    percentage; the output is a single logit per step for the probability that
    the next packet is lost.
    """

    def __init__(self, gru1_size=16, gru2_size=16):
        super().__init__()

        self.gru1_size = gru1_size
        self.gru2_size = gru2_size
        # Two scalar inputs (loss flag, loss percentage) -> small embedding.
        self.dense_in = nn.Linear(2, 8)
        self.gru1 = nn.GRU(8, self.gru1_size, batch_first=True)
        self.gru2 = nn.GRU(self.gru1_size, self.gru2_size, batch_first=True)
        self.dense_out = nn.Linear(self.gru2_size, 1)

    def forward(self, loss, perc, states=None):
        """Return (logits, [gru1_state, gru2_state]).

        `loss` and `perc` are (batch, time, 1) tensors; `states`, if given,
        holds the two GRU hidden states from the previous call so generation
        can proceed step by step.
        """
        if states is None:
            dev = loss.device
            batch = loss.size(0)
            h1 = torch.zeros((1, batch, self.gru1_size), device=dev)
            h2 = torch.zeros((1, batch, self.gru2_size), device=dev)
        else:
            h1, h2 = states

        hidden = torch.tanh(self.dense_in(torch.cat([loss, perc], dim=-1)))
        out1, h1 = self.gru1(hidden, h1)
        out2, h2 = self.gru2(out1, h2)
        return self.dense_out(out2), [h1, h2]

View File

@@ -0,0 +1,17 @@
#!/bin/sh
# Build loss_sorted.txt: concatenate all per-file packet loss traces, ordered
# by increasing average loss percentage.
#
# Usage: ./process_data.sh /<path>/test_train/train/lossy_signals/

# Directory containing the *_is_lost.txt loss trace files.
# All expansions are quoted so paths with spaces do not break the script.
datadir="$1"

# Mean loss (fraction of 1s) of each trace, one "percentage filename" line.
for i in "$datadir"/*_is_lost.txt
do
    perc=$(awk '{a+=$1}END{print a/NR}' "$i")
    echo "$perc" "$i"
done > percentage_list.txt

# Order the trace filenames by increasing loss percentage.
sort -n percentage_list.txt | awk '{print $2}' > percentage_sorted.txt

# Concatenate the traces in that order.
while read -r f
do
    cat "$f"
done < percentage_sorted.txt > loss_sorted.txt

View File

@@ -0,0 +1,42 @@
import lossgen
import os
import argparse
import torch
import numpy as np
parser = argparse.ArgumentParser()
# NOTE(review): help text says 'CELPNet model' but this script loads a LossGen
# checkpoint — looks copy-pasted from another script; confirm and fix wording.
parser.add_argument('model', type=str, help='CELPNet model')
parser.add_argument('percentage', type=float, help='percentage loss')
parser.add_argument('output', type=str, help='path to output file (ascii)')
parser.add_argument('--length', type=int, help="length of sequence to generate", default=500)
args = parser.parse_args()

# Rebuild the trained model from the constructor args stored in the checkpoint.
checkpoint = torch.load(args.model, map_location='cpu')
model = lossgen.LossGen(*checkpoint['model_args'], **checkpoint['model_kwargs'])
model.load_state_dict(checkpoint['state_dict'], strict=False)

# Sampling state, all shaped (1, 1, 1) = (batch, time, feature):
states=None
last = torch.zeros((1,1,1))                            # previous loss flag; start "not lost"
perc = torch.tensor((args.percentage,))[None,None,:]   # target loss rate fed at every step
seq = torch.zeros((0,1,1))                             # generated sequence, grown in the loop
one = torch.ones((1,1,1))
zero = torch.zeros((1,1,1))
if __name__ == '__main__':
    # Autoregressively sample a 0/1 loss sequence: feed the previous sample
    # (plus the fixed target percentage) back into the model each step.
    samples = []
    state = states
    prev = last
    for _ in range(args.length):
        logit, state = model(prev, perc, states=state)
        p_loss = torch.sigmoid(logit)
        # Drop the autograd graph so it does not grow across steps.
        state = [s.detach() for s in state]
        prev = one if np.random.rand() < p_loss else zero
        samples.append(prev)
    seq = torch.cat(samples)
    # One 0/1 integer per line.
    np.savetxt(args.output, seq[:, :, 0].numpy().astype('int'), fmt='%d')

View File

@@ -0,0 +1,99 @@
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
import tqdm
from scipy.signal import lfilter
import os
import lossgen
class LossDataset(torch.utils.data.Dataset):
    """Fixed-length packet-loss sequences paired with a smoothed loss rate.

    Reads a flat ascii file of 0/1 loss indicators, cuts it into
    non-overlapping windows of ``sequence_length`` samples, and pairs each
    window with a running loss percentage obtained by one-pole IIR smoothing
    of the full trace.
    """

    def __init__(self,
                 loss_file,
                 sequence_length=997):
        self.sequence_length = sequence_length

        flat = np.loadtxt(loss_file, dtype='float32')
        self.nb_sequences = flat.shape[0] // sequence_length
        flat = flat[:self.nb_sequences * sequence_length]

        # Running loss percentage: y[n] = .999*y[n-1] + .001*x[n].
        smoothed = lfilter(np.array([.001], dtype='float32'),
                           np.array([1., -.999], dtype='float32'),
                           flat)

        shape = (self.nb_sequences, sequence_length, 1)
        self.loss = np.reshape(flat, shape)
        self.perc = np.reshape(smoothed, shape)

    def __len__(self):
        return self.nb_sequences

    def __getitem__(self, index):
        # Augmentation: jitter the percentage with per-sequence (r0) and
        # per-step (r1) noise, scaled by perc*(1-perc) so it vanishes at
        # 0% and 100% loss.
        r0 = np.random.normal(scale=.1, size=(1, 1)).astype('float32')
        r1 = np.random.normal(scale=.1, size=(self.sequence_length, 1)).astype('float32')
        perc = self.perc[index, :, :]
        perc = perc + (r0 + r1) * perc * (1 - perc)
        return [self.loss[index, :, :], perc]
# Optimizer hyper-parameters.
adam_betas = [0.8, 0.98]
adam_eps = 1e-8
batch_size=256
lr_decay = 0.001    # inverse-time decay rate, applied once per optimizer step
lr = 0.003
epsilon = 1e-5      # keeps log() finite in the hand-written BCE loss below
epochs = 2000
checkpoint_dir='checkpoint'
os.makedirs(checkpoint_dir, exist_ok=True)

# The checkpoint dict also stores the constructor arguments so that the
# export/test scripts can rebuild the exact same architecture.
checkpoint = dict()
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
checkpoint['model_args'] = ()
checkpoint['model_kwargs'] = {'gru1_size': 16, 'gru2_size': 32}
model = lossgen.LossGen(*checkpoint['model_args'], **checkpoint['model_kwargs'])

# loss_sorted.txt is produced by process_data.sh (traces sorted by loss rate).
dataset = LossDataset('loss_sorted.txt')
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=True, num_workers=4)
optimizer = torch.optim.AdamW(model.parameters(), lr=lr, betas=adam_betas, eps=adam_eps)
# learning rate scheduler
scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer=optimizer, lr_lambda=lambda x : 1 / (1 + lr_decay * x))
if __name__ == '__main__':
    model.to(device)
    states = None
    for epoch in range(1, epochs + 1):
        running_loss = 0
        print(f"training epoch {epoch}...")
        with tqdm.tqdm(dataloader, unit='batch') as tepoch:
            for i, (loss, perc) in enumerate(tepoch):
                optimizer.zero_grad()
                loss = loss.to(device)
                perc = perc.to(device)
                # NOTE(review): GRU hidden states are carried over between
                # (shuffled) batches rather than reset; shapes stay compatible
                # because batch_size is fixed and drop_last=True. Presumably
                # this exposes the model to nonzero initial states — confirm.
                out, states = model(loss, perc, states=states)
                # Detach so the graph does not grow across batches.
                states = [state.detach() for state in states]
                # Next-step prediction: logits at step t predict the loss flag
                # at step t+1.
                out = torch.sigmoid(out[:,:-1,:])
                target = loss[:,1:,:]
                # Hand-written binary cross-entropy; epsilon keeps log finite.
                # NOTE(review): `loss` is rebound here from the input batch to
                # the scalar objective — consider renaming one of the two.
                loss = torch.mean(-target*torch.log(out+epsilon) - (1-target)*torch.log(1-out+epsilon))
                loss.backward()
                optimizer.step()
                # Scheduler steps per batch, matching the per-step lr_decay.
                scheduler.step()
                running_loss += loss.detach().cpu().item()
                tepoch.set_postfix(loss=f"{running_loss/(i+1):8.5f}",
                                   )
        # save checkpoint
        checkpoint_path = os.path.join(checkpoint_dir, f'lossgen_{epoch}.pth')
        checkpoint['state_dict'] = model.state_dict()
        checkpoint['loss'] = running_loss / len(dataloader)
        checkpoint['epoch'] = epoch
        torch.save(checkpoint, checkpoint_path)