add some code
This commit is contained in:
BIN
managed_components/espressif__esp-sr/tool/fst/bpe.model
Normal file
BIN
managed_components/espressif__esp-sr/tool/fst/bpe.model
Normal file
Binary file not shown.
@@ -0,0 +1,49 @@
|
||||
1 TELL ME A JOKE
|
||||
2 SING A SONG
|
||||
3 PLAY NEWS CHANNEL
|
||||
4 TURN ON MY SOUNDBOX
|
||||
5 TURN OFF MY SOUNDBOX
|
||||
5 TURN OF MY SOUNDBOX
|
||||
6 HIGHEST VOLUME
|
||||
7 LOWEST VOLUME
|
||||
8 INCREASE THE VOLUME
|
||||
9 DECREASE THE VOLUME
|
||||
10 TURN ON THE TV
|
||||
11 TURN OFF THE TV
|
||||
11 TURN OF THE TV
|
||||
12 MAKE ME A TEA
|
||||
13 MAKE ME A COFFEE
|
||||
14 TURN ON THE LIGHT
|
||||
15 TURN OFF THE LIGHT
|
||||
15 TURN OF THE LIGHT
|
||||
16 CHANGE THE COLOR TO RED
|
||||
17 CHANGE THE COLOR TO GREEN
|
||||
18 TURN ON ALL THE LIGHTS
|
||||
19 TURN OFF ALL THE LIGHTS
|
||||
19 TURN OF ALL THE LIGHTS
|
||||
20 TURN ON THE AIR CONDITIONER
|
||||
21 TURN OFF THE AIR CONDITIONER
|
||||
21 TURN OF THE AIR CONDITIONER
|
||||
22 SET THE TEMPERATURE TO SIXTEEN DEGREES
|
||||
23 SET THE TEMPERATURE TO SEVENTEEN DEGREES
|
||||
24 SET THE TEMPERATURE TO EIGHTEEN DEGREES
|
||||
25 SET THE TEMPERATURE TO NINETEEN DEGREES
|
||||
26 SET THE TEMPERATURE TO TWENTY DEGREES
|
||||
27 SET THE TEMPERATURE TO TWENTY ONE DEGREES
|
||||
28 SET THE TEMPERATURE TO TWENTY TWO DEGREES
|
||||
29 SET THE TEMPERATURE TO TWENTY THREE DEGREES
|
||||
30 SET THE TEMPERATURE TO TWENTY FOUR DEGREES
|
||||
31 SET THE TEMPERATURE TO TWENTY FIVE DEGREES
|
||||
32 SET THE TEMPERATURE TO TWENTY SIX DEGREES
|
||||
33 LOWEST FAN SPEED
|
||||
34 MEDIUM FAN SPEED
|
||||
35 HIGHEST FAN SPEED
|
||||
36 AUTO ADJUST THE FAN SPEED
|
||||
37 DECREASE THE FAN SPEED
|
||||
38 INCREASE THE FAN SPEED
|
||||
39 INCREASE THE TEMPERATURE
|
||||
40 DECREASE THE TEMPERATURE
|
||||
41 COOLING MODE
|
||||
42 HEATING MODE
|
||||
43 VENTILATION MODE
|
||||
44 DEHUMIDIFY MODE
|
||||
@@ -0,0 +1,64 @@
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from typing import List, Set, Tuple
|
||||
|
||||
import sentencepiece as spm
|
||||
|
||||
|
||||
def process_commands(infile: List[str], sp: "spm.SentencePieceProcessor"
                     ) -> Tuple[List[str], Set[str]]:
    """Tokenize "<id> <command text>" lines into subword pieces.

    Args:
        infile: Lines of the command list. The first whitespace-separated
            field of each line is taken as the command id and the remaining
            fields, re-joined with single spaces, as the command text.
            Blank or whitespace-only lines are skipped.
        sp: Tokenizer object; only ``encode(text, out_type=str)`` is called.

    Returns:
        A tuple ``(out_commands, tokens)``. ``out_commands`` has one
        tab-separated string per non-blank input line (the command id
        followed by its pieces); ``tokens`` is the set of distinct pieces
        produced across all commands.
    """
    out_commands = []
    tokens = set()

    for line in infile:
        fields = line.split()
        if not fields:
            # A blank line (e.g. a trailing empty line in the input file)
            # has no id to index; skip it instead of raising IndexError.
            continue

        command_id = fields[0]
        command = ' '.join(fields[1:])
        command_tokens = sp.encode(command, out_type=str)

        tokens.update(command_tokens)
        out_commands.append('\t'.join([command_id, *command_tokens]))

    return out_commands, tokens
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command-line driver: read a command list, tokenize every command with
    # the given BPE model, then write (1) the tokenized command list and
    # (2) a "piece<TAB>id" table restricted to the pieces actually used.
    parser = argparse.ArgumentParser()
    parser.add_argument('--infile', type=str, required=True,
                        help='the text file of commands id and commands.')
    parser.add_argument('--bpe-model', type=str, default='bpe.model',
                        help='subword bpe model file.')
    parser.add_argument('--out-command-list', type=str,
                        default='commands_tokens.txt',
                        help='the output subword commands text filename.')
    parser.add_argument('--out-token-symbols', type=str,
                        default='tokens.txt',
                        help='the output token to subword id mapping.')
    args = parser.parse_args()

    # Fail early with the offending path if either input is missing.
    if not Path(args.infile).is_file():
        raise FileNotFoundError(args.infile)

    if not Path(args.bpe_model).is_file():
        raise FileNotFoundError(args.bpe_model)

    # Explicit UTF-8 so reading and writing do not depend on the platform's
    # locale encoding (subword pieces may contain non-ASCII characters).
    with open(args.infile, encoding='utf-8') as f:
        infile = [x.strip() for x in f]

    sp = spm.SentencePieceProcessor()
    sp.load(args.bpe_model)

    out_commands, tokens = process_commands(infile, sp)

    # Keep only the vocabulary entries used by the commands. Id 0 is always
    # kept regardless of use.
    # NOTE(review): presumably id 0 is a reserved symbol the consumer of
    # tokens.txt requires - confirm against the model/consumer.
    token_symbols = []
    for i in range(sp.vocab_size()):
        piece = sp.id_to_piece(i)
        if piece in tokens or i == 0:
            token_symbols.append(f'{piece}\t{i}')

    with open(args.out_command_list, 'wt', encoding='utf-8') as f:
        f.write('\n'.join(out_commands))
        f.write('\n')

    with open(args.out_token_symbols, 'wt', encoding='utf-8') as f:
        f.write('\n'.join(token_symbols))
        f.write('\n')
|
||||
@@ -0,0 +1 @@
|
||||
sentencepiece==0.1.97
|
||||
Reference in New Issue
Block a user