add some code

This commit is contained in:
2025-09-05 13:25:11 +08:00
parent 9ff0a99e7a
commit 3cf1229a85
8911 changed files with 2535396 additions and 0 deletions

View File

@@ -0,0 +1,49 @@
1 TELL ME A JOKE
2 SING A SONG
3 PLAY NEWS CHANNEL
4 TURN ON MY SOUNDBOX
5 TURN OFF MY SOUNDBOX
5 TURN OF MY SOUNDBOX
6 HIGHEST VOLUME
7 LOWEST VOLUME
8 INCREASE THE VOLUME
9 DECREASE THE VOLUME
10 TURN ON THE TV
11 TURN OFF THE TV
11 TURN OF THE TV
12 MAKE ME A TEA
13 MAKE ME A COFFEE
14 TURN ON THE LIGHT
15 TURN OFF THE LIGHT
15 TURN OF THE LIGHT
16 CHANGE THE COLOR TO RED
17 CHANGE THE COLOR TO GREEN
18 TURN ON ALL THE LIGHTS
19 TURN OFF ALL THE LIGHTS
19 TURN OF ALL THE LIGHTS
20 TURN ON THE AIR CONDITIONER
21 TURN OFF THE AIR CONDITIONER
21 TURN OF THE AIR CONDITIONER
22 SET THE TEMPERATURE TO SIXTEEN DEGREES
23 SET THE TEMPERATURE TO SEVENTEEN DEGREES
24 SET THE TEMPERATURE TO EIGHTEEN DEGREES
25 SET THE TEMPERATURE TO NINETEEN DEGREES
26 SET THE TEMPERATURE TO TWENTY DEGREES
27 SET THE TEMPERATURE TO TWENTY ONE DEGREES
28 SET THE TEMPERATURE TO TWENTY TWO DEGREES
29 SET THE TEMPERATURE TO TWENTY THREE DEGREES
30 SET THE TEMPERATURE TO TWENTY FOUR DEGREES
31 SET THE TEMPERATURE TO TWENTY FIVE DEGREES
32 SET THE TEMPERATURE TO TWENTY SIX DEGREES
33 LOWEST FAN SPEED
34 MEDIUM FAN SPEED
35 HIGHEST FAN SPEED
36 AUTO ADJUST THE FAN SPEED
37 DECREASE THE FAN SPEED
38 INCREASE THE FAN SPEED
39 INCREASE THE TEMPERATURE
40 DECREASE THE TEMPERATURE
41 COOLING MODE
42 HEATING MODE
43 VENTILATION MODE
44 DEHUMIDIFY MODE

View File

@@ -0,0 +1,64 @@
import argparse
from pathlib import Path
from typing import List, Set, Tuple
import sentencepiece as spm
def process_commands(infile: List[str], sp: 'spm.SentencePieceProcessor'
                     ) -> Tuple[List[str], Set[str]]:
    """Tokenize every command line into subword pieces.

    Each non-blank entry of ``infile`` is split on whitespace: the first
    field is taken as the command id and the remaining fields, re-joined
    with single spaces, form the command text that is passed to
    ``sp.encode(..., out_type=str)``.

    Args:
        infile: Lines of the command list, one ``<id> <command words>``
            entry per line. Blank or whitespace-only lines are skipped.
        sp: Tokenizer object; only ``encode(text, out_type=str)`` is
            called on it.

    Returns:
        A pair ``(out_commands, tokens)`` where ``out_commands`` holds one
        tab-separated string per command (the id followed by its subword
        pieces) and ``tokens`` is the set of all distinct pieces seen.
    """
    out_commands = []
    tokens = set()
    for line in infile:
        fields = line.split()
        if not fields:
            # A blank line has no id field; indexing it would raise
            # IndexError, so skip it instead of aborting the whole run.
            continue
        command_id, *words = fields
        command_tokens = sp.encode(' '.join(words), out_type=str)
        tokens.update(command_tokens)
        out_commands.append('\t'.join([command_id, *command_tokens]))
    return out_commands, tokens
if __name__ == '__main__':
    # Command-line entry point: read the command list, tokenize every
    # command with the given BPE model, then write the tokenized command
    # list and the table of subword pieces that the commands use.
    parser = argparse.ArgumentParser()
    parser.add_argument('--infile', type=str, required=True,
                        help='the text file of commands id and commands.')
    parser.add_argument('--bpe-model', type=str, default='bpe.model',
                        help='subword bpe model file.')
    parser.add_argument('--out-command-list', type=str,
                        default='commands_tokens.txt',
                        help='the output subword commands text filename.')
    parser.add_argument('--out-token-symbols', type=str,
                        default='tokens.txt',
                        help='the output token to subword id mapping.')
    args = parser.parse_args()

    # Fail early, before loading the model, if either input is missing.
    for required_path in (args.infile, args.bpe_model):
        if not Path(required_path).is_file():
            raise FileNotFoundError(required_path)

    # Explicit UTF-8 everywhere: subword pieces may contain non-ASCII
    # characters, and the locale default encoding is not guaranteed to be
    # able to represent them.
    with open(args.infile, encoding='utf-8') as f:
        infile = [x.strip() for x in f]

    sp = spm.SentencePieceProcessor()
    sp.load(args.bpe_model)
    out_commands, tokens = process_commands(infile, sp)

    token_symbols = []
    for i in range(sp.vocab_size()):
        piece = sp.id_to_piece(i)
        # Id 0 is always emitted, whether or not any command uses it
        # (presumably a reserved blank/unknown symbol -- confirm against
        # the BPE model in use).
        if i == 0 or piece in tokens:
            token_symbols.append(f'{piece}\t{i}')

    with open(args.out_command_list, 'wt', encoding='utf-8') as f:
        f.write('\n'.join(out_commands) + '\n')
    with open(args.out_token_symbols, 'wt', encoding='utf-8') as f:
        f.write('\n'.join(token_symbols) + '\n')

View File

@@ -0,0 +1 @@
sentencepiece==0.1.97