Upgrade Playlist Features

This commit is contained in:
2025-12-09 17:20:01 +08:00
parent 577990de69
commit 8bd2780688
683 changed files with 91812 additions and 81260 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -1,45 +1,45 @@
# LVGL图片转换工具
这个目录包含两个用于处理和转换图片为LVGL格式的Python脚本
## 1. LVGLImage (LVGLImage.py)
引用自LVGL[官方repo](https://github.com/lvgl/lvgl)的转换脚本[LVGLImage.py](https://github.com/lvgl/lvgl/blob/master/scripts/LVGLImage.py)
## 2. LVGL图片转换工具 (lvgl_tools_gui.py)
调用`LVGLImage.py`将图片批量转换为LVGL图片格式
可用于修改小智的默认表情,具体修改教程[在这里](https://www.bilibili.com/video/BV12FQkYeEJ3/)
### 特性
- 图形化操作,界面更友好
- 支持批量转换图片
- 自动识别图片格式并选择最佳的颜色格式转换
- 多分辨率支持
### 使用方法
创建虚拟环境
```bash
# 创建 venv
python -m venv venv
# 激活环境
source venv/bin/activate # Linux/Mac
venv\Scripts\activate # Windows
```
安装依赖
```bash
pip install -r requirements.txt
```
运行转换工具
```bash
# 激活环境
source venv/bin/activate # Linux/Mac
venv\Scripts\activate # Windows
# 运行
python lvgl_tools_gui.py
```
# LVGL图片转换工具
这个目录包含两个用于处理和转换图片为LVGL格式的Python脚本
## 1. LVGLImage (LVGLImage.py)
引用自LVGL[官方repo](https://github.com/lvgl/lvgl)的转换脚本[LVGLImage.py](https://github.com/lvgl/lvgl/blob/master/scripts/LVGLImage.py)
## 2. LVGL图片转换工具 (lvgl_tools_gui.py)
调用`LVGLImage.py`将图片批量转换为LVGL图片格式
可用于修改小智的默认表情,具体修改教程[在这里](https://www.bilibili.com/video/BV12FQkYeEJ3/)
### 特性
- 图形化操作,界面更友好
- 支持批量转换图片
- 自动识别图片格式并选择最佳的颜色格式转换
- 多分辨率支持
### 使用方法
创建虚拟环境
```bash
# 创建 venv
python -m venv venv
# 激活环境
source venv/bin/activate # Linux/Mac
venv\Scripts\activate # Windows
```
安装依赖
```bash
pip install -r requirements.txt
```
运行转换工具
```bash
# 激活环境
source venv/bin/activate # Linux/Mac
venv\Scripts\activate # Windows
# 运行
python lvgl_tools_gui.py
```

View File

@@ -1,253 +1,253 @@
import tkinter as tk
from tkinter import ttk, filedialog, messagebox
from PIL import Image
import os
import tempfile
import sys
from LVGLImage import LVGLImage, ColorFormat, CompressMethod
# Usage instructions shown by the "帮助" dialog; user-facing text, kept in Chinese.
HELP_TEXT = """LVGL图片转换工具使用说明
1. 添加文件:点击“添加文件”按钮选择需要转换的图片,支持批量导入
2. 移除文件:在列表中选中文件前的复选框“[ ]”(选中后会变成“[√]”),点击“移除选中”可删除选定文件
3. 设置分辨率选择需要的分辨率如128x128
建议根据自己的设备的屏幕分辨率来选择。过大和过小都会影响显示效果。
4. 颜色格式:选择“自动识别”会根据图片是否透明自动选择,或手动指定
除非你了解这个选项,否则建议使用自动识别,不然可能会出现一些意想不到的问题……
5. 压缩方式选择NONE或RLE压缩
除非你了解这个选项否则建议保持默认NONE不压缩
6. 输出目录:设置转换后文件的保存路径
默认为程序所在目录下的output文件夹
7. 转换:点击“转换全部”或“转换选中”开始转换
"""
class ImageConverterApp:
    """Tkinter front end for batch-converting images to LVGL C arrays.

    Wraps LVGLImage.py: every input image is resized to the chosen
    resolution, written out as a PNG copy, then converted to a .c file
    in the selected output directory.
    """

    def __init__(self, root):
        """Build all widgets on *root* and mirror stdout into the log pane."""
        self.root = root
        self.root.title("LVGL图片转换工具")
        self.root.geometry("750x650")
        # Conversion settings, bound to the combo boxes / entry below.
        self.output_dir = tk.StringVar(value=os.path.abspath("output"))
        self.resolution = tk.StringVar(value="128x128")
        self.color_format = tk.StringVar(value="自动识别")
        self.compress_method = tk.StringVar(value="NONE")
        self.create_widgets()
        self.redirect_output()

    def create_widgets(self):
        """Lay out the settings, file list, output path, buttons and log."""
        # Conversion-settings frame.
        settings_frame = ttk.LabelFrame(self.root, text="转换设置")
        settings_frame.grid(row=0, column=0, padx=10, pady=5, sticky="ew")
        ttk.Label(settings_frame, text="分辨率:").grid(row=0, column=0, padx=2)
        ttk.Combobox(settings_frame, textvariable=self.resolution,
                     values=["512x512", "256x256", "128x128", "64x64", "32x32"], width=8).grid(row=0, column=1, padx=2)
        ttk.Label(settings_frame, text="颜色格式:").grid(row=0, column=2, padx=2)
        ttk.Combobox(settings_frame, textvariable=self.color_format,
                     values=["自动识别", "RGB565", "RGB565A8"], width=10).grid(row=0, column=3, padx=2)
        ttk.Label(settings_frame, text="压缩方式:").grid(row=0, column=4, padx=2)
        ttk.Combobox(settings_frame, textvariable=self.compress_method,
                     values=["NONE", "RLE"], width=8).grid(row=0, column=5, padx=2)
        # File-selection frame with add/remove/clear buttons.
        file_frame = ttk.LabelFrame(self.root, text="选取文件")
        file_frame.grid(row=1, column=0, padx=10, pady=5, sticky="nsew")
        btn_frame = ttk.Frame(file_frame)
        btn_frame.pack(fill=tk.X, pady=2)
        ttk.Button(btn_frame, text="添加文件", command=self.select_files).pack(side=tk.LEFT, padx=2)
        ttk.Button(btn_frame, text="移除选中", command=self.remove_selected).pack(side=tk.LEFT, padx=2)
        ttk.Button(btn_frame, text="清空列表", command=self.clear_files).pack(side=tk.LEFT, padx=2)
        # File list; column #1 holds a "[ ]"/"[√]" pseudo-checkbox.
        self.tree = ttk.Treeview(file_frame, columns=("selected", "filename"),
                                 show="headings", height=10)
        self.tree.heading("selected", text="选择", anchor=tk.W)
        self.tree.heading("filename", text="文件名", anchor=tk.W)
        self.tree.column("selected", width=60, anchor=tk.W)
        self.tree.column("filename", width=600, anchor=tk.W)
        self.tree.pack(fill=tk.BOTH, expand=True)
        self.tree.bind("<ButtonRelease-1>", self.on_tree_click)
        # Output-directory frame.
        output_frame = ttk.LabelFrame(self.root, text="输出目录")
        output_frame.grid(row=2, column=0, padx=10, pady=5, sticky="ew")
        ttk.Entry(output_frame, textvariable=self.output_dir, width=60).pack(side=tk.LEFT, padx=5)
        ttk.Button(output_frame, text="浏览", command=self.select_output_dir).pack(side=tk.RIGHT, padx=5)
        # Convert / help buttons.
        convert_frame = ttk.Frame(self.root)
        convert_frame.grid(row=3, column=0, padx=10, pady=10)
        ttk.Button(convert_frame, text="转换全部文件", command=lambda: self.start_conversion(True)).pack(side=tk.LEFT, padx=5)
        ttk.Button(convert_frame, text="转换选中文件", command=lambda: self.start_conversion(False)).pack(side=tk.LEFT, padx=5)
        ttk.Button(convert_frame, text="帮助", command=self.show_help).pack(side=tk.RIGHT, padx=5)
        # Log pane with a clear button docked at the bottom.
        log_frame = ttk.LabelFrame(self.root, text="日志")
        log_frame.grid(row=4, column=0, padx=10, pady=5, sticky="nsew")
        log_btn_frame = ttk.Frame(log_frame)
        log_btn_frame.pack(fill=tk.X, side=tk.BOTTOM)
        ttk.Button(log_btn_frame, text="清空日志", command=self.clear_log).pack(side=tk.RIGHT, padx=5, pady=2)
        self.log_text = tk.Text(log_frame, height=15)
        self.log_text.pack(fill=tk.BOTH, expand=True)
        # Let the file list and log grow with the window.
        self.root.columnconfigure(0, weight=1)
        self.root.rowconfigure(1, weight=1)
        self.root.rowconfigure(4, weight=1)

    def clear_log(self):
        """Empty the log pane."""
        self.log_text.delete(1.0, tk.END)

    def show_help(self):
        """Pop up the usage instructions."""
        messagebox.showinfo("帮助", HELP_TEXT)

    def redirect_output(self):
        """Tee sys.stdout into the log widget; the original stream still receives everything."""
        class StdoutRedirector:
            def __init__(self, text_widget):
                self.text_widget = text_widget
                self.original_stdout = sys.stdout

            def write(self, message):
                self.text_widget.insert(tk.END, message)
                self.text_widget.see(tk.END)
                self.original_stdout.write(message)

            def flush(self):
                self.original_stdout.flush()

        sys.stdout = StdoutRedirector(self.log_text)

    def on_tree_click(self, event):
        """Toggle the "[ ]"/"[√]" checkbox when its cell is clicked."""
        region = self.tree.identify("region", event.x, event.y)
        if region == "cell":
            col = self.tree.identify_column(event.x)
            item = self.tree.identify_row(event.y)
            if col == "#1":  # click landed on the selection column
                current_val = self.tree.item(item, "values")[0]
                new_val = "[√]" if current_val == "[ ]" else "[ ]"
                self.tree.item(item, values=(new_val, self.tree.item(item, "values")[1]))

    def select_output_dir(self):
        """Ask for and store a new output directory."""
        path = filedialog.askdirectory()
        if path:
            self.output_dir.set(path)

    def select_files(self):
        """Add image files to the list; the full path is kept in the row's tag."""
        files = filedialog.askopenfilenames(filetypes=[("图片文件", "*.png;*.jpg;*.jpeg;*.bmp;*.gif")])
        for f in files:
            self.tree.insert("", tk.END, values=("[ ]", os.path.basename(f)), tags=(f,))

    def remove_selected(self):
        """Delete every checked row from the file list."""
        to_remove = [item for item in self.tree.get_children()
                     if self.tree.item(item, "values")[0] == "[√]"]
        for item in reversed(to_remove):
            self.tree.delete(item)

    def clear_files(self):
        """Delete all rows from the file list."""
        for item in self.tree.get_children():
            self.tree.delete(item)

    def start_conversion(self, convert_all):
        """Collect input paths and kick off the conversion.

        Args:
            convert_all: True converts every listed file, False only checked ones.
        """
        input_files = [
            self.tree.item(item, "tags")[0]
            for item in self.tree.get_children()
            if convert_all or self.tree.item(item, "values")[0] == "[√]"
        ]
        if not input_files:
            msg = "没有找到可转换的文件" if convert_all else "没有选中任何文件"
            messagebox.showwarning("警告", msg)
            return
        os.makedirs(self.output_dir.get(), exist_ok=True)
        width, height = map(int, self.resolution.get().split('x'))
        compress = CompressMethod.RLE if self.compress_method.get() == "RLE" else CompressMethod.NONE
        self.convert_images(input_files, width, height, compress)

    def _resolve_color_format(self, img):
        """Return (converted image, ColorFormat) for the current format setting.

        "自动识别" picks RGB565A8 when the image carries transparency,
        plain RGB565 otherwise; explicit settings are honored as-is.
        """
        color_format_str = self.color_format.get()
        if color_format_str == "自动识别":
            # Transparency can live in the mode (RGBA/LA) or in palette info.
            use_alpha = img.mode in ('RGBA', 'LA') or (img.mode == 'P' and 'transparency' in img.info)
        else:
            use_alpha = color_format_str == "RGB565A8"
        if use_alpha:
            return img.convert('RGBA'), ColorFormat.RGB565A8
        return img.convert('RGB'), ColorFormat.RGB565

    def convert_images(self, input_files, width, height, compress):
        """Resize each input and emit a PNG copy plus an LVGL .c array.

        A failure on one file is logged and does not abort the batch.
        """
        success_count = 0
        total_files = len(input_files)
        for file_path in input_files:
            try:
                print(f"正在处理: {os.path.basename(file_path)}")
                with Image.open(file_path) as img:
                    img = img.resize((width, height), Image.Resampling.LANCZOS)
                    img, cf = self._resolve_color_format(img)
                    base_name = os.path.splitext(os.path.basename(file_path))[0]
                    output_image_path = os.path.join(self.output_dir.get(), f"{base_name}_{width}x{height}.png")
                    img.save(output_image_path, 'PNG')
                    # Fix: convert straight from the PNG written above. The old
                    # code wrote a second copy to a NamedTemporaryFile and leaked
                    # it whenever the conversion raised before os.unlink().
                    lvgl_img = LVGLImage().from_png(output_image_path, cf=cf)
                    output_c_path = os.path.join(self.output_dir.get(), f"{base_name}.c")
                    lvgl_img.to_c_array(output_c_path, compress=compress)
                    success_count += 1
                    print(f"成功转换: {base_name}.c\n")
            except Exception as e:
                print(f"转换失败: {str(e)}\n")
        print(f"转换完成! 成功 {success_count}/{total_files} 个文件\n")
if __name__ == "__main__":
    # Script entry point: build the Tk root and run the GUI event loop.
    main_window = tk.Tk()
    converter = ImageConverterApp(main_window)
    main_window.mainloop()
import tkinter as tk
from tkinter import ttk, filedialog, messagebox
from PIL import Image
import os
import tempfile
import sys
from LVGLImage import LVGLImage, ColorFormat, CompressMethod
# Usage instructions shown by the "帮助" dialog; user-facing text, kept in Chinese.
HELP_TEXT = """LVGL图片转换工具使用说明
1. 添加文件:点击“添加文件”按钮选择需要转换的图片,支持批量导入
2. 移除文件:在列表中选中文件前的复选框“[ ]”(选中后会变成“[√]”),点击“移除选中”可删除选定文件
3. 设置分辨率选择需要的分辨率如128x128
建议根据自己的设备的屏幕分辨率来选择。过大和过小都会影响显示效果。
4. 颜色格式:选择“自动识别”会根据图片是否透明自动选择,或手动指定
除非你了解这个选项,否则建议使用自动识别,不然可能会出现一些意想不到的问题……
5. 压缩方式选择NONE或RLE压缩
除非你了解这个选项否则建议保持默认NONE不压缩
6. 输出目录:设置转换后文件的保存路径
默认为程序所在目录下的output文件夹
7. 转换:点击“转换全部”或“转换选中”开始转换
"""
class ImageConverterApp:
    """Tkinter front end that batch-converts images into LVGL C-array files via LVGLImage.py."""

    def __init__(self, root):
        """Build the UI on *root* and mirror stdout into the log pane."""
        self.root = root
        self.root.title("LVGL图片转换工具")
        self.root.geometry("750x650")
        # Conversion settings bound to the widgets created below.
        self.output_dir = tk.StringVar(value=os.path.abspath("output"))
        self.resolution = tk.StringVar(value="128x128")
        self.color_format = tk.StringVar(value="自动识别")
        self.compress_method = tk.StringVar(value="NONE")
        # Create the UI widgets.
        self.create_widgets()
        self.redirect_output()

    def create_widgets(self):
        """Create and lay out every widget of the main window."""
        # Settings frame.
        settings_frame = ttk.LabelFrame(self.root, text="转换设置")
        settings_frame.grid(row=0, column=0, padx=10, pady=5, sticky="ew")
        # Resolution selector.
        ttk.Label(settings_frame, text="分辨率:").grid(row=0, column=0, padx=2)
        ttk.Combobox(settings_frame, textvariable=self.resolution,
                     values=["512x512", "256x256", "128x128", "64x64", "32x32"], width=8).grid(row=0, column=1, padx=2)
        # Color-format selector.
        ttk.Label(settings_frame, text="颜色格式:").grid(row=0, column=2, padx=2)
        ttk.Combobox(settings_frame, textvariable=self.color_format,
                     values=["自动识别", "RGB565", "RGB565A8"], width=10).grid(row=0, column=3, padx=2)
        # Compression selector.
        ttk.Label(settings_frame, text="压缩方式:").grid(row=0, column=4, padx=2)
        ttk.Combobox(settings_frame, textvariable=self.compress_method,
                     values=["NONE", "RLE"], width=8).grid(row=0, column=5, padx=2)
        # File-selection frame.
        file_frame = ttk.LabelFrame(self.root, text="选取文件")
        file_frame.grid(row=1, column=0, padx=10, pady=5, sticky="nsew")
        # File operation buttons.
        btn_frame = ttk.Frame(file_frame)
        btn_frame.pack(fill=tk.X, pady=2)
        ttk.Button(btn_frame, text="添加文件", command=self.select_files).pack(side=tk.LEFT, padx=2)
        ttk.Button(btn_frame, text="移除选中", command=self.remove_selected).pack(side=tk.LEFT, padx=2)
        ttk.Button(btn_frame, text="清空列表", command=self.clear_files).pack(side=tk.LEFT, padx=2)
        # File list (Treeview); first column is a "[ ]"/"[√]" pseudo-checkbox.
        self.tree = ttk.Treeview(file_frame, columns=("selected", "filename"),
                                 show="headings", height=10)
        self.tree.heading("selected", text="选择", anchor=tk.W)
        self.tree.heading("filename", text="文件名", anchor=tk.W)
        self.tree.column("selected", width=60, anchor=tk.W)
        self.tree.column("filename", width=600, anchor=tk.W)
        self.tree.pack(fill=tk.BOTH, expand=True)
        self.tree.bind("<ButtonRelease-1>", self.on_tree_click)
        # Output-directory frame.
        output_frame = ttk.LabelFrame(self.root, text="输出目录")
        output_frame.grid(row=2, column=0, padx=10, pady=5, sticky="ew")
        ttk.Entry(output_frame, textvariable=self.output_dir, width=60).pack(side=tk.LEFT, padx=5)
        ttk.Button(output_frame, text="浏览", command=self.select_output_dir).pack(side=tk.RIGHT, padx=5)
        # Convert and help buttons.
        convert_frame = ttk.Frame(self.root)
        convert_frame.grid(row=3, column=0, padx=10, pady=10)
        ttk.Button(convert_frame, text="转换全部文件", command=lambda: self.start_conversion(True)).pack(side=tk.LEFT, padx=5)
        ttk.Button(convert_frame, text="转换选中文件", command=lambda: self.start_conversion(False)).pack(side=tk.LEFT, padx=5)
        ttk.Button(convert_frame, text="帮助", command=self.show_help).pack(side=tk.RIGHT, padx=5)
        # Log area (includes a clear-log button).
        log_frame = ttk.LabelFrame(self.root, text="日志")
        log_frame.grid(row=4, column=0, padx=10, pady=5, sticky="nsew")
        # Button row docked at the bottom of the log pane.
        log_btn_frame = ttk.Frame(log_frame)
        log_btn_frame.pack(fill=tk.X, side=tk.BOTTOM)
        # Clear-log button.
        ttk.Button(log_btn_frame, text="清空日志", command=self.clear_log).pack(side=tk.RIGHT, padx=5, pady=2)
        self.log_text = tk.Text(log_frame, height=15)
        self.log_text.pack(fill=tk.BOTH, expand=True)
        # Grid weights so the file list and log stretch with the window.
        self.root.columnconfigure(0, weight=1)
        self.root.rowconfigure(1, weight=1)
        self.root.rowconfigure(4, weight=1)

    def clear_log(self):
        """Clear the log pane."""
        self.log_text.delete(1.0, tk.END)

    def show_help(self):
        """Show the usage-instructions dialog."""
        messagebox.showinfo("帮助", HELP_TEXT)

    def redirect_output(self):
        """Tee sys.stdout into the log widget; the original stream still receives output."""
        class StdoutRedirector:
            def __init__(self, text_widget):
                self.text_widget = text_widget
                self.original_stdout = sys.stdout

            def write(self, message):
                self.text_widget.insert(tk.END, message)
                self.text_widget.see(tk.END)
                self.original_stdout.write(message)

            def flush(self):
                self.original_stdout.flush()
        sys.stdout = StdoutRedirector(self.log_text)

    def on_tree_click(self, event):
        """Toggle a row's checkbox when its first-column cell is clicked."""
        region = self.tree.identify("region", event.x, event.y)
        if region == "cell":
            col = self.tree.identify_column(event.x)
            item = self.tree.identify_row(event.y)
            if col == "#1":  # click hit the selection column
                current_val = self.tree.item(item, "values")[0]
                new_val = "[√]" if current_val == "[ ]" else "[ ]"
                self.tree.item(item, values=(new_val, self.tree.item(item, "values")[1]))

    def select_output_dir(self):
        """Ask the user for a new output directory."""
        path = filedialog.askdirectory()
        if path:
            self.output_dir.set(path)

    def select_files(self):
        """Add image files to the list; the full path is stored in the row tag."""
        files = filedialog.askopenfilenames(filetypes=[("图片文件", "*.png;*.jpg;*.jpeg;*.bmp;*.gif")])
        for f in files:
            self.tree.insert("", tk.END, values=("[ ]", os.path.basename(f)), tags=(f,))

    def remove_selected(self):
        """Remove every checked row from the file list."""
        to_remove = []
        for item in self.tree.get_children():
            if self.tree.item(item, "values")[0] == "[√]":
                to_remove.append(item)
        for item in reversed(to_remove):
            self.tree.delete(item)

    def clear_files(self):
        """Remove all rows from the file list."""
        for item in self.tree.get_children():
            self.tree.delete(item)

    def start_conversion(self, convert_all):
        """Gather input paths (all rows, or only checked ones) and run the conversion."""
        input_files = [
            self.tree.item(item, "tags")[0]
            for item in self.tree.get_children()
            if convert_all or self.tree.item(item, "values")[0] == "[√]"
        ]
        if not input_files:
            msg = "没有找到可转换的文件" if convert_all else "没有选中任何文件"
            messagebox.showwarning("警告", msg)
            return
        os.makedirs(self.output_dir.get(), exist_ok=True)
        # Parse conversion parameters from the UI state.
        width, height = map(int, self.resolution.get().split('x'))
        compress = CompressMethod.RLE if self.compress_method.get() == "RLE" else CompressMethod.NONE
        # Run the conversion.
        self.convert_images(input_files, width, height, compress)

    def convert_images(self, input_files, width, height, compress):
        """Resize each file and emit a PNG copy plus an LVGL .c array; per-file failures are logged."""
        success_count = 0
        total_files = len(input_files)
        for idx, file_path in enumerate(input_files):
            try:
                print(f"正在处理: {os.path.basename(file_path)}")
                with Image.open(file_path) as img:
                    # Resize to the requested resolution.
                    img = img.resize((width, height), Image.Resampling.LANCZOS)
                    # Pick the color format.
                    color_format_str = self.color_format.get()
                    if color_format_str == "自动识别":
                        # Detect an alpha channel (mode or palette transparency).
                        has_alpha = img.mode in ('RGBA', 'LA') or (img.mode == 'P' and 'transparency' in img.info)
                        if has_alpha:
                            img = img.convert('RGBA')
                            cf = ColorFormat.RGB565A8
                        else:
                            img = img.convert('RGB')
                            cf = ColorFormat.RGB565
                    else:
                        if color_format_str == "RGB565A8":
                            img = img.convert('RGBA')
                            cf = ColorFormat.RGB565A8
                        else:
                            img = img.convert('RGB')
                            cf = ColorFormat.RGB565
                    # Save the resized image next to the .c output.
                    base_name = os.path.splitext(os.path.basename(file_path))[0]
                    output_image_path = os.path.join(self.output_dir.get(), f"{base_name}_{width}x{height}.png")
                    img.save(output_image_path, 'PNG')
                    # Write a temporary PNG for the converter.
                    # NOTE(review): the temp file is only unlinked on success,
                    # so it leaks when the conversion below raises.
                    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmpfile:
                        temp_path = tmpfile.name
                        img.save(temp_path, 'PNG')
                    # Convert to an LVGL C array.
                    lvgl_img = LVGLImage().from_png(temp_path, cf=cf)
                    output_c_path = os.path.join(self.output_dir.get(), f"{base_name}.c")
                    lvgl_img.to_c_array(output_c_path, compress=compress)
                    success_count += 1
                    os.unlink(temp_path)
                    print(f"成功转换: {base_name}.c\n")
            except Exception as e:
                print(f"转换失败: {str(e)}\n")
        print(f"转换完成! 成功 {success_count}/{total_files} 个文件\n")
# Script entry point: build the Tk root window and run the GUI event loop.
if __name__ == "__main__":
    root = tk.Tk()
    app = ImageConverterApp(root)
    root.mainloop()

View File

@@ -1,3 +1,3 @@
lz4==4.4.4
Pillow==11.3.0
pypng==0.20220715.0
lz4==4.4.4
Pillow==11.3.0
pypng==0.20220715.0

View File

@@ -1,280 +1,280 @@
"""
实时AFSK解调器 - 基于Goertzel算法
"""
import numpy as np
from collections import deque
class TraceGoertzel:
    """Streaming Goertzel filter for a single normalized frequency.

    Each call runs the standard Goertzel recurrence over one window of
    samples and returns the amplitude estimate for the configured tone.
    """

    def __init__(self, freq: float, n: int):
        """
        Args:
            freq: normalized frequency (target frequency / sample rate).
            n: window length in samples.
        """
        self.freq = freq
        self.n = n
        # Precomputed coefficients for the recurrence and amplitude step.
        self.k = int(freq * n)
        self.w = 2.0 * np.pi * freq
        self.cw = np.cos(self.w)
        self.sw = np.sin(self.w)
        self.c = 2.0 * self.cw
        # Recurrence state: the last two values S[n-1], S[n-2].
        self.zs = deque([0.0, 0.0], maxlen=2)

    def reset(self):
        """Zero the recurrence state before processing a fresh window."""
        self.zs.clear()
        self.zs.extend([0.0, 0.0])

    def __call__(self, xs):
        """Run the recurrence over the window *xs* and return its amplitude.

        Args:
            xs: iterable of samples.
        Returns:
            Amplitude estimate for the configured frequency.
        """
        self.reset()
        for sample in xs:
            prev1, prev2 = self.zs[-1], self.zs[-2]
            # S[n] = x[n] + C * S[n-1] - S[n-2]
            self.zs.append(float(sample + self.c * prev1 - prev2))
        return self.amp

    @property
    def amp(self) -> float:
        """Amplitude derived from the current recurrence state."""
        prev1, prev2 = self.zs[-1], self.zs[-2]
        in_phase = self.cw * prev1 - prev2
        quadrature = self.sw * prev1
        return np.sqrt(in_phase ** 2 + quadrature ** 2) / (self.n / 2.0)
class PairGoertzel:
    """Two-tone (space/mark) AFSK demodulator built on paired Goertzel filters."""

    def __init__(self, f_sample: int, f_space: int, f_mark: int,
                 bit_rate: int, win_size: int):
        """
        Args:
            f_sample: sample rate in Hz (must be a multiple of bit_rate).
            f_space: space tone frequency (logical 0).
            f_mark: mark tone frequency (logical 1).
            bit_rate: bits per second.
            win_size: Goertzel window length in samples.
        """
        assert f_sample % bit_rate == 0, "采样频率必须是比特率的整数倍"
        self.Fs = f_sample
        self.F0 = f_space
        self.F1 = f_mark
        self.bit_rate = bit_rate
        # Number of samples making up one bit period.
        self.n_per_bit = int(f_sample // bit_rate)
        # Normalized tone frequencies for the two filters.
        f0 = f_space / f_sample
        f1 = f_mark / f_sample
        self.g0 = TraceGoertzel(freq=f0, n=win_size)
        self.g1 = TraceGoertzel(freq=f1, n=win_size)
        # Sliding sample buffer and per-bit sample counter.
        self.in_buffer = deque(maxlen=win_size)
        self.out_count = 0
        print(f"PairGoertzel initialized: f0={f0:.6f}, f1={f1:.6f}, win_size={win_size}, n_per_bit={self.n_per_bit}")

    def __call__(self, s: float):
        """Feed one sample into the demodulator.

        Args:
            s: sample value.
        Returns:
            (amp0, amp1, p1_prob): space amplitude, mark amplitude and mark
            probability. p1_prob is None except on the sample that completes
            a bit period.
        """
        self.in_buffer.append(s)
        self.out_count += 1
        amp0, amp1, p1_prob = 0, 0, None
        if self.out_count >= self.n_per_bit:
            # A full bit period is buffered: measure both tones and normalize.
            amp0 = self.g0(self.in_buffer)
            amp1 = self.g1(self.in_buffer)
            p1_prob = amp1 / (amp0 + amp1 + 1e-8)
            self.out_count = 0
        return amp0, amp1, p1_prob
class RealTimeAFSKDecoder:
"""实时AFSK解码器 - 基于起始帧触发"""
def __init__(self, f_sample: int = 16000, mark_freq: int = 1800,
space_freq: int = 1500, bitrate: int = 100,
s_goertzel: int = 9, threshold: float = 0.5):
"""
初始化实时AFSK解码器
Args:
f_sample: 采样频率
mark_freq: Mark频率
space_freq: Space频率
bitrate: 比特率
s_goertzel: Goertzel窗口大小系数 (win_size = f_sample // mark_freq * s_goertzel)
threshold: 判决门限
"""
self.f_sample = f_sample
self.mark_freq = mark_freq
self.space_freq = space_freq
self.bitrate = bitrate
self.threshold = threshold
# 计算窗口大小 - 与参考代码一致
win_size = int(f_sample / mark_freq * s_goertzel)
# 初始化解调器
self.demodulator = PairGoertzel(f_sample, space_freq, mark_freq,
bitrate, win_size)
# 帧定义 - 与参考代码一致
self.start_bytes = b'\x01\x02'
self.end_bytes = b'\x03\x04'
self.start_bits = "".join(format(int(x), '08b') for x in self.start_bytes)
self.end_bits = "".join(format(int(x), '08b') for x in self.end_bytes)
# 状态机
self.state = "idle" # idle / entering
# 存储解调结果
self.buffer_prelude:deque = deque(maxlen=len(self.start_bits)) # 判断是否启动
self.indicators = [] # 存储概率序列
self.signal_bits = "" # 存储比特序列
self.text_cache = ""
# 解码结果
self.decoded_messages = []
self.total_bits_received = 0
print(f"Decoder initialized: win_size={win_size}")
print(f"Start frame: {self.start_bits} (from {self.start_bytes.hex()})")
print(f"End frame: {self.end_bits} (from {self.end_bytes.hex()})")
def process_audio(self, samples: np.array) -> str:
"""
处理音频数据并返回解码文本
Args:
audio_data: 音频字节数据 (16-bit PCM)
Returns:
新解码的文本
"""
new_text = ""
# 逐个处理采样点
for sample in samples:
amp0, amp1, p1_prob = self.demodulator(sample)
# 如果有概率输出,记录并判决
if p1_prob is not None:
bit = '1' if p1_prob > self.threshold else '0'
match self.state:
case "idle":
self.buffer_prelude.append(bit)
pass
case "entering":
self.buffer_prelude.append(bit)
self.signal_bits += bit
self.total_bits_received += 1
case _:
pass
self.indicators.append(p1_prob)
# 检查状态机
if self.state == "idle" and "".join(self.buffer_prelude) == self.start_bits:
self.state = "entering"
self.text_cache = ""
self.signal_bits = "" # 清空比特序列
self.buffer_prelude.clear()
elif self.state == "entering" and ("".join(self.buffer_prelude) == self.end_bits or len(self.signal_bits) >= 256):
self.state = "idle"
self.buffer_prelude.clear()
# 每收集一定数量的比特后尝试解码
if len(self.signal_bits) >= 8:
text = self._decode_bits_to_text(self.signal_bits)
if len(text) > len(self.text_cache):
new_text = text[len(self.text_cache) - len(text):]
self.text_cache = text
return new_text
def _decode_bits_to_text(self, bits: str) -> str:
"""
将比特串解码为文本
Args:
bits: 比特串
Returns:
解码出的文本
"""
if len(bits) < 8:
return ""
decoded_text = ""
byte_count = len(bits) // 8
for i in range(byte_count):
# 提取8位
byte_bits = bits[i*8:(i+1)*8]
# 位转字节
byte_val = int(byte_bits, 2)
# 尝试解码为ASCII字符
if 32 <= byte_val <= 126: # 可打印ASCII字符
decoded_text += chr(byte_val)
elif byte_val == 0: # NULL字符忽略
continue
else:
# 非可打印字符pass以十六进制显示
pass
# decoded_text += f"\\x{byte_val:02X}"
return decoded_text
def clear(self):
"""清空解码状态"""
self.indicators = []
self.signal_bits = ""
self.decoded_messages = []
self.total_bits_received = 0
print("解码器状态已清空")
def get_stats(self) -> dict:
"""获取解码统计信息"""
return {
'prelude_bits': "".join(self.buffer_prelude),
"state": self.state,
'total_chars': sum(len(msg) for msg in self.text_cache),
'buffer_bits': len(self.signal_bits),
'mark_freq': self.mark_freq,
'space_freq': self.space_freq,
'bitrate': self.bitrate,
'threshold': self.threshold,
}
"""
实时AFSK解调器 - 基于Goertzel算法
"""
import numpy as np
from collections import deque
class TraceGoertzel:
    """Streaming Goertzel filter for one normalized frequency (matches the reference implementation)."""

    def __init__(self, freq: float, n: int):
        """
        Args:
            freq: normalized frequency (target frequency / sample rate)
            n: window size in samples
        """
        self.freq = freq
        self.n = n
        # Precomputed coefficients - consistent with the reference code.
        self.k = int(freq * n)
        self.w = 2.0 * np.pi * freq
        self.cw = np.cos(self.w)
        self.sw = np.sin(self.w)
        self.c = 2.0 * self.cw
        # Recurrence state - deque holds the last two values S[n-1], S[n-2].
        self.zs = deque([0.0, 0.0], maxlen=2)

    def reset(self):
        """Reset the recurrence state to zeros."""
        self.zs.clear()
        self.zs.extend([0.0, 0.0])

    def __call__(self, xs):
        """
        Process one window of samples - interface consistent with the reference code.
        Args:
            xs: sample sequence
        Returns:
            the computed amplitude
        """
        self.reset()
        for x in xs:
            z1, z2 = self.zs[-1], self.zs[-2]  # Z[-1], Z[-2]
            z0 = x + self.c * z1 - z2  # S[n] = x[n] + C * S[n-1] - S[n-2]
            self.zs.append(float(z0))  # update the state sequence
        return self.amp

    @property
    def amp(self) -> float:
        """Amplitude from the current state - consistent with the reference code."""
        z1, z2 = self.zs[-1], self.zs[-2]
        ip = self.cw * z1 - z2
        qp = self.sw * z1
        return np.sqrt(ip**2 + qp**2) / (self.n / 2.0)
class PairGoertzel:
    """Dual-frequency Goertzel demodulator for a space/mark AFSK tone pair."""

    def __init__(self, f_sample: int, f_space: int, f_mark: int,
                 bit_rate: int, win_size: int):
        """
        Args:
            f_sample: sample rate in Hz
            f_space: space frequency (usually logical 0)
            f_mark: mark frequency (usually logical 1)
            bit_rate: bits per second
            win_size: Goertzel window size
        """
        assert f_sample % bit_rate == 0, "采样频率必须是比特率的整数倍"
        self.Fs = f_sample
        self.F0 = f_space
        self.F1 = f_mark
        self.bit_rate = bit_rate
        self.n_per_bit = int(f_sample // bit_rate)  # samples per bit period
        # Normalized tone frequencies.
        f1 = f_mark / f_sample
        f0 = f_space / f_sample
        # One Goertzel filter per tone.
        self.g0 = TraceGoertzel(freq=f0, n=win_size)
        self.g1 = TraceGoertzel(freq=f1, n=win_size)
        # Input sample buffer and per-bit counter.
        self.in_buffer = deque(maxlen=win_size)
        self.out_count = 0
        print(f"PairGoertzel initialized: f0={f0:.6f}, f1={f1:.6f}, win_size={win_size}, n_per_bit={self.n_per_bit}")

    def __call__(self, s: float):
        """
        Process a single sample - interface consistent with the reference code.
        Args:
            s: sample value
        Returns:
            (amp0, amp1, p1_prob) - space amplitude, mark amplitude, mark probability
            (p1_prob is None except on the sample completing a bit period)
        """
        self.in_buffer.append(s)
        self.out_count += 1
        amp0, amp1, p1_prob = 0, 0, None
        # Emit one result per bit period.
        if self.out_count >= self.n_per_bit:
            amp0 = self.g0(self.in_buffer)  # space-tone amplitude
            amp1 = self.g1(self.in_buffer)  # mark-tone amplitude
            p1_prob = amp1 / (amp0 + amp1 + 1e-8)  # normalized mark probability
            self.out_count = 0
        return amp0, amp1, p1_prob
class RealTimeAFSKDecoder:
    """Real-time AFSK decoder - triggered by a start frame (0x01 0x02), terminated by 0x03 0x04."""

    def __init__(self, f_sample: int = 16000, mark_freq: int = 1800,
                 space_freq: int = 1500, bitrate: int = 100,
                 s_goertzel: int = 9, threshold: float = 0.5):
        """
        Args:
            f_sample: sample rate in Hz
            mark_freq: mark tone frequency
            space_freq: space tone frequency
            bitrate: bits per second
            s_goertzel: window-size factor (win_size = f_sample // mark_freq * s_goertzel)
            threshold: mark-probability decision threshold
        """
        self.f_sample = f_sample
        self.mark_freq = mark_freq
        self.space_freq = space_freq
        self.bitrate = bitrate
        self.threshold = threshold
        # Window size - consistent with the reference code.
        win_size = int(f_sample / mark_freq * s_goertzel)
        # Dual-tone demodulator.
        self.demodulator = PairGoertzel(f_sample, space_freq, mark_freq,
                                        bitrate, win_size)
        # Frame delimiters, expanded to bit strings for matching.
        self.start_bytes = b'\x01\x02'
        self.end_bytes = b'\x03\x04'
        self.start_bits = "".join(format(int(x), '08b') for x in self.start_bytes)
        self.end_bits = "".join(format(int(x), '08b') for x in self.end_bytes)
        # State machine: "idle" until the start frame appears, then "entering".
        self.state = "idle"  # idle / entering
        # Demodulation results.
        self.buffer_prelude: deque = deque(maxlen=len(self.start_bits))  # sliding bit window for frame detection
        self.indicators = []  # mark-probability history
        self.signal_bits = ""  # payload bit string
        self.text_cache = ""  # text decoded so far for the current frame
        # Decode results.
        self.decoded_messages = []
        self.total_bits_received = 0
        print(f"Decoder initialized: win_size={win_size}")
        print(f"Start frame: {self.start_bits} (from {self.start_bytes.hex()})")
        print(f"End frame: {self.end_bits} (from {self.end_bytes.hex()})")

    def process_audio(self, samples: np.ndarray) -> str:
        """
        Process audio samples and return newly decoded text.
        Args:
            samples: float audio samples (normalized 16-bit PCM)
        Returns:
            text decoded since the previous call
        """
        new_text = ""
        # Process samples one at a time.
        for sample in samples:
            amp0, amp1, p1_prob = self.demodulator(sample)
            # A probability is emitted once per bit period; record and decide.
            if p1_prob is not None:
                bit = '1' if p1_prob > self.threshold else '0'
                match self.state:
                    case "idle":
                        self.buffer_prelude.append(bit)
                        pass
                    case "entering":
                        self.buffer_prelude.append(bit)
                        self.signal_bits += bit
                        self.total_bits_received += 1
                    case _:
                        pass
                self.indicators.append(p1_prob)
                # Advance the state machine on start/end frame markers
                # (a 256-bit overflow also ends the frame).
                if self.state == "idle" and "".join(self.buffer_prelude) == self.start_bits:
                    self.state = "entering"
                    self.text_cache = ""
                    self.signal_bits = ""  # clear the payload bit string
                    self.buffer_prelude.clear()
                elif self.state == "entering" and ("".join(self.buffer_prelude) == self.end_bits or len(self.signal_bits) >= 256):
                    self.state = "idle"
                    self.buffer_prelude.clear()
        # Attempt a decode once at least one whole byte has accumulated.
        if len(self.signal_bits) >= 8:
            text = self._decode_bits_to_text(self.signal_bits)
            if len(text) > len(self.text_cache):
                # Only the suffix beyond the cached text is new.
                new_text = text[len(self.text_cache) - len(text):]
                self.text_cache = text
        return new_text

    def _decode_bits_to_text(self, bits: str) -> str:
        """
        Decode a bit string into text.
        Args:
            bits: '0'/'1' string
        Returns:
            the decoded text (non-printable bytes are dropped)
        """
        if len(bits) < 8:
            return ""
        decoded_text = ""
        byte_count = len(bits) // 8
        for i in range(byte_count):
            # Take 8 bits.
            byte_bits = bits[i*8:(i+1)*8]
            # Bits to byte value.
            byte_val = int(byte_bits, 2)
            # Keep printable ASCII only.
            if 32 <= byte_val <= 126:  # printable ASCII character
                decoded_text += chr(byte_val)
            elif byte_val == 0:  # ignore NUL bytes
                continue
            else:
                # Non-printable byte: skipped (hex rendering left disabled).
                pass
                # decoded_text += f"\\x{byte_val:02X}"
        return decoded_text

    def clear(self):
        """Clear accumulated decode results.

        NOTE(review): does not reset self.state, self.buffer_prelude or
        self.text_cache — a partial frame survives clear(); confirm intended.
        """
        self.indicators = []
        self.signal_bits = ""
        self.decoded_messages = []
        self.total_bits_received = 0
        print("解码器状态已清空")

    def get_stats(self) -> dict:
        """Return a snapshot of decoder state for display/debugging."""
        return {
            'prelude_bits': "".join(self.buffer_prelude),
            "state": self.state,
            # NOTE(review): text_cache is a str, so this sums 1 per character
            # and equals len(self.text_cache).
            'total_chars': sum(len(msg) for msg in self.text_cache),
            'buffer_bits': len(self.signal_bits),
            'mark_freq': self.mark_freq,
            'space_freq': self.space_freq,
            'bitrate': self.bitrate,
            'threshold': self.threshold,
        }

View File

@@ -1,444 +1,444 @@
import sys
import numpy as np
import asyncio
import wave
from collections import deque
import qasync
import matplotlib
matplotlib.use('qtagg')
from matplotlib.backends.backend_qtagg import FigureCanvasQTAgg as FigureCanvas
from matplotlib.backends.backend_qtagg import NavigationToolbar2QT as NavigationToolbar # noqa: F401
from matplotlib.figure import Figure
from PyQt6.QtWidgets import (QApplication, QMainWindow, QVBoxLayout, QWidget,
QHBoxLayout, QLineEdit, QPushButton, QLabel, QTextEdit)
from PyQt6.QtCore import QTimer
# 导入解码器
from demod import RealTimeAFSKDecoder
class UDPServerProtocol(asyncio.DatagramProtocol):
    """Collects raw audio bytes from the first UDP peer that sends to us."""

    def __init__(self, data_queue):
        # Address of the first sender; datagrams from anyone else are dropped.
        self.client_address = None
        self.data_queue: deque = data_queue

    def connection_made(self, transport):
        self.transport = transport

    def datagram_received(self, data, addr):
        # Latch onto the first peer we hear from.
        if self.client_address is None:
            self.client_address = addr
            print(f"接受来自 {addr} 的连接")
        if addr != self.client_address:
            print(f"忽略来自未知地址 {addr} 的数据")
            return
        # Append the payload bytes to the shared buffer.
        self.data_queue.extend(data)
class MatplotlibWidget(QWidget):
    """Live waveform + spectrum view that also feeds the AFSK decoder.

    Raw PCM bytes are pushed into self.wave_data (by the UDP protocol);
    a QTimer periodically drains them, runs the decoder and redraws
    both subplots.
    """

    def __init__(self, parent=None):
        super().__init__(parent)
        self.figure = Figure()
        # Qt container widget hosting the Matplotlib figure.
        self.canvas = FigureCanvas(self.figure)
        # Optional navigation toolbar; currently disabled.
        # self.toolbar = NavigationToolbar(self.canvas, self)
        self.toolbar = None
        layout = QVBoxLayout()
        # Fix: the old code unconditionally called layout.addWidget(self.toolbar),
        # which raises TypeError in PyQt6 because the toolbar is None.
        if self.toolbar is not None:
            layout.addWidget(self.toolbar)
        layout.addWidget(self.canvas)
        self.setLayout(layout)
        # Audio parameters and buffers.
        self.freq = 16000      # sample rate in Hz
        self.time_window = 20  # seconds of signal kept for display
        # Raw byte queue (2 bytes per 16-bit sample), filled by the UDP server.
        self.wave_data = deque(maxlen=self.freq * self.time_window * 2)
        # Decoded float samples used for plotting.
        self.signals = deque(maxlen=self.freq * self.time_window)
        # Two stacked subplots: time domain on top, spectrum below.
        self.ax1 = self.figure.add_subplot(2, 1, 1)
        self.ax2 = self.figure.add_subplot(2, 1, 2)
        self.ax1.set_title('Real-time Audio Waveform')
        self.ax1.set_xlabel('Sample Index')
        self.ax1.set_ylabel('Amplitude')
        self.line_time, = self.ax1.plot([], [])
        self.ax1.grid(True, alpha=0.3)
        self.ax2.set_title('Real-time Frequency Spectrum')
        self.ax2.set_xlabel('Frequency (Hz)')
        self.ax2.set_ylabel('Magnitude')
        self.line_freq, = self.ax2.plot([], [])
        self.ax2.grid(True, alpha=0.3)
        self.figure.tight_layout()
        # Redraw every 100 ms while plotting is active.
        self.timer = QTimer(self)
        self.timer.setInterval(100)
        self.timer.timeout.connect(self.update_plot)
        # AFSK decoder fed from the same sample stream.
        self.decoder = RealTimeAFSKDecoder(
            f_sample=self.freq,
            mark_freq=1800,
            space_freq=1500,
            bitrate=100,
            s_goertzel=9,
            threshold=0.5
        )
        # Called with each chunk of newly decoded text (assigned by the owner).
        self.decode_callback = None

    def start_plotting(self):
        """Start the periodic redraw/decode timer."""
        self.timer.start()

    def stop_plotting(self):
        """Stop the periodic redraw/decode timer."""
        self.timer.stop()

    def update_plot(self):
        """Drain buffered PCM bytes, decode AFSK, and refresh both plots."""
        if len(self.wave_data) >= 2:
            # Drain an even number of bytes (2 bytes per 16-bit sample).
            even = len(self.wave_data) // 2 * 2
            print(f"length of wave_data: {len(self.wave_data)}")
            drained = [self.wave_data.popleft() for _ in range(even)]
            # Little-endian int16 -> float in [-1, 1).
            signal = np.frombuffer(bytearray(drained), dtype='<i2') / 32768
            # Feed the new samples to the decoder; returns only the new text.
            decoded_text_new = self.decoder.process_audio(signal)
            if decoded_text_new and self.decode_callback:
                self.decode_callback(decoded_text_new)
            self.signals.extend(signal.tolist())
        if len(self.signals) > 0:
            # Show at most the latest time_window seconds of samples.
            signal = np.array(self.signals)
            max_samples = min(len(signal), self.freq * self.time_window)
            if len(signal) > max_samples:
                signal = signal[-max_samples:]
            # Time-domain trace.
            x = np.arange(len(signal))
            self.line_time.set_data(x, signal)
            if len(signal) > 0:
                self.ax1.set_xlim(0, len(signal))
                y_min, y_max = np.min(signal), np.max(signal)
                if y_min != y_max:
                    margin = (y_max - y_min) * 0.1
                    self.ax1.set_ylim(y_min - margin, y_max + margin)
                else:
                    self.ax1.set_ylim(-1, 1)
            # Spectrum of the displayed window (FFT magnitude).
            if len(signal) > 1:
                fft_signal = np.abs(np.fft.fft(signal))
                frequencies = np.fft.fftfreq(len(signal), 1 / self.freq)
                # Keep the non-negative frequencies only.
                positive_freq_idx = frequencies >= 0
                freq_positive = frequencies[positive_freq_idx]
                fft_positive = fft_signal[positive_freq_idx]
                self.line_freq.set_data(freq_positive, fft_positive)
                if len(fft_positive) > 0:
                    # Cap the displayed range at 4 kHz (or Nyquist, if lower).
                    max_freq_show = min(4000, self.freq // 2)
                    freq_mask = freq_positive <= max_freq_show
                    if np.any(freq_mask):
                        self.ax2.set_xlim(0, max_freq_show)
                        fft_masked = fft_positive[freq_mask]
                        if len(fft_masked) > 0:
                            fft_max = np.max(fft_masked)
                            if fft_max > 0:
                                self.ax2.set_ylim(0, fft_max * 1.1)
                            else:
                                self.ax2.set_ylim(0, 1)
            self.canvas.draw()
class MainWindow(QMainWindow):
    """Top-level window: live waveform/spectrum view, UDP listener controls,
    and a rolling log of the real-time AFSK decode output."""

    def __init__(self):
        super().__init__()
        self.setWindowTitle("Acoustic Check")
        self.setGeometry(100, 100, 1000, 800)
        # Central widget
        main_widget = QWidget()
        self.setCentralWidget(main_widget)
        # Main layout
        main_layout = QVBoxLayout(main_widget)
        # Plotting area
        self.matplotlib_widget = MatplotlibWidget()
        main_layout.addWidget(self.matplotlib_widget)
        # Control panel
        control_panel = QWidget()
        control_layout = QHBoxLayout(control_panel)
        # Listen address and port inputs
        control_layout.addWidget(QLabel("监听地址:"))
        self.address_input = QLineEdit("0.0.0.0")
        self.address_input.setFixedWidth(120)
        control_layout.addWidget(self.address_input)
        control_layout.addWidget(QLabel("端口:"))
        self.port_input = QLineEdit("8000")
        self.port_input.setFixedWidth(80)
        control_layout.addWidget(self.port_input)
        # Listen toggle button
        self.listen_button = QPushButton("开始监听")
        self.listen_button.clicked.connect(self.toggle_listening)
        control_layout.addWidget(self.listen_button)
        # Status label
        self.status_label = QLabel("状态: 未连接")
        control_layout.addWidget(self.status_label)
        # Received-data statistics label
        self.data_label = QLabel("接收数据: 0 bytes")
        control_layout.addWidget(self.data_label)
        # Save button (enabled once listening has started)
        self.save_button = QPushButton("保存音频")
        self.save_button.clicked.connect(self.save_audio)
        self.save_button.setEnabled(False)
        control_layout.addWidget(self.save_button)
        control_layout.addStretch()  # flexible spacer
        main_layout.addWidget(control_panel)
        # Decode display area
        decode_panel = QWidget()
        decode_layout = QVBoxLayout(decode_panel)
        # Decode section title
        decode_title = QLabel("实时AFSK解码结果:")
        decode_title.setStyleSheet("font-weight: bold; font-size: 14px;")
        decode_layout.addWidget(decode_title)
        # Decoded-text display (read-only, monospace)
        self.decode_text = QTextEdit()
        self.decode_text.setMaximumHeight(150)
        self.decode_text.setReadOnly(True)
        self.decode_text.setStyleSheet("font-family: 'Courier New', monospace; font-size: 12px;")
        decode_layout.addWidget(self.decode_text)
        # Decode control row
        decode_control_layout = QHBoxLayout()
        # Clear button
        self.clear_decode_button = QPushButton("清空解码")
        self.clear_decode_button.clicked.connect(self.clear_decode_text)
        decode_control_layout.addWidget(self.clear_decode_button)
        # Decode statistics label
        self.decode_stats_label = QLabel("解码统计: 0 bits, 0 chars")
        decode_control_layout.addWidget(self.decode_stats_label)
        decode_control_layout.addStretch()
        decode_layout.addLayout(decode_control_layout)
        main_layout.addWidget(decode_panel)
        # Route decoded text from the plot widget into this window.
        self.matplotlib_widget.decode_callback = self.on_decode_text
        # UDP state
        self.udp_transport = None
        self.is_listening = False
        # Statistics refresh timer
        self.stats_timer = QTimer(self)
        self.stats_timer.setInterval(1000)  # refresh statistics once per second
        self.stats_timer.timeout.connect(self.update_stats)

    def on_decode_text(self, new_text: str):
        """Append newly decoded text to the log, capped at the last 1000 chars."""
        if new_text:
            # Append the freshly decoded characters.
            current_text = self.decode_text.toPlainText()
            updated_text = current_text + new_text
            # Keep only the most recent 1000 characters.
            if len(updated_text) > 1000:
                updated_text = updated_text[-1000:]
            self.decode_text.setPlainText(updated_text)
            # Scroll to the bottom.
            cursor = self.decode_text.textCursor()
            cursor.movePosition(cursor.MoveOperation.End)
            self.decode_text.setTextCursor(cursor)

    def clear_decode_text(self):
        """Clear the decode log and reset the decoder's internal state."""
        self.decode_text.clear()
        if hasattr(self.matplotlib_widget, 'decoder'):
            self.matplotlib_widget.decoder.clear()
        self.decode_stats_label.setText("解码统计: 0 bits, 0 chars")

    def update_decode_stats(self):
        """Refresh the decoder-statistics label from the decoder's counters."""
        if hasattr(self.matplotlib_widget, 'decoder'):
            stats = self.matplotlib_widget.decoder.get_stats()
            stats_text = (
                f"前置: {stats['prelude_bits']} , 已接收{stats['total_chars']} chars, "
                f"缓冲: {stats['buffer_bits']} bits, 状态: {stats['state']}"
            )
            self.decode_stats_label.setText(stats_text)

    def toggle_listening(self):
        """Button handler: start listening when idle, stop when active."""
        if not self.is_listening:
            self.start_listening()
        else:
            self.stop_listening()

    async def start_listening_async(self):
        """Create the UDP endpoint on the event loop; roll back the UI on failure."""
        try:
            address = self.address_input.text().strip()
            port = int(self.port_input.text().strip())
            loop = asyncio.get_running_loop()
            self.udp_transport, protocol = await loop.create_datagram_endpoint(
                lambda: UDPServerProtocol(self.matplotlib_widget.wave_data),
                local_addr=(address, port)
            )
            self.status_label.setText(f"状态: 监听中 ({address}:{port})")
            print(f"UDP服务器启动, 监听 {address}:{port}")
        except Exception as e:
            self.status_label.setText(f"状态: 启动失败 - {str(e)}")
            print(f"UDP服务器启动失败: {e}")
            # Revert the UI back to the idle state.
            self.is_listening = False
            self.listen_button.setText("开始监听")
            self.address_input.setEnabled(True)
            self.port_input.setEnabled(True)

    def start_listening(self):
        """Validate the port, flip the UI to listening mode and start the server."""
        try:
            int(self.port_input.text().strip())  # validate the port number format
        except ValueError:
            self.status_label.setText("状态: 端口号必须是数字")
            return
        self.is_listening = True
        self.listen_button.setText("停止监听")
        self.address_input.setEnabled(False)
        self.port_input.setEnabled(False)
        self.save_button.setEnabled(True)
        # Drop any stale bytes from a previous session.
        self.matplotlib_widget.wave_data.clear()
        # Start plotting and the statistics refresh.
        self.matplotlib_widget.start_plotting()
        self.stats_timer.start()
        # Launch the UDP server on the (qasync) event loop.
        loop = asyncio.get_event_loop()
        loop.create_task(self.start_listening_async())

    def stop_listening(self):
        """Tear down the UDP server and return the UI to the idle state."""
        self.is_listening = False
        self.listen_button.setText("开始监听")
        self.address_input.setEnabled(True)
        self.port_input.setEnabled(True)
        # Close the UDP transport.
        if self.udp_transport:
            self.udp_transport.close()
            self.udp_transport = None
        # Stop plotting and statistics updates.
        self.matplotlib_widget.stop_plotting()
        self.matplotlib_widget.wave_data.clear()
        self.stats_timer.stop()
        self.status_label.setText("状态: 已停止")

    def update_stats(self):
        """Periodic (1 s) refresh of the sample count and decode statistics."""
        data_size = len(self.matplotlib_widget.signals)
        self.data_label.setText(f"接收数据: {data_size} 采样")
        # Also refresh the decode statistics.
        self.update_decode_stats()

    def save_audio(self):
        """Dump the currently buffered samples to received_audio.wav.

        NOTE(review): ``signals`` holds float samples in [-1, 1) but they are
        written via ``tobytes()`` into a 16-bit WAV — confirm the intended
        sample format (the second copy of this file converts to int16).
        """
        if len(self.matplotlib_widget.signals) > 0:
            try:
                signal_data = np.array(self.matplotlib_widget.signals)
                # Write a mono 16-bit WAV at the capture sample rate.
                with wave.open("received_audio.wav", "wb") as wf:
                    wf.setnchannels(1)  # mono
                    wf.setsampwidth(2)  # 2 bytes per sample
                    wf.setframerate(self.matplotlib_widget.freq)  # sample rate
                    wf.writeframes(signal_data.tobytes())  # write the payload
                self.status_label.setText("状态: 音频已保存为 received_audio.wav")
                print("音频已保存为 received_audio.wav")
            except Exception as e:
                self.status_label.setText(f"状态: 保存失败 - {str(e)}")
                print(f"保存音频失败: {e}")
        else:
            self.status_label.setText("状态: 没有足够的数据可保存")
async def main():
"""异步主函数"""
app = QApplication(sys.argv)
# 设置异步事件循环
loop = qasync.QEventLoop(app)
asyncio.set_event_loop(loop)
window = MainWindow()
window.show()
try:
with loop:
await loop.run_forever()
except KeyboardInterrupt:
print("程序被用户中断")
finally:
# 确保清理资源
if window.udp_transport:
import sys
import numpy as np
import asyncio
import wave
from collections import deque
import qasync
import matplotlib
matplotlib.use('qtagg')
from matplotlib.backends.backend_qtagg import FigureCanvasQTAgg as FigureCanvas
from matplotlib.backends.backend_qtagg import NavigationToolbar2QT as NavigationToolbar # noqa: F401
from matplotlib.figure import Figure
from PyQt6.QtWidgets import (QApplication, QMainWindow, QVBoxLayout, QWidget,
QHBoxLayout, QLineEdit, QPushButton, QLabel, QTextEdit)
from PyQt6.QtCore import QTimer
# 导入解码器
from demod import RealTimeAFSKDecoder
class UDPServerProtocol(asyncio.DatagramProtocol):
    """UDP protocol that locks onto the first peer it hears from and appends
    that peer's raw PCM bytes to a shared queue; other senders are ignored."""

    def __init__(self, data_queue):
        self.client_address = None
        self.data_queue: deque = data_queue

    def connection_made(self, transport):
        self.transport = transport

    def datagram_received(self, data, addr):
        if self.client_address is None:
            # First datagram wins: remember this peer.
            self.client_address = addr
            print(f"接受来自 {addr} 的连接")
        if addr != self.client_address:
            print(f"忽略来自未知地址 {addr} 的数据")
            return
        # Queue the raw audio bytes for the plotting/decoding side.
        self.data_queue.extend(data)
class MatplotlibWidget(QWidget):
    """Embeds a two-pane matplotlib figure (waveform + spectrum) and owns the
    incoming byte queue, the AFSK decoder and the 100 ms refresh timer."""

    def __init__(self, parent=None):
        super().__init__(parent)
        # Matplotlib Figure and its Qt canvas container.
        self.figure = Figure()
        self.canvas = FigureCanvas(self.figure)
        # Navigation toolbar is currently disabled.
        # self.toolbar = NavigationToolbar(self.canvas, self)
        self.toolbar = None
        # Layout.
        layout = QVBoxLayout()
        # Fix: only add the toolbar when it exists — passing None to
        # QBoxLayout.addWidget() is not a valid widget argument in PyQt6.
        if self.toolbar is not None:
            layout.addWidget(self.toolbar)
        layout.addWidget(self.canvas)
        self.setLayout(layout)
        # Audio parameters.
        self.freq = 16000  # sample rate (Hz)
        self.time_window = 20  # seconds of audio kept/shown
        # Raw byte queue (2 bytes per 16-bit sample) feeding decode + plot.
        self.wave_data = deque(maxlen=self.freq * self.time_window * 2)
        # Normalized float samples used for plotting.
        self.signals = deque(maxlen=self.freq * self.time_window)
        # Two stacked subplots: time domain on top, spectrum below.
        self.ax1 = self.figure.add_subplot(2, 1, 1)
        self.ax2 = self.figure.add_subplot(2, 1, 2)
        # Time-domain subplot.
        self.ax1.set_title('Real-time Audio Waveform')
        self.ax1.set_xlabel('Sample Index')
        self.ax1.set_ylabel('Amplitude')
        self.line_time, = self.ax1.plot([], [])
        self.ax1.grid(True, alpha=0.3)
        # Frequency-domain subplot.
        self.ax2.set_title('Real-time Frequency Spectrum')
        self.ax2.set_xlabel('Frequency (Hz)')
        self.ax2.set_ylabel('Magnitude')
        self.line_freq, = self.ax2.plot([], [])
        self.ax2.grid(True, alpha=0.3)
        self.figure.tight_layout()
        # Refresh timer.
        self.timer = QTimer(self)
        self.timer.setInterval(100)  # update every 100 ms
        self.timer.timeout.connect(self.update_plot)
        # Real-time AFSK decoder.
        self.decoder = RealTimeAFSKDecoder(
            f_sample=self.freq,
            mark_freq=1800,
            space_freq=1500,
            bitrate=100,
            s_goertzel=9,
            threshold=0.5
        )
        # Callback invoked with newly decoded text.
        self.decode_callback = None

    def start_plotting(self):
        """Start the periodic plot refresh."""
        self.timer.start()

    def stop_plotting(self):
        """Stop the periodic plot refresh."""
        self.timer.stop()

    def update_plot(self):
        """Timer callback: drain pending PCM bytes, feed the decoder, redraw.

        Only whole little-endian 16-bit samples are consumed; an odd trailing
        byte waits in the queue. Fix: removed the per-tick debug ``print``
        that flooded stdout ten times per second.
        """
        if len(self.wave_data) >= 2:
            # Consume an even number of bytes (whole 16-bit samples only).
            even = len(self.wave_data) // 2 * 2
            drained = bytearray(self.wave_data.popleft() for _ in range(even))
            # Normalize int16 -> float in [-1, 1).
            new_samples = np.frombuffer(drained, dtype='<i2') / 32768
            # Feed the decoder; it returns newly decoded text (may be empty).
            decoded_text_new = self.decoder.process_audio(new_samples)
            if decoded_text_new and self.decode_callback:
                self.decode_callback(decoded_text_new)
            # Append to the plotting ring buffer.
            self.signals.extend(new_samples.tolist())
        if len(self.signals) > 0:
            # Show at most the configured time window.
            signal = np.array(self.signals)
            max_samples = min(len(signal), self.freq * self.time_window)
            if len(signal) > max_samples:
                signal = signal[-max_samples:]
            # Time-domain trace.
            x = np.arange(len(signal))
            self.line_time.set_data(x, signal)
            # Auto-scale the time-domain axes.
            if len(signal) > 0:
                self.ax1.set_xlim(0, len(signal))
                y_min, y_max = np.min(signal), np.max(signal)
                if y_min != y_max:
                    margin = (y_max - y_min) * 0.1
                    self.ax1.set_ylim(y_min - margin, y_max + margin)
                else:
                    self.ax1.set_ylim(-1, 1)
            # Magnitude spectrum of the visible window.
            if len(signal) > 1:
                fft_signal = np.abs(np.fft.fft(signal))
                frequencies = np.fft.fftfreq(len(signal), 1 / self.freq)
                # Keep only the non-negative frequencies.
                positive_freq_idx = frequencies >= 0
                freq_positive = frequencies[positive_freq_idx]
                fft_positive = fft_signal[positive_freq_idx]
                self.line_freq.set_data(freq_positive, fft_positive)
                # Auto-scale the frequency axes.
                if len(fft_positive) > 0:
                    # Cap the displayed band at 4 kHz (or Nyquist if lower).
                    max_freq_show = min(4000, self.freq // 2)
                    freq_mask = freq_positive <= max_freq_show
                    if np.any(freq_mask):
                        self.ax2.set_xlim(0, max_freq_show)
                        fft_masked = fft_positive[freq_mask]
                        if len(fft_masked) > 0:
                            fft_max = np.max(fft_masked)
                            if fft_max > 0:
                                self.ax2.set_ylim(0, fft_max * 1.1)
                            else:
                                self.ax2.set_ylim(0, 1)
        self.canvas.draw()
class MainWindow(QMainWindow):
    """Top-level window: live waveform/spectrum view, UDP listener controls,
    and a rolling log of the real-time AFSK decode output."""

    def __init__(self):
        super().__init__()
        self.setWindowTitle("Acoustic Check")
        self.setGeometry(100, 100, 1000, 800)
        # Central widget.
        main_widget = QWidget()
        self.setCentralWidget(main_widget)
        # Main layout.
        main_layout = QVBoxLayout(main_widget)
        # Plotting area.
        self.matplotlib_widget = MatplotlibWidget()
        main_layout.addWidget(self.matplotlib_widget)
        # Control panel.
        control_panel = QWidget()
        control_layout = QHBoxLayout(control_panel)
        # Listen address and port inputs.
        control_layout.addWidget(QLabel("监听地址:"))
        self.address_input = QLineEdit("0.0.0.0")
        self.address_input.setFixedWidth(120)
        control_layout.addWidget(self.address_input)
        control_layout.addWidget(QLabel("端口:"))
        self.port_input = QLineEdit("8000")
        self.port_input.setFixedWidth(80)
        control_layout.addWidget(self.port_input)
        # Listen toggle button.
        self.listen_button = QPushButton("开始监听")
        self.listen_button.clicked.connect(self.toggle_listening)
        control_layout.addWidget(self.listen_button)
        # Status label.
        self.status_label = QLabel("状态: 未连接")
        control_layout.addWidget(self.status_label)
        # Received-data statistics label.
        self.data_label = QLabel("接收数据: 0 bytes")
        control_layout.addWidget(self.data_label)
        # Save button (enabled once listening has started).
        self.save_button = QPushButton("保存音频")
        self.save_button.clicked.connect(self.save_audio)
        self.save_button.setEnabled(False)
        control_layout.addWidget(self.save_button)
        control_layout.addStretch()  # flexible spacer
        main_layout.addWidget(control_panel)
        # Decode display area.
        decode_panel = QWidget()
        decode_layout = QVBoxLayout(decode_panel)
        # Decode section title.
        decode_title = QLabel("实时AFSK解码结果:")
        decode_title.setStyleSheet("font-weight: bold; font-size: 14px;")
        decode_layout.addWidget(decode_title)
        # Decoded-text display (read-only, monospace).
        self.decode_text = QTextEdit()
        self.decode_text.setMaximumHeight(150)
        self.decode_text.setReadOnly(True)
        self.decode_text.setStyleSheet("font-family: 'Courier New', monospace; font-size: 12px;")
        decode_layout.addWidget(self.decode_text)
        # Decode control row.
        decode_control_layout = QHBoxLayout()
        # Clear button.
        self.clear_decode_button = QPushButton("清空解码")
        self.clear_decode_button.clicked.connect(self.clear_decode_text)
        decode_control_layout.addWidget(self.clear_decode_button)
        # Decode statistics label.
        self.decode_stats_label = QLabel("解码统计: 0 bits, 0 chars")
        decode_control_layout.addWidget(self.decode_stats_label)
        decode_control_layout.addStretch()
        decode_layout.addLayout(decode_control_layout)
        main_layout.addWidget(decode_panel)
        # Route decoded text from the plot widget into this window.
        self.matplotlib_widget.decode_callback = self.on_decode_text
        # UDP state.
        self.udp_transport = None
        self.is_listening = False
        # Statistics refresh timer.
        self.stats_timer = QTimer(self)
        self.stats_timer.setInterval(1000)  # refresh statistics once per second
        self.stats_timer.timeout.connect(self.update_stats)

    def on_decode_text(self, new_text: str):
        """Append newly decoded text to the log, capped at the last 1000 chars."""
        if new_text:
            # Append the freshly decoded characters.
            current_text = self.decode_text.toPlainText()
            updated_text = current_text + new_text
            # Keep only the most recent 1000 characters.
            if len(updated_text) > 1000:
                updated_text = updated_text[-1000:]
            self.decode_text.setPlainText(updated_text)
            # Scroll to the bottom.
            cursor = self.decode_text.textCursor()
            cursor.movePosition(cursor.MoveOperation.End)
            self.decode_text.setTextCursor(cursor)

    def clear_decode_text(self):
        """Clear the decode log and reset the decoder's internal state."""
        self.decode_text.clear()
        if hasattr(self.matplotlib_widget, 'decoder'):
            self.matplotlib_widget.decoder.clear()
        self.decode_stats_label.setText("解码统计: 0 bits, 0 chars")

    def update_decode_stats(self):
        """Refresh the decoder-statistics label from the decoder's counters."""
        if hasattr(self.matplotlib_widget, 'decoder'):
            stats = self.matplotlib_widget.decoder.get_stats()
            stats_text = (
                f"前置: {stats['prelude_bits']} , 已接收{stats['total_chars']} chars, "
                f"缓冲: {stats['buffer_bits']} bits, 状态: {stats['state']}"
            )
            self.decode_stats_label.setText(stats_text)

    def toggle_listening(self):
        """Button handler: start listening when idle, stop when active."""
        if not self.is_listening:
            self.start_listening()
        else:
            self.stop_listening()

    async def start_listening_async(self):
        """Create the UDP endpoint on the event loop; roll back the UI on failure."""
        try:
            address = self.address_input.text().strip()
            port = int(self.port_input.text().strip())
            loop = asyncio.get_running_loop()
            # The protocol instance is owned by the transport; we only keep
            # the transport so we can close it later.
            self.udp_transport, _protocol = await loop.create_datagram_endpoint(
                lambda: UDPServerProtocol(self.matplotlib_widget.wave_data),
                local_addr=(address, port)
            )
            self.status_label.setText(f"状态: 监听中 ({address}:{port})")
            print(f"UDP服务器启动, 监听 {address}:{port}")
        except Exception as e:
            self.status_label.setText(f"状态: 启动失败 - {str(e)}")
            print(f"UDP服务器启动失败: {e}")
            # Revert the UI back to the idle state.
            self.is_listening = False
            self.listen_button.setText("开始监听")
            self.address_input.setEnabled(True)
            self.port_input.setEnabled(True)

    def start_listening(self):
        """Validate the port, flip the UI to listening mode and start the server."""
        try:
            int(self.port_input.text().strip())  # validate the port number format
        except ValueError:
            self.status_label.setText("状态: 端口号必须是数字")
            return
        self.is_listening = True
        self.listen_button.setText("停止监听")
        self.address_input.setEnabled(False)
        self.port_input.setEnabled(False)
        self.save_button.setEnabled(True)
        # Drop any stale bytes from a previous session.
        self.matplotlib_widget.wave_data.clear()
        # Start plotting and the statistics refresh.
        self.matplotlib_widget.start_plotting()
        self.stats_timer.start()
        # Launch the UDP server on the (qasync) event loop.
        loop = asyncio.get_event_loop()
        loop.create_task(self.start_listening_async())

    def stop_listening(self):
        """Tear down the UDP server and return the UI to the idle state."""
        self.is_listening = False
        self.listen_button.setText("开始监听")
        self.address_input.setEnabled(True)
        self.port_input.setEnabled(True)
        # Close the UDP transport.
        if self.udp_transport:
            self.udp_transport.close()
            self.udp_transport = None
        # Stop plotting and statistics updates.
        self.matplotlib_widget.stop_plotting()
        self.matplotlib_widget.wave_data.clear()
        self.stats_timer.stop()
        self.status_label.setText("状态: 已停止")

    def update_stats(self):
        """Periodic (1 s) refresh of the sample count and decode statistics."""
        data_size = len(self.matplotlib_widget.signals)
        self.data_label.setText(f"接收数据: {data_size} 采样")
        # Also refresh the decode statistics.
        self.update_decode_stats()

    def save_audio(self):
        """Dump the currently buffered samples to received_audio.wav as
        mono 16-bit PCM at the capture sample rate."""
        if len(self.matplotlib_widget.signals) > 0:
            try:
                signal_data = np.array(self.matplotlib_widget.signals)
                # Fix: the buffer holds float samples in [-1, 1); convert to
                # little-endian int16 before writing a 16-bit WAV. Previously
                # raw float64 bytes were written with sampwidth=2, producing
                # a corrupt file of the wrong length.
                pcm = (np.clip(signal_data, -1.0, 1.0) * 32767).astype('<i2')
                with wave.open("received_audio.wav", "wb") as wf:
                    wf.setnchannels(1)  # mono
                    wf.setsampwidth(2)  # 2 bytes per sample (16-bit)
                    wf.setframerate(self.matplotlib_widget.freq)  # sample rate
                    wf.writeframes(pcm.tobytes())  # write the PCM payload
                self.status_label.setText("状态: 音频已保存为 received_audio.wav")
                print("音频已保存为 received_audio.wav")
            except Exception as e:
                self.status_label.setText(f"状态: 保存失败 - {str(e)}")
                print(f"保存音频失败: {e}")
        else:
            self.status_label.setText("状态: 没有足够的数据可保存")
async def main():
    """Async entry point: install a qasync event loop bridging Qt and
    asyncio, show the main window and run until the application exits."""
    app = QApplication(sys.argv)
    # Bridge Qt's event loop with asyncio.
    loop = qasync.QEventLoop(app)
    asyncio.set_event_loop(loop)
    window = MainWindow()
    window.show()
    try:
        with loop:
            # NOTE(review): awaiting run_forever() inside a coroutine that is
            # itself driven by asyncio.run() is an unusual qasync pattern —
            # confirm this is intended.
            await loop.run_forever()
    except KeyboardInterrupt:
        print("程序被用户中断")
    finally:
        # Make sure the UDP transport is released on shutdown.
        if window.udp_transport:
            window.udp_transport.close()

View File

@@ -1,18 +1,18 @@
#!/usr/bin/env python3
"""
音频实时监听与绘图系统主程序
基于Qt GUI + Matplotlib + UDP接收 + AFSK解码字符串
"""
import sys
import asyncio
from graphic import main
if __name__ == '__main__':
    try:
        # Run the async GUI entry point until the window closes.
        asyncio.run(main())
    except KeyboardInterrupt:
        print("程序被用户中断")
    except Exception as e:
        print(f"程序执行出错: {e}")
        sys.exit(1)
#!/usr/bin/env python3
"""
音频实时监听与绘图系统主程序
基于Qt GUI + Matplotlib + UDP接收 + AFSK解码字符串
"""
import sys
import asyncio
from graphic import main
if __name__ == '__main__':
    try:
        # Run the async GUI entry point until the window closes.
        asyncio.run(main())
    except KeyboardInterrupt:
        print("程序被用户中断")
    except Exception as e:
        print(f"程序执行出错: {e}")
        sys.exit(1)

View File

@@ -1,23 +1,23 @@
# 声波测试
该gui用于测试接收小智设备通过`udp`回传的`pcm`转时域/频域, 可以保存窗口长度的声音, 用于判断噪音频率分布和测试声波传输ascii的准确度,
固件测试需要打开`USE_AUDIO_DEBUGGER`, 并设置好`AUDIO_DEBUG_UDP_SERVER`是本机地址.
声波`demod`可以通过`sonic_wifi_config.html`或者上传至`PinMe`的[小智声波配网](https://iqf7jnhi.pinit.eth.limo)来输出声波测试
# 声波解码测试记录
> `✓`代表在I2S DIN接收原始PCM信号时就能成功解码, `△`代表需要降噪或额外操作可稳定解码, `X`代表降噪后效果也不好(可能能解部分但非常不稳定)。
> 个别ADC需要I2C配置阶段做更精细的降噪调整, 由于设备不通用暂只按照boards内提供的config测试
| 设备 | ADC | MIC | 效果 | 备注 |
| ---- | ---- | --- | --- | ---- |
| bread-compact | INMP441 | 集成MEMS MIC | ✓ |
| atk-dnesp32s3-box | ES8311 | | ✓ |
| magiclick-2p5 | ES8311 | | ✓ |
| lichuang-dev | ES7210 | | △ | 测试时需要关掉INPUT_REFERENCE
| kevin-box-2 | ES7210 | | △ | 测试时需要关掉INPUT_REFERENCE
| m5stack-core-s3 | ES7210 | | △ | 测试时需要关掉INPUT_REFERENCE
| xmini-c3 | ES8311 | | △ | 需降噪
| atoms3r-echo-base | ES8311 | | △ | 需降噪
| atk-dnesp32s3-box0 | ES8311 | | X | 能接收且解码, 但是丢包率很高
# 声波测试
该gui用于测试接收小智设备通过`udp`回传的`pcm`转时域/频域, 可以保存窗口长度的声音, 用于判断噪音频率分布和测试声波传输ascii的准确度,
固件测试需要打开`USE_AUDIO_DEBUGGER`, 并设置好`AUDIO_DEBUG_UDP_SERVER`是本机地址.
声波`demod`可以通过`sonic_wifi_config.html`或者上传至`PinMe`的[小智声波配网](https://iqf7jnhi.pinit.eth.limo)来输出声波测试
# 声波解码测试记录
> `✓`代表在I2S DIN接收原始PCM信号时就能成功解码, `△`代表需要降噪或额外操作可稳定解码, `X`代表降噪后效果也不好(可能能解部分但非常不稳定)。
> 个别ADC需要I2C配置阶段做更精细的降噪调整, 由于设备不通用暂只按照boards内提供的config测试
| 设备 | ADC | MIC | 效果 | 备注 |
| ---- | ---- | --- | --- | ---- |
| bread-compact | INMP441 | 集成MEMS MIC | ✓ |
| atk-dnesp32s3-box | ES8311 | | ✓ |
| magiclick-2p5 | ES8311 | | ✓ |
| lichuang-dev | ES7210 | | △ | 测试时需要关掉INPUT_REFERENCE
| kevin-box-2 | ES7210 | | △ | 测试时需要关掉INPUT_REFERENCE
| m5stack-core-s3 | ES7210 | | △ | 测试时需要关掉INPUT_REFERENCE
| xmini-c3 | ES8311 | | △ | 需降噪
| atoms3r-echo-base | ES8311 | | △ | 需降噪
| atk-dnesp32s3-box0 | ES8311 | | X | 能接收且解码, 但是丢包率很高
| movecall-moji-esp32s3 | ES8311 | | X | 能接收且解码, 但是丢包率很高

View File

@@ -1,4 +1,4 @@
matplotlib==3.10.5
numpy==2.3.2
PyQt6==6.9.1
qasync==0.27.1
matplotlib==3.10.5
numpy==2.3.2
PyQt6==6.9.1
qasync==0.27.1

View File

@@ -1,54 +1,54 @@
import socket
import wave
import argparse
'''
Create a UDP socket and bind it to the server's IP:8000.
Listen for incoming messages and print them to the console.
Save the audio to a WAV file.
'''
def main(samplerate, channels):
    """Receive raw PCM over UDP on 0.0.0.0:8000 and append it to a WAV file.

    Args:
        samplerate: sample rate written into the WAV header.
        channels: channel count written into the WAV header.

    Runs until interrupted with Ctrl-C; the WAV file and socket are always
    closed on exit. Fix: the status messages previously printed a literal
    "(unknown)" placeholder instead of the actual output filename.
    """
    # Create a UDP socket bound to all interfaces on port 8000.
    server_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    server_socket.bind(('0.0.0.0', 8000))
    # Create the WAV file, named after the stream parameters.
    filename = f"{samplerate}_{channels}.wav"
    wav_file = wave.open(filename, "wb")
    wav_file.setnchannels(channels)  # channels parameter
    wav_file.setsampwidth(2)  # 2 bytes per sample (16-bit)
    wav_file.setframerate(samplerate)  # samplerate parameter
    print(f"Start saving audio from 0.0.0.0:8000 to {filename}...")
    try:
        while True:
            # Receive a datagram from the client.
            message, address = server_socket.recvfrom(8000)
            # Append the PCM payload to the WAV file.
            wav_file.writeframes(message)
            # Log the payload size.
            print(f"Received {len(message)} bytes from {address}")
    except KeyboardInterrupt:
        print("\nStopping recording...")
    finally:
        # Close the file and socket even on error or interrupt.
        wav_file.close()
        server_socket.close()
        print(f"WAV file '{filename}' saved successfully")


if __name__ == "__main__":
    # Parse the CLI options that feed the WAV header.
    parser = argparse.ArgumentParser(description='UDP音频数据接收器保存为WAV文件')
    parser.add_argument('--samplerate', '-s', type=int, default=16000,
                        help='采样率 (默认: 16000)')
    parser.add_argument('--channels', '-c', type=int, default=2,
                        help='声道数 (默认: 2)')
    args = parser.parse_args()
    main(args.samplerate, args.channels)
import socket
import wave
import argparse
'''
Create a UDP socket and bind it to the server's IP:8000.
Listen for incoming messages and print them to the console.
Save the audio to a WAV file.
'''
def main(samplerate, channels):
    """Receive raw PCM over UDP on 0.0.0.0:8000 and append it to a WAV file.

    Args:
        samplerate: sample rate written into the WAV header.
        channels: channel count written into the WAV header.
    """
    # Create a UDP socket
    server_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    server_socket.bind(('0.0.0.0', 8000))
    # Create WAV file with parameters
    filename = f"{samplerate}_{channels}.wav"
    wav_file = wave.open(filename, "wb")
    wav_file.setnchannels(channels)  # channels parameter
    wav_file.setsampwidth(2)  # 2 bytes per sample (16-bit)
    wav_file.setframerate(samplerate)  # samplerate parameter
    # NOTE(review): "(unknown)" looks like a leftover placeholder — this
    # should presumably print {filename}; confirm before changing.
    print(f"Start saving audio from 0.0.0.0:8000 to (unknown)...")
    try:
        while True:
            # Receive a message from the client
            message, address = server_socket.recvfrom(8000)
            # Write PCM data to WAV file
            wav_file.writeframes(message)
            # Print length of the message
            print(f"Received {len(message)} bytes from {address}")
    except KeyboardInterrupt:
        print("\nStopping recording...")
    finally:
        # Close files and socket
        wav_file.close()
        server_socket.close()
        print(f"WAV file '(unknown)' saved successfully")


if __name__ == "__main__":
    # Parse the CLI options that feed the WAV header.
    parser = argparse.ArgumentParser(description='UDP音频数据接收器保存为WAV文件')
    parser.add_argument('--samplerate', '-s', type=int, default=16000,
                        help='采样率 (默认: 16000)')
    parser.add_argument('--channels', '-c', type=int, default=2,
                        help='声道数 (默认: 2)')
    args = parser.parse_args()
    main(args.samplerate, args.channels)

View File

@@ -0,0 +1,882 @@
#!/usr/bin/env python3
"""
Build default assets based on configuration
This script reads configuration from sdkconfig and builds the appropriate assets.bin
for the current board configuration.
Usage:
./build_default_assets.py --sdkconfig <path> --builtin_text_font <font_name> \
--default_emoji_collection <collection_name> --output <output_path>
"""
import argparse
import io
import os
import shutil
import sys
import json
import struct
from datetime import datetime
# =============================================================================
# Pack model functions (from pack_model.py)
# =============================================================================
def struct_pack_string(string, max_len=None):
    """Pack an ASCII string into fixed-width binary data.

    If max_len is None the output is exactly len(string) bytes; otherwise
    len(string) must be <= max_len and the result is zero-padded on the
    right to max_len bytes (struct.pack('x') padding).

    Fixes: the empty string previously returned None instead of b'', the
    None check used `==` instead of `is`, and bytes were concatenated one
    character at a time (quadratic).
    """
    if max_len is None:
        max_len = len(string)
    else:
        assert len(string) <= max_len
    left_num = max_len - len(string)
    # Each character is packed as one signed byte (so it must be ASCII < 128),
    # matching struct.pack('b', ord(c)) in the original on-disk format.
    out_bytes = b''.join(struct.pack('b', ord(char)) for char in string)
    # Zero padding, equivalent to one struct.pack('x') per remaining slot.
    out_bytes += b'\x00' * left_num
    return out_bytes
def read_data(filename):
    """Return the entire contents of *filename* as bytes (index/model data)."""
    with open(filename, "rb") as f:
        return f.read()
def pack_models(model_path, out_file="srmodels.bin"):
    """
    Pack all models under *model_path* into one binary file with this layout:
    {
        model_num: int
        model1_info: model_info_t
        model2_info: model_info_t
        ...
        model1_index,model1_data,model1_MODEL_INFO
        ...
    }model_pack_t
    {
        model_name: char[32]
        file_number: int
        file1_name: char[32]
        file1_start: int
        file1_len: int
        ...
    }model_info_t

    Fixes over the original: dead `model_num` counter removed (it was always
    overwritten by len(models)), `is None` instead of `== None`, and the
    duplicated length-pack in both branches unified.
    """
    models = {}
    file_num = 0
    # Collect every directory under model_path as a "model"; each model maps
    # file name -> file bytes.
    for root, dirs, _ in os.walk(model_path):
        for model_name in dirs:
            models[model_name] = {}
            model_dir = os.path.join(root, model_name)
            for _, _, files in os.walk(model_dir):
                for file_name in files:
                    file_num += 1
                    # NOTE(review): the path is always joined against
                    # model_dir, so files in nested subdirectories would
                    # resolve incorrectly — confirm models are flat.
                    file_path = os.path.join(model_dir, file_name)
                    models[model_name][file_name] = read_data(file_path)
    model_num = len(models)
    # Header = model count + per-model (name + file count)
    #        + per-file (name + start offset + length).
    header_len = 4 + model_num * (32 + 4) + file_num * (32 + 4 + 4)
    out_bin = struct.pack('I', model_num)  # model number
    data_bin = None
    for key in models:
        model_bin = struct_pack_string(key, 32)  # model name
        model_bin += struct.pack('I', len(models[key]))  # file count
        for file_name in models[key]:
            model_bin += struct_pack_string(file_name, 32)  # file name
            if data_bin is None:
                # First file starts right after the header.
                model_bin += struct.pack('I', header_len)
                data_bin = models[key][file_name]
            else:
                model_bin += struct.pack('I', header_len + len(data_bin))
                data_bin += models[key][file_name]
            model_bin += struct.pack('I', len(models[key][file_name]))
        out_bin += model_bin
    assert len(out_bin) == header_len
    if data_bin is not None:
        out_bin += data_bin
    out_file = os.path.join(model_path, out_file)
    with open(out_file, "wb") as f:
        f.write(out_bin)
# =============================================================================
# Build assets functions (from build.py)
# =============================================================================
def ensure_dir(directory):
    """Create *directory* (including parents) if it does not already exist."""
    if not os.path.isdir(directory):
        os.makedirs(directory, exist_ok=True)
def copy_file(src, dst):
    """Copy one file with metadata; return True on success, False if missing."""
    if not os.path.exists(src):
        print(f"Warning: Source file does not exist: {src}")
        return False
    shutil.copy2(src, dst)
    print(f"Copied: {src} -> {dst}")
    return True
def copy_directory(src, dst):
    """Recursively copy *src* into *dst* (merging into an existing tree);
    return True on success, False when *src* does not exist."""
    if not os.path.exists(src):
        print(f"Warning: Source directory does not exist: {src}")
        return False
    shutil.copytree(src, dst, dirs_exist_ok=True)
    print(f"Copied directory: {src} -> {dst}")
    return True
def process_sr_models(wakenet_model_dirs, multinet_model_dirs, build_dir, assets_dir):
    """Stage wakenet/multinet model dirs, pack them into srmodels.bin and
    copy the result into *assets_dir*.

    Returns "srmodels.bin" on success, or None when there is nothing to
    pack or packing fails.
    """
    if not wakenet_model_dirs and not multinet_model_dirs:
        return None
    # Recreate the SR-models staging directory from scratch.
    sr_models_build_dir = os.path.join(build_dir, "srmodels")
    if os.path.exists(sr_models_build_dir):
        shutil.rmtree(sr_models_build_dir)
    os.makedirs(sr_models_build_dir)
    models_processed = 0
    # Copy wakenet models if available.
    if wakenet_model_dirs:
        for wakenet_model_dir in wakenet_model_dirs:
            wakenet_name = os.path.basename(wakenet_model_dir)
            wakenet_dst = os.path.join(sr_models_build_dir, wakenet_name)
            if copy_directory(wakenet_model_dir, wakenet_dst):
                models_processed += 1
                print(f"Added wakenet model: {wakenet_name}")
    # Copy multinet models if available.
    if multinet_model_dirs:
        for multinet_model_dir in multinet_model_dirs:
            multinet_name = os.path.basename(multinet_model_dir)
            multinet_dst = os.path.join(sr_models_build_dir, multinet_name)
            if copy_directory(multinet_model_dir, multinet_dst):
                models_processed += 1
                print(f"Added multinet model: {multinet_name}")
    if models_processed == 0:
        print("Warning: No SR models were successfully processed")
        return None
    # Pack everything under the staging dir into one srmodels.bin blob.
    srmodels_output = os.path.join(sr_models_build_dir, "srmodels.bin")
    try:
        pack_models(sr_models_build_dir, "srmodels.bin")
        print(f"Generated: {srmodels_output}")
        # Publish srmodels.bin into the assets directory.
        copy_file(srmodels_output, os.path.join(assets_dir, "srmodels.bin"))
        return "srmodels.bin"
    except Exception as e:
        print(f"Error: Failed to generate srmodels.bin: {e}")
        return None
def process_text_font(text_font_file, assets_dir):
    """Copy the configured text font into *assets_dir*.

    Returns the font's basename on success, None when no font is configured
    or the copy fails.
    """
    if not text_font_file:
        return None
    font_filename = os.path.basename(text_font_file)
    destination = os.path.join(assets_dir, font_filename)
    # copy_file() logs the result and reports success as a bool.
    return font_filename if copy_file(text_font_file, destination) else None
def process_emoji_collection(emoji_collection_dir, assets_dir):
    """Copy every .png/.gif under *emoji_collection_dir* into *assets_dir*.

    Returns a list of {"name": <stem>, "file": <filename>} entries for the
    images that were copied successfully (empty when the dir is unset).
    """
    if not emoji_collection_dir:
        return []
    emoji_list = []
    for root, _dirs, files in os.walk(emoji_collection_dir):
        for file in files:
            if not file.lower().endswith(('.png', '.gif')):
                continue
            src_file = os.path.join(root, file)
            dst_file = os.path.join(assets_dir, file)
            if copy_file(src_file, dst_file):
                # Register the emoji under its extension-less name.
                stem = os.path.splitext(file)[0]
                emoji_list.append({"name": stem, "file": file})
    return emoji_list
def process_extra_files(extra_files_dir, assets_dir):
    """Copy every non-hidden file under *extra_files_dir* into *assets_dir*.

    Returns the list of copied file names; empty when the directory is unset
    or does not exist.
    """
    if not extra_files_dir:
        return []
    if not os.path.exists(extra_files_dir):
        print(f"Warning: Extra files directory not found: {extra_files_dir}")
        return []
    extra_files_list = []
    for root, _dirs, files in os.walk(extra_files_dir):
        for file in files:
            # Skip hidden files.
            if file.startswith('.'):
                continue
            if copy_file(os.path.join(root, file), os.path.join(assets_dir, file)):
                extra_files_list.append(file)
    if extra_files_list:
        print(f"Processed {len(extra_files_list)} extra files from: {extra_files_dir}")
    return extra_files_list
def generate_index_json(assets_dir, srmodels, text_font, emoji_collection, extra_files=None, multinet_model_info=None):
    """Write index.json into *assets_dir*, including only the sections that
    are set (truthy)."""
    index_data = {"version": 1}
    optional_sections = (
        ("srmodels", srmodels),
        ("text_font", text_font),
        ("emoji_collection", emoji_collection),
        ("extra_files", extra_files),
        ("multinet_model", multinet_model_info),
    )
    for key, value in optional_sections:
        if value:
            index_data[key] = value
    index_path = os.path.join(assets_dir, "index.json")
    with open(index_path, 'w', encoding='utf-8') as f:
        json.dump(index_data, f, indent=4, ensure_ascii=False)
    print(f"Generated: {index_path}")
def generate_config_json(build_dir, assets_dir):
    """Write the asset packer's config.json into *build_dir* and return its
    path. All values mirror the fixed packer settings for this project."""
    settings = {
        "include_path": os.path.join(build_dir, "include"),
        "assets_path": assets_dir,
        "image_file": os.path.join(build_dir, "output", "assets.bin"),
        "lvgl_ver": "9.3.0",
        "assets_size": "0x400000",
        "support_format": ".png, .gif, .jpg, .bin, .json",
        "name_length": "32",
        "split_height": "0",
        "support_qoi": False,
        "support_spng": False,
        "support_sjpg": False,
        "support_sqoi": False,
        "support_raw": False,
        "support_raw_dither": False,
        "support_raw_bgr": False,
    }
    config_path = os.path.join(build_dir, "config.json")
    with open(config_path, 'w', encoding='utf-8') as handle:
        json.dump(settings, handle, indent=4, ensure_ascii=False)
    print(f"Generated: {config_path}")
    return config_path
# =============================================================================
# Simplified SPIFFS assets generation (from spiffs_assets_gen.py)
# =============================================================================
def compute_checksum(data):
    """Sum of all byte values of *data*, truncated to 16 bits."""
    return sum(data) % 0x10000
def sort_key(filename):
    """Sort key grouping files by extension first, then by base name."""
    basename, extension = os.path.splitext(filename)
    return (extension, basename)
def pack_assets_simple(target_path, include_path, out_file, assets_path, max_name_len=32):
    """
    Simplified version of pack_assets that handles basic file packing.

    Concatenates every regular file in *target_path* (except config.json)
    into *out_file* with the esp_mmap_assets layout:

        [total_files:4][checksum:4][payload_len:4][mmap table][file data]

    Each mmap-table entry is a fixed-width little-endian record
    (name:max_name_len bytes, size:4, offset:4, width:2, height:2) and every
    file's data is prefixed with the 2-byte magic 0x5A5A.  A matching
    ``mmap_generate_<asset_name>.h`` header is emitted into *include_path*.
    """
    merged_data = bytearray()
    file_info_list = []
    skip_files = ['config.json']
    # Ensure output directory exists
    os.makedirs(os.path.dirname(out_file), exist_ok=True)
    os.makedirs(include_path, exist_ok=True)
    file_list = sorted(os.listdir(target_path), key=sort_key)
    for filename in file_list:
        if filename in skip_files:
            continue
        file_path = os.path.join(target_path, filename)
        if not os.path.isfile(file_path):
            continue
        file_name = os.path.basename(file_path)
        file_size = os.path.getsize(file_path)
        # Offset points at the 0x5A5A prefix; the recorded size excludes it.
        file_info_list.append((file_name, len(merged_data), file_size, 0, 0))
        # Add 0x5A5A prefix to merged_data
        merged_data.extend(b'\x5A' * 2)
        with open(file_path, 'rb') as bin_file:
            bin_data = bin_file.read()
        merged_data.extend(bin_data)
    total_files = len(file_info_list)
    mmap_table = bytearray()
    for file_name, offset, file_size, width, height in file_info_list:
        # FIX: truncate/pad in *bytes*, not characters.  A name containing
        # multi-byte UTF-8 could otherwise encode to more than max_name_len
        # bytes and overflow the fixed-width record, corrupting the table.
        name_bytes = file_name.encode('utf-8')
        if len(name_bytes) > max_name_len:
            print(f'Warning: "{file_name}" exceeds {max_name_len} bytes and will be truncated.')
        mmap_table.extend(name_bytes[:max_name_len].ljust(max_name_len, b'\0'))
        mmap_table.extend(file_size.to_bytes(4, byteorder='little'))
        mmap_table.extend(offset.to_bytes(4, byteorder='little'))
        mmap_table.extend(width.to_bytes(2, byteorder='little'))
        mmap_table.extend(height.to_bytes(2, byteorder='little'))
    combined_data = mmap_table + merged_data
    combined_checksum = compute_checksum(combined_data)
    combined_data_length = len(combined_data).to_bytes(4, byteorder='little')
    header_data = total_files.to_bytes(4, byteorder='little') + combined_checksum.to_bytes(4, byteorder='little')
    final_data = header_data + combined_data_length + combined_data
    with open(out_file, 'wb') as output_bin:
        output_bin.write(final_data)
    # Generate the companion C header describing the packed image.
    current_year = datetime.now().year
    asset_name = os.path.basename(assets_path)
    header_file_path = os.path.join(include_path, f'mmap_generate_{asset_name}.h')
    with open(header_file_path, 'w') as output_header:
        output_header.write('/*\n')
        output_header.write(' * SPDX-FileCopyrightText: 2022-{} Espressif Systems (Shanghai) CO LTD\n'.format(current_year))
        output_header.write(' *\n')
        output_header.write(' * SPDX-License-Identifier: Apache-2.0\n')
        output_header.write(' */\n\n')
        output_header.write('/**\n')
        output_header.write(' * @file\n')
        output_header.write(" * @brief This file was generated by esp_mmap_assets, don't modify it\n")
        output_header.write(' */\n\n')
        output_header.write('#pragma once\n\n')
        output_header.write("#include \"esp_mmap_assets.h\"\n\n")
        output_header.write(f'#define MMAP_{asset_name.upper()}_FILES {total_files}\n')
        output_header.write(f'#define MMAP_{asset_name.upper()}_CHECKSUM 0x{combined_checksum:04X}\n\n')
        output_header.write(f'enum MMAP_{asset_name.upper()}_LISTS {{\n')
        for i, (file_name, _, _, _, _) in enumerate(file_info_list):
            enum_name = file_name.replace('.', '_')
            output_header.write(f'    MMAP_{asset_name.upper()}_{enum_name.upper()} = {i}, /*!< {file_name} */\n')
        output_header.write('};\n')
    print(f'All files have been merged into {os.path.basename(out_file)}')
# =============================================================================
# Configuration and main functions
# =============================================================================
def read_wakenet_from_sdkconfig(sdkconfig_path):
    """
    Read wakenet models from sdkconfig (based on movemodel.py logic)
    Returns a list of wakenet model names
    """
    if not os.path.exists(sdkconfig_path):
        print(f"Warning: sdkconfig file not found: {sdkconfig_path}")
        return []
    models = []
    with open(sdkconfig_path, "r") as f:
        for raw_line in f:
            entry = raw_line.strip("\n")
            # Only enabled CONFIG_SR_WN options are relevant.
            if 'CONFIG_SR_WN' not in entry or entry.startswith('#'):
                continue
            # CONFIG_SR_WN_NONE means "no wake word model selected".
            if '_NONE' in entry:
                continue
            # Keep just the option name, dropping the "=y" value.
            entry = entry.partition('=')[0]
            # "..._MULTI" options map back to the base model name.
            if '_MULTI' in entry:
                entry = entry[:-6]
            models.append(entry.split("_SR_WN_")[-1].lower())
    return models
def read_multinet_from_sdkconfig(sdkconfig_path):
    """
    Read multinet models from sdkconfig (based on movemodel.py logic)
    Returns a list of multinet model names

    The result may contain at most one Chinese model, at most one English
    model, and additionally 'fst' when a MultiNet 6/7 variant is selected.
    """
    if not os.path.exists(sdkconfig_path):
        print(f"Warning: sdkconfig file not found: {sdkconfig_path}")
        return []
    with io.open(sdkconfig_path, "r") as f:
        # Concatenate every enabled CONFIG_SR_MN line into one string so the
        # substring checks below can inspect all selected options at once.
        models_string = ''
        for label in f:
            label = label.strip("\n")
            if 'CONFIG_SR_MN' in label and label[0] != '#':
                models_string += label
    models = []
    # Chinese model selection.  The elif order matters: QUANT8 variants are
    # tested before their non-quantized counterparts because the shorter
    # option name is a substring of the longer one.
    if "CONFIG_SR_MN_CN_MULTINET3_SINGLE_RECOGNITION" in models_string:
        models.append('mn3_cn')
    elif "CONFIG_SR_MN_CN_MULTINET4_5_SINGLE_RECOGNITION_QUANT8" in models_string:
        models.append('mn4q8_cn')
    elif "CONFIG_SR_MN_CN_MULTINET4_5_SINGLE_RECOGNITION" in models_string:
        models.append('mn4_cn')
    elif "CONFIG_SR_MN_CN_MULTINET5_RECOGNITION_QUANT8" in models_string:
        models.append('mn5q8_cn')
    elif "CONFIG_SR_MN_CN_MULTINET6_QUANT" in models_string:
        models.append('mn6_cn')
    elif "CONFIG_SR_MN_CN_MULTINET6_AC_QUANT" in models_string:
        models.append('mn6_cn_ac')
    elif "CONFIG_SR_MN_CN_MULTINET7_QUANT" in models_string:
        models.append('mn7_cn')
    elif "CONFIG_SR_MN_CN_MULTINET7_AC_QUANT" in models_string:
        models.append('mn7_cn_ac')
    # English model selection, independent of the Chinese branch above.
    if "CONFIG_SR_MN_EN_MULTINET5_SINGLE_RECOGNITION_QUANT8" in models_string:
        models.append('mn5q8_en')
    elif "CONFIG_SR_MN_EN_MULTINET5_SINGLE_RECOGNITION" in models_string:
        models.append('mn5_en')
    elif "CONFIG_SR_MN_EN_MULTINET6_QUANT" in models_string:
        models.append('mn6_en')
    elif "CONFIG_SR_MN_EN_MULTINET7_QUANT" in models_string:
        models.append('mn7_en')
    # MultiNet 6/7 additionally require the FST model.
    if "MULTINET6" in models_string or "MULTINET7" in models_string:
        models.append('fst')
    return models
def read_wake_word_type_from_sdkconfig(sdkconfig_path):
    """
    Read wake word type configuration from sdkconfig
    Returns a dict with wake word type info
    """
    if not os.path.exists(sdkconfig_path):
        print(f"Warning: sdkconfig file not found: {sdkconfig_path}")
        # Missing sdkconfig is treated as "wake word disabled".
        return {
            'use_esp_wake_word': False,
            'use_afe_wake_word': False,
            'use_custom_wake_word': False,
            'wake_word_disabled': True
        }
    # Map each sdkconfig marker to the flag it enables.
    flag_markers = {
        'CONFIG_USE_ESP_WAKE_WORD=y': 'use_esp_wake_word',
        'CONFIG_USE_AFE_WAKE_WORD=y': 'use_afe_wake_word',
        'CONFIG_USE_CUSTOM_WAKE_WORD=y': 'use_custom_wake_word',
        'CONFIG_WAKE_WORD_DISABLED=y': 'wake_word_disabled',
    }
    config_values = dict.fromkeys(flag_markers.values(), False)
    with open(sdkconfig_path, "r") as f:
        for raw_line in f:
            line = raw_line.strip("\n")
            if line.startswith('#'):
                continue
            for marker, key in flag_markers.items():
                if marker in line:
                    config_values[key] = True
                    break
    return config_values
def read_custom_wake_word_from_sdkconfig(sdkconfig_path):
    """
    Read custom wake word configuration from sdkconfig
    Returns a dict with custom wake word info or None if not configured

    The returned dict has keys 'wake_word', 'display' and 'threshold'
    (threshold converted from an integer percentage to a 0-1 fraction).
    """
    if not os.path.exists(sdkconfig_path):
        print(f"Warning: sdkconfig file not found: {sdkconfig_path}")
        return None
    config_values = {}
    with io.open(sdkconfig_path, "r") as f:
        for line in f:
            line = line.strip("\n")
            # Skip comments and lines without an assignment.
            if line.startswith('#') or '=' not in line:
                continue
            # Check for custom wake word configuration.  The markers end
            # with '=' so they cannot accidentally match each other's longer
            # option names (e.g. ..._WAKE_WORD= vs ..._WAKE_WORD_DISPLAY=).
            if 'CONFIG_USE_CUSTOM_WAKE_WORD=y' in line:
                config_values['use_custom_wake_word'] = True
            elif 'CONFIG_CUSTOM_WAKE_WORD=' in line and not line.startswith('#'):
                # Extract string value (remove quotes).  The startswith('#')
                # check is redundant here — comments were skipped above.
                value = line.split('=', 1)[1].strip('"')
                config_values['wake_word'] = value
            elif 'CONFIG_CUSTOM_WAKE_WORD_DISPLAY=' in line and not line.startswith('#'):
                # Extract string value (remove quotes)
                value = line.split('=', 1)[1].strip('"')
                config_values['display'] = value
            elif 'CONFIG_CUSTOM_WAKE_WORD_THRESHOLD=' in line and not line.startswith('#'):
                # Extract numeric value: prefer int, fall back to float,
                # finally to the default of 20 (i.e. 0.2 after conversion).
                value = line.split('=', 1)[1]
                try:
                    config_values['threshold'] = int(value)
                except ValueError:
                    try:
                        config_values['threshold'] = float(value)
                    except ValueError:
                        print(f"Warning: Invalid threshold value: {value}")
                        config_values['threshold'] = 20  # default (will be converted to 0.2)
    # Return config only if custom wake word is enabled and required fields are present
    if (config_values.get('use_custom_wake_word', False) and
            'wake_word' in config_values and
            'display' in config_values and
            'threshold' in config_values):
        return {
            'wake_word': config_values['wake_word'],
            'display': config_values['display'],
            'threshold': config_values['threshold'] / 100.0  # Convert to decimal (20 -> 0.2)
        }
    return None
def get_language_from_multinet_models(multinet_models):
    """
    Determine language from multinet model names
    Returns 'cn', 'en', or None
    """
    if not multinet_models:
        return None

    def matches(indicators):
        # True when any model name contains any of the given markers.
        return any(ind in model for model in multinet_models for ind in indicators)

    has_cn = matches(('_cn', 'cn_'))
    has_en = matches(('_en', 'en_'))
    # English only when unambiguously English; everything else (Chinese,
    # mixed, or unmarked model sets) defaults to Chinese.
    if has_en and not has_cn:
        return 'en'
    return 'cn'
def get_wakenet_model_paths(model_names, esp_sr_model_path):
    """
    Get the full paths to the wakenet model directories
    Returns a list of valid model paths
    """
    if not model_names:
        return []
    valid_paths = []
    for name in model_names:
        candidate = os.path.join(esp_sr_model_path, 'wakenet_model', name)
        if not os.path.exists(candidate):
            # Missing models are skipped with a warning, not fatal.
            print(f"Warning: Wakenet model directory not found: {candidate}")
            continue
        valid_paths.append(candidate)
    return valid_paths
def get_multinet_model_paths(model_names, esp_sr_model_path):
    """
    Get the full paths to the multinet model directories
    Returns a list of valid model paths
    """
    if not model_names:
        return []
    candidates = [os.path.join(esp_sr_model_path, 'multinet_model', name)
                  for name in model_names]
    existing = []
    for path in candidates:
        if os.path.exists(path):
            existing.append(path)
        else:
            # Missing models are skipped with a warning, not fatal.
            print(f"Warning: Multinet model directory not found: {path}")
    return existing
def get_text_font_path(builtin_text_font, xiaozhi_fonts_path):
    """
    Get the text font path if needed
    Returns the font file path or None if no font is needed
    """
    # Only the "basic" builtin fonts have a packaged "common" counterpart.
    if not (builtin_text_font and 'basic' in builtin_text_font):
        return None
    # e.g. font_puhui_basic_16_4 -> font_puhui_common_16_4.bin
    font_file = f"{builtin_text_font.replace('basic', 'common')}.bin"
    font_path = os.path.join(xiaozhi_fonts_path, 'cbin', font_file)
    if not os.path.exists(font_path):
        print(f"Warning: Font file not found: {font_path}")
        return None
    return font_path
def get_emoji_collection_path(default_emoji_collection, xiaozhi_fonts_path):
    """
    Get the emoji collection path if needed
    Returns the emoji directory path or None if no emoji collection is needed
    """
    if not default_emoji_collection:
        return None
    emoji_dir = os.path.join(xiaozhi_fonts_path, 'png', default_emoji_collection)
    if os.path.exists(emoji_dir):
        return emoji_dir
    print(f"Warning: Emoji collection directory not found: {emoji_dir}")
    return None
def build_assets_integrated(wakenet_model_paths, multinet_model_paths, text_font_path, emoji_collection_path, extra_files_path, output_path, multinet_model_info=None):
    """
    Build assets using integrated functions (no external dependencies)

    Stages every selected component into a temporary "assets" directory,
    generates index.json/config.json, packs everything into a single binary
    via pack_assets_simple(), and copies the result to *output_path*.
    Returns True on success, False on any failure.  The temporary build
    tree is always removed.
    """
    # Create temporary build directory next to the output file
    temp_build_dir = os.path.join(os.path.dirname(output_path), "temp_build")
    assets_dir = os.path.join(temp_build_dir, "assets")
    try:
        # Clean and create directories
        if os.path.exists(temp_build_dir):
            shutil.rmtree(temp_build_dir)
        ensure_dir(temp_build_dir)
        ensure_dir(assets_dir)
        print("Starting to build assets...")
        # Process each component: each helper stages files into assets_dir
        # and returns metadata for index.json (None when skipped).
        srmodels = process_sr_models(wakenet_model_paths, multinet_model_paths, temp_build_dir, assets_dir) if (wakenet_model_paths or multinet_model_paths) else None
        text_font = process_text_font(text_font_path, assets_dir) if text_font_path else None
        emoji_collection = process_emoji_collection(emoji_collection_path, assets_dir) if emoji_collection_path else None
        extra_files = process_extra_files(extra_files_path, assets_dir) if extra_files_path else None
        # Generate index.json
        generate_index_json(assets_dir, srmodels, text_font, emoji_collection, extra_files, multinet_model_info)
        # Generate config.json for packing
        config_path = generate_config_json(temp_build_dir, assets_dir)
        # Load config and pack assets
        with open(config_path, 'r') as f:
            config_data = json.load(f)
        # Use simplified packing function
        include_path = config_data['include_path']
        image_file = config_data['image_file']
        pack_assets_simple(assets_dir, include_path, image_file, "assets", int(config_data['name_length']))
        # Copy final assets.bin to output location
        if os.path.exists(image_file):
            shutil.copy2(image_file, output_path)
            print(f"Successfully generated assets.bin: {output_path}")
            # Show size information
            total_size = os.path.getsize(output_path)
            print(f"Assets file size: {total_size / 1024:.2f}K ({total_size} bytes)")
            return True
        else:
            print(f"Error: Generated assets.bin not found: {image_file}")
            return False
    except Exception as e:
        # Any staging/packing failure is reported and turned into a False
        # return so the caller decides the process exit status.
        print(f"Error: Failed to build assets: {e}")
        return False
    finally:
        # Clean up temporary directory
        if os.path.exists(temp_build_dir):
            shutil.rmtree(temp_build_dir)
def main():
    """Command-line entry point: inspect sdkconfig and build the default assets.bin."""
    parser = argparse.ArgumentParser(description='Build default assets based on configuration')
    parser.add_argument('--sdkconfig', required=True, help='Path to sdkconfig file')
    parser.add_argument('--builtin_text_font', help='Builtin text font name (e.g., font_puhui_basic_16_4)')
    parser.add_argument('--emoji_collection', help='Default emoji collection name (e.g., emojis_32)')
    parser.add_argument('--output', required=True, help='Output path for assets.bin')
    parser.add_argument('--esp_sr_model_path', help='Path to ESP-SR model directory')
    parser.add_argument('--xiaozhi_fonts_path', help='Path to xiaozhi-fonts component directory')
    parser.add_argument('--extra_files', help='Path to extra files directory to be included in assets')
    args = parser.parse_args()
    # Set default paths if not provided
    if not args.esp_sr_model_path or not args.xiaozhi_fonts_path:
        # Calculate project root from script location
        script_dir = os.path.dirname(os.path.abspath(__file__))
        project_root = os.path.dirname(script_dir)
        if not args.esp_sr_model_path:
            args.esp_sr_model_path = os.path.join(project_root, "managed_components", "espressif__esp-sr", "model")
        if not args.xiaozhi_fonts_path:
            args.xiaozhi_fonts_path = os.path.join(project_root, "components", "xiaozhi-fonts")
    print("Building default assets...")
    print(f" sdkconfig: {args.sdkconfig}")
    print(f" builtin_text_font: {args.builtin_text_font}")
    print(f" emoji_collection: {args.emoji_collection}")
    print(f" output: {args.output}")
    # Read wake word type configuration from sdkconfig
    wake_word_config = read_wake_word_type_from_sdkconfig(args.sdkconfig)
    # Read SR models from sdkconfig
    wakenet_model_names = read_wakenet_from_sdkconfig(args.sdkconfig)
    multinet_model_names = read_multinet_from_sdkconfig(args.sdkconfig)
    # Apply wake word logic to decide which models to package
    wakenet_model_paths = []
    multinet_model_paths = []
    # 1. Only package wakenet models if USE_ESP_WAKE_WORD=y or USE_AFE_WAKE_WORD=y
    if wake_word_config['use_esp_wake_word'] or wake_word_config['use_afe_wake_word']:
        wakenet_model_paths = get_wakenet_model_paths(wakenet_model_names, args.esp_sr_model_path)
    elif wakenet_model_names:
        print(f" Note: Found wakenet models {wakenet_model_names} but wake word type is not ESP/AFE, skipping")
    # 2. Error check: if USE_CUSTOM_WAKE_WORD=y but no multinet models selected, report error
    if wake_word_config['use_custom_wake_word'] and not multinet_model_names:
        print("Error: USE_CUSTOM_WAKE_WORD is enabled but no multinet models are selected in sdkconfig")
        print("Please select appropriate CONFIG_SR_MN_* options in menuconfig, or disable USE_CUSTOM_WAKE_WORD")
        sys.exit(1)
    # 3. Only package multinet models if USE_CUSTOM_WAKE_WORD=y
    if wake_word_config['use_custom_wake_word']:
        multinet_model_paths = get_multinet_model_paths(multinet_model_names, args.esp_sr_model_path)
    elif multinet_model_names:
        print(f" Note: Found multinet models {multinet_model_names} but USE_CUSTOM_WAKE_WORD is disabled, skipping")
    # Print model information (only for models that will actually be packaged)
    if wakenet_model_paths:
        print(f" wakenet models: {', '.join(wakenet_model_names)} (will be packaged)")
    if multinet_model_paths:
        print(f" multinet models: {', '.join(multinet_model_names)} (will be packaged)")
    # Get text font path if needed
    text_font_path = get_text_font_path(args.builtin_text_font, args.xiaozhi_fonts_path)
    # Get emoji collection path if needed
    emoji_collection_path = get_emoji_collection_path(args.emoji_collection, args.xiaozhi_fonts_path)
    # Get extra files path if provided
    extra_files_path = args.extra_files
    # Read custom wake word configuration
    custom_wake_word_config = read_custom_wake_word_from_sdkconfig(args.sdkconfig)
    multinet_model_info = None
    if custom_wake_word_config and multinet_model_paths:
        # Determine language from multinet models
        language = get_language_from_multinet_models(multinet_model_names)
        # Build multinet_model info structure (embedded into index.json)
        multinet_model_info = {
            "language": language,
            "duration": 3000,  # Default duration in ms
            "threshold": custom_wake_word_config['threshold'],
            "commands": [
                {
                    "command": custom_wake_word_config['wake_word'],
                    "text": custom_wake_word_config['display'],
                    "action": "wake"
                }
            ]
        }
        print(f" custom wake word: {custom_wake_word_config['wake_word']} ({custom_wake_word_config['display']})")
        print(f" wake word language: {language}")
        print(f" wake word threshold: {custom_wake_word_config['threshold']}")
    # Check if we have anything to build
    if not wakenet_model_paths and not multinet_model_paths and not text_font_path and not emoji_collection_path and not extra_files_path and not multinet_model_info:
        print("Warning: No assets to build (no SR models, text font, emoji collection, extra files, or custom wake word)")
        # Create an empty assets.bin file
        os.makedirs(os.path.dirname(args.output), exist_ok=True)
        with open(args.output, 'wb') as f:
            pass  # Create empty file
        print(f"Created empty assets.bin: {args.output}")
        return
    # Build the assets
    success = build_assets_integrated(wakenet_model_paths, multinet_model_paths, text_font_path, emoji_collection_path,
                                      extra_files_path, args.output, multinet_model_info)
    if not success:
        sys.exit(1)
    print("Build completed successfully!")


if __name__ == "__main__":
    main()

View File

@@ -1,2 +0,0 @@
#!/bin/sh
esptool.py -p /dev/ttyACM0 -b 2000000 write_flash 0 ../releases/v0.9.9_bread-compact-wifi/merged-binary.bin

View File

@@ -1,187 +1,187 @@
#!/usr/bin/env python3
import argparse
import json
import os
# Template for the generated C++ header.  Rendered with str.format(): doubled
# braces ({{ }}) emit literal braces, while {lang_code}, {lang_code_for_font},
# {strings} and {sounds} are substituted in generate_header().  The comments
# inside the string are emitted verbatim into the generated file and are
# intentionally left untouched.
HEADER_TEMPLATE = """// Auto-generated language config
// Language: {lang_code} with en-US fallback
#pragma once
#include <string_view>
#ifndef {lang_code_for_font}
#define {lang_code_for_font} // 預設語言
#endif
namespace Lang {{
// 语言元数据
constexpr const char* CODE = "{lang_code}";
// 字符串资源 (en-US as fallback for missing keys)
namespace Strings {{
{strings}
}}
// 音效资源 (en-US as fallback for missing audio files)
namespace Sounds {{
{sounds}
}}
}}
"""
def load_base_language(assets_dir):
    """Load the en-US baseline language data used as the fallback source.

    Returns ``{'strings': {}}`` when the baseline file is missing or invalid.
    """
    base_lang_path = os.path.join(assets_dir, 'locales', 'en-US', 'language.json')
    if not os.path.exists(base_lang_path):
        print("Warning: en-US base language file not found, fallback mechanism disabled")
        return {'strings': {}}
    try:
        with open(base_lang_path, 'r', encoding='utf-8') as f:
            base_data = json.load(f)
    except json.JSONDecodeError as e:
        print(f"Warning: Failed to parse en-US language file: {e}")
        return {'strings': {}}
    print(f"Loaded base language en-US with {len(base_data.get('strings', {}))} strings")
    return base_data
def get_sound_files(directory):
    """Return the .ogg file names directly inside *directory* ([] if absent)."""
    try:
        entries = os.listdir(directory)
    except FileNotFoundError:
        return []
    return [name for name in entries if name.endswith('.ogg')]
def generate_header(lang_code, output_path):
    """Render the C++ language header for *lang_code* into *output_path*.

    Strings and sound files are merged over the en-US baseline so that any
    key or audio file missing from the target language falls back to en-US.
    """
    # Derive the project layout from the output path
    # (output_path is usually main/assets/lang_config.h).
    main_dir = os.path.dirname(output_path)  # main/assets
    if os.path.basename(main_dir) == 'assets':
        main_dir = os.path.dirname(main_dir)  # main
    project_dir = os.path.dirname(main_dir)  # project root; NOTE(review): unused
    assets_dir = os.path.join(main_dir, 'assets')
    # Path of the language JSON file to process
    input_path = os.path.join(assets_dir, 'locales', lang_code, 'language.json')
    print(f"Processing language: {lang_code}")
    print(f"Input file path: {input_path}")
    print(f"Output file path: {output_path}")
    if not os.path.exists(input_path):
        raise FileNotFoundError(f"Language file not found: {input_path}")
    with open(input_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    # Validate the expected JSON structure
    if 'language' not in data or 'strings' not in data:
        raise ValueError("Invalid JSON structure")
    # Load the en-US baseline language data
    base_data = load_base_language(assets_dir)
    # Merge strings: en-US is the baseline, the user language overrides it
    base_strings = base_data.get('strings', {})
    user_strings = data['strings']
    merged_strings = base_strings.copy()
    merged_strings.update(user_strings)
    # Statistics (informational only)
    base_count = len(base_strings)
    user_count = len(user_strings)
    total_count = len(merged_strings)
    fallback_count = total_count - user_count
    print(f"Language {lang_code} string statistics:")
    print(f" - Base language (en-US): {base_count} strings")
    print(f" - User language: {user_count} strings")
    print(f" - Total: {total_count} strings")
    if fallback_count > 0:
        print(f" - Fallback to en-US: {fallback_count} strings")
    # Generate the string constants
    strings = []
    sounds = []
    for key, value in merged_strings.items():
        value = value.replace('"', '\\"')  # escape double quotes for the C++ literal
        strings.append(f' constexpr const char* {key.upper()} = "{value}";')
    # Collect sound files: en-US is the baseline, the user language overrides it
    current_lang_dir = os.path.join(assets_dir, 'locales', lang_code)
    base_lang_dir = os.path.join(assets_dir, 'locales', 'en-US')
    common_dir = os.path.join(assets_dir, 'common')
    # All candidate sound files
    base_sounds = get_sound_files(base_lang_dir)
    current_sounds = get_sound_files(current_lang_dir)
    common_sounds = get_sound_files(common_dir)
    # Union of baseline and user-language sounds
    all_sound_files = set(base_sounds)
    all_sound_files.update(current_sounds)
    # Sound statistics (informational only)
    base_sound_count = len(base_sounds)
    user_sound_count = len(current_sounds)
    common_sound_count = len(common_sounds)
    sound_fallback_count = len(set(base_sounds) - set(current_sounds))
    print(f"Language {lang_code} sound statistics:")
    print(f" - Base language (en-US): {base_sound_count} sounds")
    print(f" - User language: {user_sound_count} sounds")
    print(f" - Common sounds: {common_sound_count} sounds")
    if sound_fallback_count > 0:
        print(f" - Sound fallback to en-US: {sound_fallback_count} sounds")
    # Generate language-specific sound constants
    for file in sorted(all_sound_files):
        base_name = os.path.splitext(file)[0]
        # Prefer the current language's sound; fall back to en-US otherwise.
        if file in current_sounds:
            sound_lang = lang_code.replace('-', '_').lower()
        else:
            sound_lang = 'en_us'
        # NOTE(review): sound_lang is computed but never used in the emitted
        # snippet below — the symbol names depend only on base_name.
        sounds.append(f'''
extern const char ogg_{base_name}_start[] asm("_binary_{base_name}_ogg_start");
extern const char ogg_{base_name}_end[] asm("_binary_{base_name}_ogg_end");
static const std::string_view OGG_{base_name.upper()} {{
static_cast<const char*>(ogg_{base_name}_start),
static_cast<size_t>(ogg_{base_name}_end - ogg_{base_name}_start)
}};''')
    # Generate common (language-independent) sound constants
    for file in sorted(common_sounds):
        base_name = os.path.splitext(file)[0]
        sounds.append(f'''
extern const char ogg_{base_name}_start[] asm("_binary_{base_name}_ogg_start");
extern const char ogg_{base_name}_end[] asm("_binary_{base_name}_ogg_end");
static const std::string_view OGG_{base_name.upper()} {{
static_cast<const char*>(ogg_{base_name}_start),
static_cast<size_t>(ogg_{base_name}_end - ogg_{base_name}_start)
}};''')
    # Fill the template
    content = HEADER_TEMPLATE.format(
        lang_code=lang_code,
        lang_code_for_font=lang_code.replace('-', '_').lower(),
        strings="\n".join(sorted(strings)),
        sounds="\n".join(sorted(sounds))
    )
    # Write the generated header
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(content)
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Generate language configuration header file with en-US fallback")
    parser.add_argument("--language", required=True, help="Language code (e.g: zh-CN, en-US, ja-JP)")
    parser.add_argument("--output", required=True, help="Output header file path")
    args = parser.parse_args()
    try:
        generate_header(args.language, args.output)
        print(f"Successfully generated language config file: {args.output}")
    except Exception as e:
        # Report the failure AND propagate a non-zero exit status; previously
        # the error was printed but the script exited 0, so build systems
        # carried on with a stale or missing lang_config.h.
        print(f"Error: {e}")
        exit(1)
#!/usr/bin/env python3
import argparse
import json
import os
# Template for the generated C++ header.  Rendered with str.format(): doubled
# braces ({{ }}) emit literal braces, while {lang_code}, {lang_code_for_font},
# {strings} and {sounds} are substituted in generate_header().  The comments
# inside the string are emitted verbatim into the generated file and are
# intentionally left untouched.
HEADER_TEMPLATE = """// Auto-generated language config
// Language: {lang_code} with en-US fallback
#pragma once
#include <string_view>
#ifndef {lang_code_for_font}
#define {lang_code_for_font} // 預設語言
#endif
namespace Lang {{
// 语言元数据
constexpr const char* CODE = "{lang_code}";
// 字符串资源 (en-US as fallback for missing keys)
namespace Strings {{
{strings}
}}
// 音效资源 (en-US as fallback for missing audio files)
namespace Sounds {{
{sounds}
}}
}}
"""
def load_base_language(assets_dir):
    """Load the en-US baseline language data used as the fallback source.

    Returns ``{'strings': {}}`` when the baseline file is missing or invalid.
    """
    base_lang_path = os.path.join(assets_dir, 'locales', 'en-US', 'language.json')
    if not os.path.exists(base_lang_path):
        print("Warning: en-US base language file not found, fallback mechanism disabled")
        return {'strings': {}}
    try:
        with open(base_lang_path, 'r', encoding='utf-8') as f:
            base_data = json.load(f)
    except json.JSONDecodeError as e:
        print(f"Warning: Failed to parse en-US language file: {e}")
        return {'strings': {}}
    print(f"Loaded base language en-US with {len(base_data.get('strings', {}))} strings")
    return base_data
def get_sound_files(directory):
    """Return the .ogg file names directly inside *directory* ([] if absent)."""
    try:
        entries = os.listdir(directory)
    except FileNotFoundError:
        return []
    return [name for name in entries if name.endswith('.ogg')]
def generate_header(lang_code, output_path):
    """Render the C++ language header for *lang_code* into *output_path*.

    Strings and sound files are merged over the en-US baseline so that any
    key or audio file missing from the target language falls back to en-US.
    """
    # Derive the project layout from the output path
    # (output_path is usually main/assets/lang_config.h).
    main_dir = os.path.dirname(output_path)  # main/assets
    if os.path.basename(main_dir) == 'assets':
        main_dir = os.path.dirname(main_dir)  # main
    project_dir = os.path.dirname(main_dir)  # project root; NOTE(review): unused
    assets_dir = os.path.join(main_dir, 'assets')
    # Path of the language JSON file to process
    input_path = os.path.join(assets_dir, 'locales', lang_code, 'language.json')
    print(f"Processing language: {lang_code}")
    print(f"Input file path: {input_path}")
    print(f"Output file path: {output_path}")
    if not os.path.exists(input_path):
        raise FileNotFoundError(f"Language file not found: {input_path}")
    with open(input_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    # Validate the expected JSON structure
    if 'language' not in data or 'strings' not in data:
        raise ValueError("Invalid JSON structure")
    # Load the en-US baseline language data
    base_data = load_base_language(assets_dir)
    # Merge strings: en-US is the baseline, the user language overrides it
    base_strings = base_data.get('strings', {})
    user_strings = data['strings']
    merged_strings = base_strings.copy()
    merged_strings.update(user_strings)
    # Statistics (informational only)
    base_count = len(base_strings)
    user_count = len(user_strings)
    total_count = len(merged_strings)
    fallback_count = total_count - user_count
    print(f"Language {lang_code} string statistics:")
    print(f" - Base language (en-US): {base_count} strings")
    print(f" - User language: {user_count} strings")
    print(f" - Total: {total_count} strings")
    if fallback_count > 0:
        print(f" - Fallback to en-US: {fallback_count} strings")
    # Generate the string constants
    strings = []
    sounds = []
    for key, value in merged_strings.items():
        value = value.replace('"', '\\"')  # escape double quotes for the C++ literal
        strings.append(f' constexpr const char* {key.upper()} = "{value}";')
    # Collect sound files: en-US is the baseline, the user language overrides it
    current_lang_dir = os.path.join(assets_dir, 'locales', lang_code)
    base_lang_dir = os.path.join(assets_dir, 'locales', 'en-US')
    common_dir = os.path.join(assets_dir, 'common')
    # All candidate sound files
    base_sounds = get_sound_files(base_lang_dir)
    current_sounds = get_sound_files(current_lang_dir)
    common_sounds = get_sound_files(common_dir)
    # Union of baseline and user-language sounds
    all_sound_files = set(base_sounds)
    all_sound_files.update(current_sounds)
    # Sound statistics (informational only)
    base_sound_count = len(base_sounds)
    user_sound_count = len(current_sounds)
    common_sound_count = len(common_sounds)
    sound_fallback_count = len(set(base_sounds) - set(current_sounds))
    print(f"Language {lang_code} sound statistics:")
    print(f" - Base language (en-US): {base_sound_count} sounds")
    print(f" - User language: {user_sound_count} sounds")
    print(f" - Common sounds: {common_sound_count} sounds")
    if sound_fallback_count > 0:
        print(f" - Sound fallback to en-US: {sound_fallback_count} sounds")
    # Generate language-specific sound constants
    for file in sorted(all_sound_files):
        base_name = os.path.splitext(file)[0]
        # Prefer the current language's sound; fall back to en-US otherwise.
        if file in current_sounds:
            sound_lang = lang_code.replace('-', '_').lower()
        else:
            sound_lang = 'en_us'
        # NOTE(review): sound_lang is computed but never used in the emitted
        # snippet below — the symbol names depend only on base_name.
        sounds.append(f'''
extern const char ogg_{base_name}_start[] asm("_binary_{base_name}_ogg_start");
extern const char ogg_{base_name}_end[] asm("_binary_{base_name}_ogg_end");
static const std::string_view OGG_{base_name.upper()} {{
static_cast<const char*>(ogg_{base_name}_start),
static_cast<size_t>(ogg_{base_name}_end - ogg_{base_name}_start)
}};''')
    # Generate common (language-independent) sound constants
    for file in sorted(common_sounds):
        base_name = os.path.splitext(file)[0]
        sounds.append(f'''
extern const char ogg_{base_name}_start[] asm("_binary_{base_name}_ogg_start");
extern const char ogg_{base_name}_end[] asm("_binary_{base_name}_ogg_end");
static const std::string_view OGG_{base_name.upper()} {{
static_cast<const char*>(ogg_{base_name}_start),
static_cast<size_t>(ogg_{base_name}_end - ogg_{base_name}_start)
}};''')
    # Fill the template
    content = HEADER_TEMPLATE.format(
        lang_code=lang_code,
        lang_code_for_font=lang_code.replace('-', '_').lower(),
        strings="\n".join(sorted(strings)),
        sounds="\n".join(sorted(sounds))
    )
    # Write the generated header
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(content)
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Generate language configuration header file with en-US fallback")
    parser.add_argument("--language", required=True, help="Language code (e.g: zh-CN, en-US, ja-JP)")
    parser.add_argument("--output", required=True, help="Output header file path")
    args = parser.parse_args()
    try:
        generate_header(args.language, args.output)
        print(f"Successfully generated language config file: {args.output}")
    except Exception as e:
        # Report the failure and exit non-zero so build systems notice
        # instead of continuing with a stale or missing header.
        print(f"Error: {e}")
        exit(1)

View File

@@ -1,3 +1,3 @@
#!/bin/sh
# mp3_to_ogg.sh <input_mp3_file> <output_ogg_file>
ffmpeg -i $1 -c:a libopus -b:a 16k -ac 1 -ar 16000 -frame_duration 60 $2
#!/bin/sh
# mp3_to_ogg.sh <input_audio_file> <output_ogg_file>
# Convert an ffmpeg-readable audio file to 16 kHz mono Opus-in-OGG.
# Quote the positional parameters so paths containing spaces or shell
# metacharacters are passed to ffmpeg as single arguments.
ffmpeg -i "$1" -c:a libopus -b:a 16k -ac 1 -ar 16000 -frame_duration 60 "$2"

View File

@@ -1,29 +1,29 @@
# ogg_covertor 小智AI OGG 批量转换器
本脚本为OGG批量转换工具支持将输入的音频文件转换为小智可使用的OGG格式
基于Python第三方库`ffmpeg-python`实现
支持OGG和音频之间的互转响度调节等功能
# 创建并激活虚拟环境
```bash
# 创建虚拟环境
python -m venv venv
# 激活虚拟环境
source venv/bin/activate # Mac/Linux
venv\Scripts\activate # Windows
```
# 安装依赖
请在虚拟环境中执行
```bash
pip install ffmpeg-python
```
# 运行脚本
```bash
python ogg_covertor.py
```
# ogg_covertor 小智AI OGG 批量转换器
本脚本为OGG批量转换工具支持将输入的音频文件转换为小智可使用的OGG格式
基于Python第三方库`ffmpeg-python`实现
支持OGG和音频之间的互转响度调节等功能
# 创建并激活虚拟环境
```bash
# 创建虚拟环境
python -m venv venv
# 激活虚拟环境
source venv/bin/activate # Mac/Linux
venv\Scripts\activate # Windows
```
# 安装依赖
请在虚拟环境中执行
```bash
pip install ffmpeg-python
```
# 运行脚本
```bash
python ogg_covertor.py
```

View File

@@ -1,230 +1,230 @@
import tkinter as tk
from tkinter import ttk, filedialog, messagebox
import os
import threading
import sys
import ffmpeg
class AudioConverterApp:
    def __init__(self, master):
        """Initialize the converter window and its state on *master* (the Tk root)."""
        self.master = master
        master.title("小智AI OGG音频批量转换工具")
        master.geometry("680x600")  # window height tuned to fit all widgets
        # State variables backing the UI widgets.
        self.mode = tk.StringVar(value="audio_to_ogg")  # conversion direction
        self.output_dir = tk.StringVar()
        self.output_dir.set(os.path.abspath("output"))
        self.enable_loudnorm = tk.BooleanVar(value=True)  # loudness normalization on by default
        self.target_lufs = tk.DoubleVar(value=-16.0)  # target loudness in LUFS
        # Build the widgets and redirect program output into the GUI.
        self.create_widgets()
        self.redirect_output()
def create_widgets(self):
# 模式选择
mode_frame = ttk.LabelFrame(self.master, text="转换模式")
mode_frame.grid(row=0, column=0, padx=10, pady=5, sticky="ew")
ttk.Radiobutton(mode_frame, text="音频转到OGG", variable=self.mode,
value="audio_to_ogg", command=self.toggle_settings,
width=12).grid(row=0, column=0, padx=5)
ttk.Radiobutton(mode_frame, text="OGG转回音频", variable=self.mode,
value="ogg_to_audio", command=self.toggle_settings,
width=12).grid(row=0, column=1, padx=5)
# 响度设置
self.loudnorm_frame = ttk.Frame(self.master)
self.loudnorm_frame.grid(row=1, column=0, padx=10, pady=5, sticky="ew")
ttk.Checkbutton(self.loudnorm_frame, text="启用响度调整",
variable=self.enable_loudnorm, width=15
).grid(row=0, column=0, padx=2)
ttk.Entry(self.loudnorm_frame, textvariable=self.target_lufs,
width=6).grid(row=0, column=1, padx=2)
ttk.Label(self.loudnorm_frame, text="LUFS").grid(row=0, column=2, padx=2)
# 文件选择
file_frame = ttk.LabelFrame(self.master, text="输入文件")
file_frame.grid(row=2, column=0, padx=10, pady=5, sticky="nsew")
# 文件操作按钮
ttk.Button(file_frame, text="选择文件", command=self.select_files,
width=12).grid(row=0, column=0, padx=5, pady=2)
ttk.Button(file_frame, text="移除选中", command=self.remove_selected,
width=12).grid(row=0, column=1, padx=5, pady=2)
ttk.Button(file_frame, text="清空列表", command=self.clear_files,
width=12).grid(row=0, column=2, padx=5, pady=2)
# 文件列表使用Treeview
self.tree = ttk.Treeview(file_frame, columns=("selected", "filename"),
show="headings", height=8)
self.tree.heading("selected", text="选中", anchor=tk.W)
self.tree.heading("filename", text="文件名", anchor=tk.W)
self.tree.column("selected", width=60, anchor=tk.W)
self.tree.column("filename", width=600, anchor=tk.W)
self.tree.grid(row=1, column=0, columnspan=3, sticky="nsew", padx=5, pady=2)
self.tree.bind("<ButtonRelease-1>", self.on_tree_click)
# 输出目录
output_frame = ttk.LabelFrame(self.master, text="输出目录")
output_frame.grid(row=3, column=0, padx=10, pady=5, sticky="ew")
ttk.Entry(output_frame, textvariable=self.output_dir, width=60
).grid(row=0, column=0, padx=5, sticky="ew")
ttk.Button(output_frame, text="浏览", command=self.select_output_dir,
width=8).grid(row=0, column=1, padx=5)
# 转换按钮区域
button_frame = ttk.Frame(self.master)
button_frame.grid(row=4, column=0, padx=10, pady=10, sticky="ew")
ttk.Button(button_frame, text="转换全部文件", command=lambda: self.start_conversion(True),
width=15).pack(side=tk.LEFT, padx=5)
ttk.Button(button_frame, text="转换选中文件", command=lambda: self.start_conversion(False),
width=15).pack(side=tk.LEFT, padx=5)
# 日志区域
log_frame = ttk.LabelFrame(self.master, text="日志")
log_frame.grid(row=5, column=0, padx=10, pady=5, sticky="nsew")
self.log_text = tk.Text(log_frame, height=14, width=80)
self.log_text.pack(fill=tk.BOTH, expand=True)
# 配置布局权重
self.master.columnconfigure(0, weight=1)
self.master.rowconfigure(2, weight=1)
self.master.rowconfigure(5, weight=3)
file_frame.columnconfigure(0, weight=1)
file_frame.rowconfigure(1, weight=1)
def toggle_settings(self):
if self.mode.get() == "audio_to_ogg":
self.loudnorm_frame.grid()
else:
self.loudnorm_frame.grid_remove()
def select_files(self):
file_types = [
("音频文件", "*.wav *.mogg *.ogg *.flac") if self.mode.get() == "audio_to_ogg"
else ("ogg文件", "*.ogg")
]
files = filedialog.askopenfilenames(filetypes=file_types)
for f in files:
self.tree.insert("", tk.END, values=("[ ]", os.path.basename(f)), tags=(f,))
def on_tree_click(self, event):
"""处理复选框点击事件"""
region = self.tree.identify("region", event.x, event.y)
if region == "cell":
col = self.tree.identify_column(event.x)
item = self.tree.identify_row(event.y)
if col == "#1": # 点击的是选中列
current_val = self.tree.item(item, "values")[0]
new_val = "[√]" if current_val == "[ ]" else "[ ]"
self.tree.item(item, values=(new_val, self.tree.item(item, "values")[1]))
def remove_selected(self):
"""移除选中的文件"""
to_remove = []
for item in self.tree.get_children():
if self.tree.item(item, "values")[0] == "[√]":
to_remove.append(item)
for item in reversed(to_remove):
self.tree.delete(item)
def clear_files(self):
"""清空所有文件"""
for item in self.tree.get_children():
self.tree.delete(item)
def select_output_dir(self):
path = filedialog.askdirectory()
if path:
self.output_dir.set(path)
def redirect_output(self):
class StdoutRedirector:
def __init__(self, text_widget):
self.text_widget = text_widget
self.original_stdout = sys.stdout
def write(self, message):
self.text_widget.insert(tk.END, message)
self.text_widget.see(tk.END)
self.original_stdout.write(message)
def flush(self):
self.original_stdout.flush()
sys.stdout = StdoutRedirector(self.log_text)
def start_conversion(self, convert_all):
"""开始转换"""
input_files = []
for item in self.tree.get_children():
if convert_all or self.tree.item(item, "values")[0] == "[√]":
input_files.append(self.tree.item(item, "tags")[0])
if not input_files:
msg = "没有找到可转换的文件" if convert_all else "没有选中任何文件"
messagebox.showwarning("警告", msg)
return
os.makedirs(self.output_dir.get(), exist_ok=True)
try:
if self.mode.get() == "audio_to_ogg":
target_lufs = self.target_lufs.get() if self.enable_loudnorm.get() else None
thread = threading.Thread(target=self.convert_audio_to_ogg, args=(target_lufs, input_files))
else:
thread = threading.Thread(target=self.convert_ogg_to_audio, args=(input_files,))
thread.start()
except Exception as e:
print(f"转换初始化失败: {str(e)}")
def convert_audio_to_ogg(self, target_lufs, input_files):
    """Convert audio files to the OGG/Opus format used by the device.

    Args:
        target_lufs: target integrated loudness in LUFS, or None to skip
            loudness normalization.
        input_files: absolute paths of the files to convert.

    Fix: the original accepted ``target_lufs`` but never used it, so the
    loudness checkbox in the UI had no effect; an ffmpeg ``loudnorm``
    filter is now inserted when a target is given.  The progress messages
    also printed the literal text "(unknown)"; they now show the file.
    """
    for input_path in input_files:
        try:
            filename = os.path.basename(input_path)
            base_name = os.path.splitext(filename)[0]
            output_path = os.path.join(self.output_dir.get(), f"{base_name}.ogg")
            print(f"正在转换: {filename}")
            stream = ffmpeg.input(input_path)
            if target_lufs is not None:
                # EBU R128 loudness normalization to the requested level.
                stream = stream.filter('loudnorm', i=target_lufs)
            (
                stream
                .output(output_path, acodec='libopus', audio_bitrate='16k',
                        ac=1, ar=16000, frame_duration=60)
                .run(overwrite_output=True)
            )
            print(f"转换成功: {output_path}\n")
        except Exception as e:
            # Best-effort batch conversion: report and continue with the rest.
            print(f"转换失败: {str(e)}\n")
def convert_ogg_to_audio(self, input_files):
    """Convert OGG files back to plain audio (16-bit PCM WAV).

    Fix: the original was a copy of the forward conversion — it wrote
    ``<name>.ogg`` and re-encoded with the same Opus/16 kHz settings, so
    "OGG转回音频" produced another OGG instead of decoding the input.
    The output is now a decoded WAV file.
    """
    for input_path in input_files:
        try:
            filename = os.path.basename(input_path)
            base_name = os.path.splitext(filename)[0]
            # Decode to WAV so the result is plain audio, not another OGG.
            output_path = os.path.join(self.output_dir.get(), f"{base_name}.wav")
            print(f"正在转换: {filename}")
            (
                ffmpeg
                .input(input_path)
                .output(output_path, acodec='pcm_s16le')
                .run(overwrite_output=True)
            )
            print(f"转换成功: {output_path}\n")
        except Exception as e:
            # Best-effort batch conversion: report and continue with the rest.
            print(f"转换失败: {str(e)}\n")
if __name__ == "__main__":
root = tk.Tk()
app = AudioConverterApp(root)
root.mainloop()
import tkinter as tk
from tkinter import ttk, filedialog, messagebox
import os
import threading
import sys
import ffmpeg
class AudioConverterApp:
def __init__(self, master):
self.master = master
master.title("小智AI OGG音频批量转换工具")
master.geometry("680x600") # 调整窗口高度
# 初始化变量
self.mode = tk.StringVar(value="audio_to_ogg")
self.output_dir = tk.StringVar()
self.output_dir.set(os.path.abspath("output"))
self.enable_loudnorm = tk.BooleanVar(value=True)
self.target_lufs = tk.DoubleVar(value=-16.0)
# 创建UI组件
self.create_widgets()
self.redirect_output()
def create_widgets(self):
# 模式选择
mode_frame = ttk.LabelFrame(self.master, text="转换模式")
mode_frame.grid(row=0, column=0, padx=10, pady=5, sticky="ew")
ttk.Radiobutton(mode_frame, text="音频转到OGG", variable=self.mode,
value="audio_to_ogg", command=self.toggle_settings,
width=12).grid(row=0, column=0, padx=5)
ttk.Radiobutton(mode_frame, text="OGG转回音频", variable=self.mode,
value="ogg_to_audio", command=self.toggle_settings,
width=12).grid(row=0, column=1, padx=5)
# 响度设置
self.loudnorm_frame = ttk.Frame(self.master)
self.loudnorm_frame.grid(row=1, column=0, padx=10, pady=5, sticky="ew")
ttk.Checkbutton(self.loudnorm_frame, text="启用响度调整",
variable=self.enable_loudnorm, width=15
).grid(row=0, column=0, padx=2)
ttk.Entry(self.loudnorm_frame, textvariable=self.target_lufs,
width=6).grid(row=0, column=1, padx=2)
ttk.Label(self.loudnorm_frame, text="LUFS").grid(row=0, column=2, padx=2)
# 文件选择
file_frame = ttk.LabelFrame(self.master, text="输入文件")
file_frame.grid(row=2, column=0, padx=10, pady=5, sticky="nsew")
# 文件操作按钮
ttk.Button(file_frame, text="选择文件", command=self.select_files,
width=12).grid(row=0, column=0, padx=5, pady=2)
ttk.Button(file_frame, text="移除选中", command=self.remove_selected,
width=12).grid(row=0, column=1, padx=5, pady=2)
ttk.Button(file_frame, text="清空列表", command=self.clear_files,
width=12).grid(row=0, column=2, padx=5, pady=2)
# 文件列表使用Treeview
self.tree = ttk.Treeview(file_frame, columns=("selected", "filename"),
show="headings", height=8)
self.tree.heading("selected", text="选中", anchor=tk.W)
self.tree.heading("filename", text="文件名", anchor=tk.W)
self.tree.column("selected", width=60, anchor=tk.W)
self.tree.column("filename", width=600, anchor=tk.W)
self.tree.grid(row=1, column=0, columnspan=3, sticky="nsew", padx=5, pady=2)
self.tree.bind("<ButtonRelease-1>", self.on_tree_click)
# 输出目录
output_frame = ttk.LabelFrame(self.master, text="输出目录")
output_frame.grid(row=3, column=0, padx=10, pady=5, sticky="ew")
ttk.Entry(output_frame, textvariable=self.output_dir, width=60
).grid(row=0, column=0, padx=5, sticky="ew")
ttk.Button(output_frame, text="浏览", command=self.select_output_dir,
width=8).grid(row=0, column=1, padx=5)
# 转换按钮区域
button_frame = ttk.Frame(self.master)
button_frame.grid(row=4, column=0, padx=10, pady=10, sticky="ew")
ttk.Button(button_frame, text="转换全部文件", command=lambda: self.start_conversion(True),
width=15).pack(side=tk.LEFT, padx=5)
ttk.Button(button_frame, text="转换选中文件", command=lambda: self.start_conversion(False),
width=15).pack(side=tk.LEFT, padx=5)
# 日志区域
log_frame = ttk.LabelFrame(self.master, text="日志")
log_frame.grid(row=5, column=0, padx=10, pady=5, sticky="nsew")
self.log_text = tk.Text(log_frame, height=14, width=80)
self.log_text.pack(fill=tk.BOTH, expand=True)
# 配置布局权重
self.master.columnconfigure(0, weight=1)
self.master.rowconfigure(2, weight=1)
self.master.rowconfigure(5, weight=3)
file_frame.columnconfigure(0, weight=1)
file_frame.rowconfigure(1, weight=1)
def toggle_settings(self):
if self.mode.get() == "audio_to_ogg":
self.loudnorm_frame.grid()
else:
self.loudnorm_frame.grid_remove()
def select_files(self):
file_types = [
("音频文件", "*.wav *.mogg *.ogg *.flac") if self.mode.get() == "audio_to_ogg"
else ("ogg文件", "*.ogg")
]
files = filedialog.askopenfilenames(filetypes=file_types)
for f in files:
self.tree.insert("", tk.END, values=("[ ]", os.path.basename(f)), tags=(f,))
def on_tree_click(self, event):
"""处理复选框点击事件"""
region = self.tree.identify("region", event.x, event.y)
if region == "cell":
col = self.tree.identify_column(event.x)
item = self.tree.identify_row(event.y)
if col == "#1": # 点击的是选中列
current_val = self.tree.item(item, "values")[0]
new_val = "[√]" if current_val == "[ ]" else "[ ]"
self.tree.item(item, values=(new_val, self.tree.item(item, "values")[1]))
def remove_selected(self):
"""移除选中的文件"""
to_remove = []
for item in self.tree.get_children():
if self.tree.item(item, "values")[0] == "[√]":
to_remove.append(item)
for item in reversed(to_remove):
self.tree.delete(item)
def clear_files(self):
"""清空所有文件"""
for item in self.tree.get_children():
self.tree.delete(item)
def select_output_dir(self):
path = filedialog.askdirectory()
if path:
self.output_dir.set(path)
def redirect_output(self):
class StdoutRedirector:
def __init__(self, text_widget):
self.text_widget = text_widget
self.original_stdout = sys.stdout
def write(self, message):
self.text_widget.insert(tk.END, message)
self.text_widget.see(tk.END)
self.original_stdout.write(message)
def flush(self):
self.original_stdout.flush()
sys.stdout = StdoutRedirector(self.log_text)
def start_conversion(self, convert_all):
"""开始转换"""
input_files = []
for item in self.tree.get_children():
if convert_all or self.tree.item(item, "values")[0] == "[√]":
input_files.append(self.tree.item(item, "tags")[0])
if not input_files:
msg = "没有找到可转换的文件" if convert_all else "没有选中任何文件"
messagebox.showwarning("警告", msg)
return
os.makedirs(self.output_dir.get(), exist_ok=True)
try:
if self.mode.get() == "audio_to_ogg":
target_lufs = self.target_lufs.get() if self.enable_loudnorm.get() else None
thread = threading.Thread(target=self.convert_audio_to_ogg, args=(target_lufs, input_files))
else:
thread = threading.Thread(target=self.convert_ogg_to_audio, args=(input_files,))
thread.start()
except Exception as e:
print(f"转换初始化失败: {str(e)}")
def convert_audio_to_ogg(self, target_lufs, input_files):
    """Convert audio files to the OGG/Opus format used by the device.

    Args:
        target_lufs: target integrated loudness in LUFS, or None to skip
            loudness normalization.
        input_files: absolute paths of the files to convert.

    Fix: the original accepted ``target_lufs`` but never used it, so the
    loudness checkbox in the UI had no effect; an ffmpeg ``loudnorm``
    filter is now inserted when a target is given.  The progress messages
    also printed the literal text "(unknown)"; they now show the file.
    """
    for input_path in input_files:
        try:
            filename = os.path.basename(input_path)
            base_name = os.path.splitext(filename)[0]
            output_path = os.path.join(self.output_dir.get(), f"{base_name}.ogg")
            print(f"正在转换: {filename}")
            stream = ffmpeg.input(input_path)
            if target_lufs is not None:
                # EBU R128 loudness normalization to the requested level.
                stream = stream.filter('loudnorm', i=target_lufs)
            (
                stream
                .output(output_path, acodec='libopus', audio_bitrate='16k',
                        ac=1, ar=16000, frame_duration=60)
                .run(overwrite_output=True)
            )
            print(f"转换成功: {output_path}\n")
        except Exception as e:
            # Best-effort batch conversion: report and continue with the rest.
            print(f"转换失败: {str(e)}\n")
def convert_ogg_to_audio(self, input_files):
    """Convert OGG files back to plain audio (16-bit PCM WAV).

    Fix: the original was a copy of the forward conversion — it wrote
    ``<name>.ogg`` and re-encoded with the same Opus/16 kHz settings, so
    "OGG转回音频" produced another OGG instead of decoding the input.
    The output is now a decoded WAV file.
    """
    for input_path in input_files:
        try:
            filename = os.path.basename(input_path)
            base_name = os.path.splitext(filename)[0]
            # Decode to WAV so the result is plain audio, not another OGG.
            output_path = os.path.join(self.output_dir.get(), f"{base_name}.wav")
            print(f"正在转换: {filename}")
            (
                ffmpeg
                .input(input_path)
                .output(output_path, acodec='pcm_s16le')
                .run(overwrite_output=True)
            )
            print(f"转换成功: {output_path}\n")
        except Exception as e:
            # Best-effort batch conversion: report and continue with the rest.
            print(f"转换失败: {str(e)}\n")
if __name__ == "__main__":
root = tk.Tk()
app = AudioConverterApp(root)
root.mainloop()

View File

@@ -1,95 +1,95 @@
# P3音频格式转换与播放工具
这个目录包含两个用于处理P3格式音频文件的Python脚本
## 1. 音频转换工具 (convert_audio_to_p3.py)
将普通音频文件转换为P3格式4字节header + Opus数据包的流式结构并进行响度标准化。
### 使用方法
```bash
python convert_audio_to_p3.py <输入音频文件> <输出P3文件> [-l LUFS] [-d]
```
其中,可选选项 `-l` 用于指定响度标准化的目标响度,默认为 -16 LUFS可选选项 `-d` 可以禁用响度标准化。
如果输入的音频文件符合下面的任一条件,建议使用 `-d` 禁用响度标准化:
- 音频过短
- 音频已经调整过响度
- 音频来自默认 TTS (小智当前使用的 TTS 的默认响度已是 -16 LUFS
例如:
```bash
python convert_audio_to_p3.py input.mp3 output.p3
```
## 2. P3音频播放工具 (play_p3.py)
播放P3格式的音频文件。
### 特性
- 解码并播放P3格式的音频文件
- 在播放结束或用户中断时应用淡出效果,避免破音
- 支持通过命令行参数指定要播放的文件
### 使用方法
```bash
python play_p3.py <P3文件路径>
```
例如:
```bash
python play_p3.py output.p3
```
## 3. 音频转回工具 (convert_p3_to_audio.py)
将P3格式转换回普通音频文件。
### 使用方法
```bash
python convert_p3_to_audio.py <输入P3文件> <输出音频文件>
```
输出音频文件需要有扩展名。
例如:
```bash
python convert_p3_to_audio.py input.p3 output.wav
```
## 4. 音频/P3批量转换工具
一个图形化的工具支持批量转换音频到P3P3到音频
![](./img/img.png)
### 使用方法:
```bash
python batch_convert_gui.py
```
## 依赖安装
在使用这些脚本前请确保安装了所需的Python库
```bash
pip install librosa opuslib numpy tqdm sounddevice pyloudnorm soundfile
```
或者使用提供的requirements.txt文件
```bash
pip install -r requirements.txt
```
## P3格式说明
P3格式是一种简单的流式音频格式结构如下
- 每个音频帧由一个4字节的头部和一个Opus编码的数据包组成
- 头部格式:[1字节类型, 1字节保留, 2字节长度]
- 采样率固定为16000Hz单声道
# P3音频格式转换与播放工具
这个目录包含两个用于处理P3格式音频文件的Python脚本
## 1. 音频转换工具 (convert_audio_to_p3.py)
将普通音频文件转换为P3格式4字节header + Opus数据包的流式结构并进行响度标准化。
### 使用方法
```bash
python convert_audio_to_p3.py <输入音频文件> <输出P3文件> [-l LUFS] [-d]
```
其中,可选选项 `-l` 用于指定响度标准化的目标响度,默认为 -16 LUFS可选选项 `-d` 可以禁用响度标准化。
如果输入的音频文件符合下面的任一条件,建议使用 `-d` 禁用响度标准化:
- 音频过短
- 音频已经调整过响度
- 音频来自默认 TTS (小智当前使用的 TTS 的默认响度已是 -16 LUFS
例如:
```bash
python convert_audio_to_p3.py input.mp3 output.p3
```
## 2. P3音频播放工具 (play_p3.py)
播放P3格式的音频文件。
### 特性
- 解码并播放P3格式的音频文件
- 在播放结束或用户中断时应用淡出效果,避免破音
- 支持通过命令行参数指定要播放的文件
### 使用方法
```bash
python play_p3.py <P3文件路径>
```
例如:
```bash
python play_p3.py output.p3
```
## 3. 音频转回工具 (convert_p3_to_audio.py)
将P3格式转换回普通音频文件。
### 使用方法
```bash
python convert_p3_to_audio.py <输入P3文件> <输出音频文件>
```
输出音频文件需要有扩展名。
例如:
```bash
python convert_p3_to_audio.py input.p3 output.wav
```
## 4. 音频/P3批量转换工具
一个图形化的工具支持批量转换音频到P3P3到音频
![](./img/img.png)
### 使用方法:
```bash
python batch_convert_gui.py
```
## 依赖安装
在使用这些脚本前请确保安装了所需的Python库
```bash
pip install librosa opuslib numpy tqdm sounddevice pyloudnorm soundfile
```
或者使用提供的requirements.txt文件
```bash
pip install -r requirements.txt
```
## P3格式说明
P3格式是一种简单的流式音频格式结构如下
- 每个音频帧由一个4字节的头部和一个Opus编码的数据包组成
- 头部格式:[1字节类型, 1字节保留, 2字节长度]
- 采样率固定为16000Hz单声道
- 每帧时长为60ms

View File

@@ -1,221 +1,221 @@
import tkinter as tk
from tkinter import ttk, filedialog, messagebox
import os
import threading
import sys
from convert_audio_to_p3 import encode_audio_to_opus
from convert_p3_to_audio import decode_p3_to_audio
class AudioConverterApp:
def __init__(self, master):
self.master = master
master.title("音频/P3 批量转换工具")
master.geometry("680x600") # 调整窗口高度
# 初始化变量
self.mode = tk.StringVar(value="audio_to_p3")
self.output_dir = tk.StringVar()
self.output_dir.set(os.path.abspath("output"))
self.enable_loudnorm = tk.BooleanVar(value=True)
self.target_lufs = tk.DoubleVar(value=-16.0)
# 创建UI组件
self.create_widgets()
self.redirect_output()
def create_widgets(self):
# 模式选择
mode_frame = ttk.LabelFrame(self.master, text="转换模式")
mode_frame.grid(row=0, column=0, padx=10, pady=5, sticky="ew")
ttk.Radiobutton(mode_frame, text="音频转P3", variable=self.mode,
value="audio_to_p3", command=self.toggle_settings,
width=12).grid(row=0, column=0, padx=5)
ttk.Radiobutton(mode_frame, text="P3转音频", variable=self.mode,
value="p3_to_audio", command=self.toggle_settings,
width=12).grid(row=0, column=1, padx=5)
# 响度设置
self.loudnorm_frame = ttk.Frame(self.master)
self.loudnorm_frame.grid(row=1, column=0, padx=10, pady=5, sticky="ew")
ttk.Checkbutton(self.loudnorm_frame, text="启用响度调整",
variable=self.enable_loudnorm, width=15
).grid(row=0, column=0, padx=2)
ttk.Entry(self.loudnorm_frame, textvariable=self.target_lufs,
width=6).grid(row=0, column=1, padx=2)
ttk.Label(self.loudnorm_frame, text="LUFS").grid(row=0, column=2, padx=2)
# 文件选择
file_frame = ttk.LabelFrame(self.master, text="输入文件")
file_frame.grid(row=2, column=0, padx=10, pady=5, sticky="nsew")
# 文件操作按钮
ttk.Button(file_frame, text="选择文件", command=self.select_files,
width=12).grid(row=0, column=0, padx=5, pady=2)
ttk.Button(file_frame, text="移除选中", command=self.remove_selected,
width=12).grid(row=0, column=1, padx=5, pady=2)
ttk.Button(file_frame, text="清空列表", command=self.clear_files,
width=12).grid(row=0, column=2, padx=5, pady=2)
# 文件列表使用Treeview
self.tree = ttk.Treeview(file_frame, columns=("selected", "filename"),
show="headings", height=8)
self.tree.heading("selected", text="选中", anchor=tk.W)
self.tree.heading("filename", text="文件名", anchor=tk.W)
self.tree.column("selected", width=60, anchor=tk.W)
self.tree.column("filename", width=600, anchor=tk.W)
self.tree.grid(row=1, column=0, columnspan=3, sticky="nsew", padx=5, pady=2)
self.tree.bind("<ButtonRelease-1>", self.on_tree_click)
# 输出目录
output_frame = ttk.LabelFrame(self.master, text="输出目录")
output_frame.grid(row=3, column=0, padx=10, pady=5, sticky="ew")
ttk.Entry(output_frame, textvariable=self.output_dir, width=60
).grid(row=0, column=0, padx=5, sticky="ew")
ttk.Button(output_frame, text="浏览", command=self.select_output_dir,
width=8).grid(row=0, column=1, padx=5)
# 转换按钮区域
button_frame = ttk.Frame(self.master)
button_frame.grid(row=4, column=0, padx=10, pady=10, sticky="ew")
ttk.Button(button_frame, text="转换全部文件", command=lambda: self.start_conversion(True),
width=15).pack(side=tk.LEFT, padx=5)
ttk.Button(button_frame, text="转换选中文件", command=lambda: self.start_conversion(False),
width=15).pack(side=tk.LEFT, padx=5)
# 日志区域
log_frame = ttk.LabelFrame(self.master, text="日志")
log_frame.grid(row=5, column=0, padx=10, pady=5, sticky="nsew")
self.log_text = tk.Text(log_frame, height=14, width=80)
self.log_text.pack(fill=tk.BOTH, expand=True)
# 配置布局权重
self.master.columnconfigure(0, weight=1)
self.master.rowconfigure(2, weight=1)
self.master.rowconfigure(5, weight=3)
file_frame.columnconfigure(0, weight=1)
file_frame.rowconfigure(1, weight=1)
def toggle_settings(self):
if self.mode.get() == "audio_to_p3":
self.loudnorm_frame.grid()
else:
self.loudnorm_frame.grid_remove()
def select_files(self):
file_types = [
("音频文件", "*.wav *.mp3 *.ogg *.flac") if self.mode.get() == "audio_to_p3"
else ("P3文件", "*.p3")
]
files = filedialog.askopenfilenames(filetypes=file_types)
for f in files:
self.tree.insert("", tk.END, values=("[ ]", os.path.basename(f)), tags=(f,))
def on_tree_click(self, event):
"""处理复选框点击事件"""
region = self.tree.identify("region", event.x, event.y)
if region == "cell":
col = self.tree.identify_column(event.x)
item = self.tree.identify_row(event.y)
if col == "#1": # 点击的是选中列
current_val = self.tree.item(item, "values")[0]
new_val = "[√]" if current_val == "[ ]" else "[ ]"
self.tree.item(item, values=(new_val, self.tree.item(item, "values")[1]))
def remove_selected(self):
"""移除选中的文件"""
to_remove = []
for item in self.tree.get_children():
if self.tree.item(item, "values")[0] == "[√]":
to_remove.append(item)
for item in reversed(to_remove):
self.tree.delete(item)
def clear_files(self):
"""清空所有文件"""
for item in self.tree.get_children():
self.tree.delete(item)
def select_output_dir(self):
path = filedialog.askdirectory()
if path:
self.output_dir.set(path)
def redirect_output(self):
class StdoutRedirector:
def __init__(self, text_widget):
self.text_widget = text_widget
self.original_stdout = sys.stdout
def write(self, message):
self.text_widget.insert(tk.END, message)
self.text_widget.see(tk.END)
self.original_stdout.write(message)
def flush(self):
self.original_stdout.flush()
sys.stdout = StdoutRedirector(self.log_text)
def start_conversion(self, convert_all):
"""开始转换"""
input_files = []
for item in self.tree.get_children():
if convert_all or self.tree.item(item, "values")[0] == "[√]":
input_files.append(self.tree.item(item, "tags")[0])
if not input_files:
msg = "没有找到可转换的文件" if convert_all else "没有选中任何文件"
messagebox.showwarning("警告", msg)
return
os.makedirs(self.output_dir.get(), exist_ok=True)
try:
if self.mode.get() == "audio_to_p3":
target_lufs = self.target_lufs.get() if self.enable_loudnorm.get() else None
thread = threading.Thread(target=self.convert_audio_to_p3, args=(target_lufs, input_files))
else:
thread = threading.Thread(target=self.convert_p3_to_audio, args=(input_files,))
thread.start()
except Exception as e:
print(f"转换初始化失败: {str(e)}")
def convert_audio_to_p3(self, target_lufs, input_files):
    """Convert audio files to the P3 stream format.

    Args:
        target_lufs: target loudness in LUFS, or None to disable
            normalization (forwarded to ``encode_audio_to_opus``).
        input_files: absolute paths of the files to convert.

    Fix: the progress/success messages printed the literal text
    "(unknown)"; they now interpolate the actual file names.
    """
    for input_path in input_files:
        try:
            filename = os.path.basename(input_path)
            base_name = os.path.splitext(filename)[0]
            output_path = os.path.join(self.output_dir.get(), f"{base_name}.p3")
            print(f"正在转换: {filename}")
            encode_audio_to_opus(input_path, output_path, target_lufs)
            print(f"转换成功: {output_path}\n")
        except Exception as e:
            # Best-effort batch conversion: report and continue with the rest.
            print(f"转换失败: {str(e)}\n")
def convert_p3_to_audio(self, input_files):
    """Convert P3 files back to WAV audio.

    Fix: the progress/success messages printed the literal text
    "(unknown)"; they now interpolate the actual file names.
    """
    for input_path in input_files:
        try:
            filename = os.path.basename(input_path)
            base_name = os.path.splitext(filename)[0]
            output_path = os.path.join(self.output_dir.get(), f"{base_name}.wav")
            print(f"正在转换: {filename}")
            decode_p3_to_audio(input_path, output_path)
            print(f"转换成功: {output_path}\n")
        except Exception as e:
            # Best-effort batch conversion: report and continue with the rest.
            print(f"转换失败: {str(e)}\n")
if __name__ == "__main__":
root = tk.Tk()
app = AudioConverterApp(root)
import tkinter as tk
from tkinter import ttk, filedialog, messagebox
import os
import threading
import sys
from convert_audio_to_p3 import encode_audio_to_opus
from convert_p3_to_audio import decode_p3_to_audio
class AudioConverterApp:
def __init__(self, master):
self.master = master
master.title("音频/P3 批量转换工具")
master.geometry("680x600") # 调整窗口高度
# 初始化变量
self.mode = tk.StringVar(value="audio_to_p3")
self.output_dir = tk.StringVar()
self.output_dir.set(os.path.abspath("output"))
self.enable_loudnorm = tk.BooleanVar(value=True)
self.target_lufs = tk.DoubleVar(value=-16.0)
# 创建UI组件
self.create_widgets()
self.redirect_output()
def create_widgets(self):
# 模式选择
mode_frame = ttk.LabelFrame(self.master, text="转换模式")
mode_frame.grid(row=0, column=0, padx=10, pady=5, sticky="ew")
ttk.Radiobutton(mode_frame, text="音频转P3", variable=self.mode,
value="audio_to_p3", command=self.toggle_settings,
width=12).grid(row=0, column=0, padx=5)
ttk.Radiobutton(mode_frame, text="P3转音频", variable=self.mode,
value="p3_to_audio", command=self.toggle_settings,
width=12).grid(row=0, column=1, padx=5)
# 响度设置
self.loudnorm_frame = ttk.Frame(self.master)
self.loudnorm_frame.grid(row=1, column=0, padx=10, pady=5, sticky="ew")
ttk.Checkbutton(self.loudnorm_frame, text="启用响度调整",
variable=self.enable_loudnorm, width=15
).grid(row=0, column=0, padx=2)
ttk.Entry(self.loudnorm_frame, textvariable=self.target_lufs,
width=6).grid(row=0, column=1, padx=2)
ttk.Label(self.loudnorm_frame, text="LUFS").grid(row=0, column=2, padx=2)
# 文件选择
file_frame = ttk.LabelFrame(self.master, text="输入文件")
file_frame.grid(row=2, column=0, padx=10, pady=5, sticky="nsew")
# 文件操作按钮
ttk.Button(file_frame, text="选择文件", command=self.select_files,
width=12).grid(row=0, column=0, padx=5, pady=2)
ttk.Button(file_frame, text="移除选中", command=self.remove_selected,
width=12).grid(row=0, column=1, padx=5, pady=2)
ttk.Button(file_frame, text="清空列表", command=self.clear_files,
width=12).grid(row=0, column=2, padx=5, pady=2)
# 文件列表使用Treeview
self.tree = ttk.Treeview(file_frame, columns=("selected", "filename"),
show="headings", height=8)
self.tree.heading("selected", text="选中", anchor=tk.W)
self.tree.heading("filename", text="文件名", anchor=tk.W)
self.tree.column("selected", width=60, anchor=tk.W)
self.tree.column("filename", width=600, anchor=tk.W)
self.tree.grid(row=1, column=0, columnspan=3, sticky="nsew", padx=5, pady=2)
self.tree.bind("<ButtonRelease-1>", self.on_tree_click)
# 输出目录
output_frame = ttk.LabelFrame(self.master, text="输出目录")
output_frame.grid(row=3, column=0, padx=10, pady=5, sticky="ew")
ttk.Entry(output_frame, textvariable=self.output_dir, width=60
).grid(row=0, column=0, padx=5, sticky="ew")
ttk.Button(output_frame, text="浏览", command=self.select_output_dir,
width=8).grid(row=0, column=1, padx=5)
# 转换按钮区域
button_frame = ttk.Frame(self.master)
button_frame.grid(row=4, column=0, padx=10, pady=10, sticky="ew")
ttk.Button(button_frame, text="转换全部文件", command=lambda: self.start_conversion(True),
width=15).pack(side=tk.LEFT, padx=5)
ttk.Button(button_frame, text="转换选中文件", command=lambda: self.start_conversion(False),
width=15).pack(side=tk.LEFT, padx=5)
# 日志区域
log_frame = ttk.LabelFrame(self.master, text="日志")
log_frame.grid(row=5, column=0, padx=10, pady=5, sticky="nsew")
self.log_text = tk.Text(log_frame, height=14, width=80)
self.log_text.pack(fill=tk.BOTH, expand=True)
# 配置布局权重
self.master.columnconfigure(0, weight=1)
self.master.rowconfigure(2, weight=1)
self.master.rowconfigure(5, weight=3)
file_frame.columnconfigure(0, weight=1)
file_frame.rowconfigure(1, weight=1)
def toggle_settings(self):
if self.mode.get() == "audio_to_p3":
self.loudnorm_frame.grid()
else:
self.loudnorm_frame.grid_remove()
def select_files(self):
file_types = [
("音频文件", "*.wav *.mp3 *.ogg *.flac") if self.mode.get() == "audio_to_p3"
else ("P3文件", "*.p3")
]
files = filedialog.askopenfilenames(filetypes=file_types)
for f in files:
self.tree.insert("", tk.END, values=("[ ]", os.path.basename(f)), tags=(f,))
def on_tree_click(self, event):
"""处理复选框点击事件"""
region = self.tree.identify("region", event.x, event.y)
if region == "cell":
col = self.tree.identify_column(event.x)
item = self.tree.identify_row(event.y)
if col == "#1": # 点击的是选中列
current_val = self.tree.item(item, "values")[0]
new_val = "[√]" if current_val == "[ ]" else "[ ]"
self.tree.item(item, values=(new_val, self.tree.item(item, "values")[1]))
def remove_selected(self):
"""移除选中的文件"""
to_remove = []
for item in self.tree.get_children():
if self.tree.item(item, "values")[0] == "[√]":
to_remove.append(item)
for item in reversed(to_remove):
self.tree.delete(item)
def clear_files(self):
"""清空所有文件"""
for item in self.tree.get_children():
self.tree.delete(item)
def select_output_dir(self):
path = filedialog.askdirectory()
if path:
self.output_dir.set(path)
def redirect_output(self):
class StdoutRedirector:
def __init__(self, text_widget):
self.text_widget = text_widget
self.original_stdout = sys.stdout
def write(self, message):
self.text_widget.insert(tk.END, message)
self.text_widget.see(tk.END)
self.original_stdout.write(message)
def flush(self):
self.original_stdout.flush()
sys.stdout = StdoutRedirector(self.log_text)
def start_conversion(self, convert_all):
"""开始转换"""
input_files = []
for item in self.tree.get_children():
if convert_all or self.tree.item(item, "values")[0] == "[√]":
input_files.append(self.tree.item(item, "tags")[0])
if not input_files:
msg = "没有找到可转换的文件" if convert_all else "没有选中任何文件"
messagebox.showwarning("警告", msg)
return
os.makedirs(self.output_dir.get(), exist_ok=True)
try:
if self.mode.get() == "audio_to_p3":
target_lufs = self.target_lufs.get() if self.enable_loudnorm.get() else None
thread = threading.Thread(target=self.convert_audio_to_p3, args=(target_lufs, input_files))
else:
thread = threading.Thread(target=self.convert_p3_to_audio, args=(input_files,))
thread.start()
except Exception as e:
print(f"转换初始化失败: {str(e)}")
def convert_audio_to_p3(self, target_lufs, input_files):
    """Convert audio files to the P3 stream format.

    Args:
        target_lufs: target loudness in LUFS, or None to disable
            normalization (forwarded to ``encode_audio_to_opus``).
        input_files: absolute paths of the files to convert.

    Fix: the progress/success messages printed the literal text
    "(unknown)"; they now interpolate the actual file names.
    """
    for input_path in input_files:
        try:
            filename = os.path.basename(input_path)
            base_name = os.path.splitext(filename)[0]
            output_path = os.path.join(self.output_dir.get(), f"{base_name}.p3")
            print(f"正在转换: {filename}")
            encode_audio_to_opus(input_path, output_path, target_lufs)
            print(f"转换成功: {output_path}\n")
        except Exception as e:
            # Best-effort batch conversion: report and continue with the rest.
            print(f"转换失败: {str(e)}\n")
def convert_p3_to_audio(self, input_files):
    """Convert P3 files back to WAV audio.

    Fix: the progress/success messages printed the literal text
    "(unknown)"; they now interpolate the actual file names.
    """
    for input_path in input_files:
        try:
            filename = os.path.basename(input_path)
            base_name = os.path.splitext(filename)[0]
            output_path = os.path.join(self.output_dir.get(), f"{base_name}.wav")
            print(f"正在转换: {filename}")
            decode_p3_to_audio(input_path, output_path)
            print(f"转换成功: {output_path}\n")
        except Exception as e:
            # Best-effort batch conversion: report and continue with the rest.
            print(f"转换失败: {str(e)}\n")
if __name__ == "__main__":
root = tk.Tk()
app = AudioConverterApp(root)
root.mainloop()

View File

@@ -1,62 +1,62 @@
# convert audio files to protocol v3 stream
import librosa
import opuslib
import struct
import sys
import tqdm
import numpy as np
import argparse
import pyloudnorm as pyln
def encode_audio_to_opus(input_file, output_file, target_lufs=None):
    """Encode an audio file into a protocol-v3 (P3) Opus stream.

    Pipeline: load -> mono mixdown -> optional loudness normalization to
    ``target_lufs`` (LUFS) -> resample to 16 kHz -> int16 PCM -> 60 ms
    Opus frames.  Each frame is written as a 4-byte big-endian header
    [type:1][reserved:1][payload_len:2] followed by the Opus payload.

    Fix: the original frame loop used
    ``range(0, len(audio) - frame_size, frame_size)``, which silently
    dropped the final complete frame whenever the sample count was an
    exact multiple of ``frame_size``.  The stop bound now includes that
    frame; a trailing partial frame is still discarded, as before.
    """
    # Load at the native sample rate; keep channels so we mix down ourselves.
    audio, sample_rate = librosa.load(input_file, sr=None, mono=False, dtype=np.float32)
    if audio.ndim == 2:
        audio = librosa.to_mono(audio)
    if target_lufs is not None:
        print("Note: Automatic loudness adjustment is enabled, which may cause", file=sys.stderr)
        print("      audio distortion. If the input audio has already been ", file=sys.stderr)
        print("      loudness-adjusted or if the input audio is TTS audio, ", file=sys.stderr)
        print("      please use the `-d` parameter to disable loudness adjustment.", file=sys.stderr)
        meter = pyln.Meter(sample_rate)
        current_loudness = meter.integrated_loudness(audio)
        audio = pyln.normalize.loudness(audio, current_loudness, target_lufs)
        print(f"Adjusted loudness: {current_loudness:.1f} LUFS -> {target_lufs} LUFS")
    # The device expects 16 kHz mono; resample only when necessary.
    target_sample_rate = 16000
    if sample_rate != target_sample_rate:
        audio = librosa.resample(audio, orig_sr=sample_rate, target_sr=target_sample_rate)
        sample_rate = target_sample_rate
    # Convert float [-1, 1] samples to int16 PCM for the encoder.
    audio = (audio * 32767).astype(np.int16)
    encoder = opuslib.Encoder(sample_rate, 1, opuslib.APPLICATION_AUDIO)
    with open(output_file, 'wb') as f:
        duration = 60  # ms per frame
        frame_size = int(sample_rate * duration / 1000)
        # Inclusive stop bound so the final complete frame is encoded too.
        for i in tqdm.tqdm(range(0, len(audio) - frame_size + 1, frame_size)):
            frame = audio[i:i + frame_size]
            opus_data = encoder.encode(frame.tobytes(), frame_size=frame_size)
            packet = struct.pack('>BBH', 0, 0, len(opus_data)) + opus_data
            f.write(packet)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Convert audio to Opus with loudness normalization')
parser.add_argument('input_file', help='Input audio file')
parser.add_argument('output_file', help='Output .opus file')
parser.add_argument('-l', '--lufs', type=float, default=-16.0,
help='Target loudness in LUFS (default: -16)')
parser.add_argument('-d', '--disable-loudnorm', action='store_true',
help='Disable loudness normalization')
args = parser.parse_args()
target_lufs = None if args.disable_loudnorm else args.lufs
# convert audio files to protocol v3 stream
import librosa
import opuslib
import struct
import sys
import tqdm
import numpy as np
import argparse
import pyloudnorm as pyln
def encode_audio_to_opus(input_file, output_file, target_lufs=None):
    """Convert an audio file to a protocol-v3 (.p3) Opus stream.

    Output frame layout: [1B type=0][1B reserved=0][2B big-endian payload
    length][Opus data], 60 ms per frame, 16 kHz mono.

    Args:
        input_file: any audio file readable by librosa.
        output_file: destination .p3 path.
        target_lufs: integrated loudness target in LUFS; None disables
            loudness normalization.
    """
    # Load audio file using librosa (float32, native sample rate, keep channels)
    audio, sample_rate = librosa.load(input_file, sr=None, mono=False, dtype=np.float32)
    # Convert to mono if stereo
    if audio.ndim == 2:
        audio = librosa.to_mono(audio)
    if target_lufs is not None:
        print("Note: Automatic loudness adjustment is enabled, which may cause", file=sys.stderr)
        print("      audio distortion. If the input audio has already been ", file=sys.stderr)
        print("      loudness-adjusted or if the input audio is TTS audio, ", file=sys.stderr)
        print("      please use the `-d` parameter to disable loudness adjustment.", file=sys.stderr)
        meter = pyln.Meter(sample_rate)
        current_loudness = meter.integrated_loudness(audio)
        audio = pyln.normalize.loudness(audio, current_loudness, target_lufs)
        print(f"Adjusted loudness: {current_loudness:.1f} LUFS -> {target_lufs} LUFS")
    # Convert sample rate to 16000Hz if necessary
    target_sample_rate = 16000
    if sample_rate != target_sample_rate:
        audio = librosa.resample(audio, orig_sr=sample_rate, target_sr=target_sample_rate)
        sample_rate = target_sample_rate
    # Convert audio data back to int16 after processing. Clip first: loudness
    # gain can push samples past +/-1.0, and a bare int16 cast would wrap.
    audio = (np.clip(audio, -1.0, 1.0) * 32767).astype(np.int16)
    # Initialize Opus encoder
    encoder = opuslib.Encoder(sample_rate, 1, opuslib.APPLICATION_AUDIO)
    duration = 60  # 60ms per frame
    frame_size = int(sample_rate * duration / 1000)
    # Zero-pad the tail so every sample is encoded. The original
    # range(0, len(audio) - frame_size, frame_size) dropped the trailing
    # partial frame and, when the length was an exact multiple, even the
    # last full frame.
    remainder = len(audio) % frame_size
    if remainder:
        audio = np.concatenate([audio, np.zeros(frame_size - remainder, dtype=np.int16)])
    # Encode and save
    with open(output_file, 'wb') as f:
        for i in tqdm.tqdm(range(0, len(audio), frame_size)):
            frame = audio[i:i + frame_size]
            opus_data = encoder.encode(frame.tobytes(), frame_size=frame_size)
            packet = struct.pack('>BBH', 0, 0, len(opus_data)) + opus_data
            f.write(packet)
if __name__ == "__main__":
    # CLI entry: convert <input_file> to a protocol-v3 Opus stream.
    parser = argparse.ArgumentParser(description='Convert audio to Opus with loudness normalization')
    parser.add_argument('input_file', help='Input audio file')
    parser.add_argument('output_file', help='Output .opus file')
    parser.add_argument('-l', '--lufs', type=float, default=-16.0,
                        help='Target loudness in LUFS (default: -16)')
    parser.add_argument('-d', '--disable-loudnorm', action='store_true',
                        help='Disable loudness normalization')
    args = parser.parse_args()
    # None disables loudness normalization inside encode_audio_to_opus.
    target_lufs = None if args.disable_loudnorm else args.lufs
    encode_audio_to_opus(args.input_file, args.output_file, target_lufs)

View File

@@ -1,51 +1,51 @@
import struct
import sys
import opuslib
import numpy as np
from tqdm import tqdm
import soundfile as sf
def decode_p3_to_audio(input_file, output_file):
    """Decode a .p3 Opus stream back to a PCM audio file.

    p3 frame layout: [1B type][1B reserved][2B big-endian payload length]
    [Opus data], 60 ms frames, 16 kHz mono. The output is written as
    16-bit PCM (container chosen by soundfile from the output extension).

    Raises:
        ValueError: if no complete frame could be decoded.
    """
    sample_rate = 16000
    channels = 1
    decoder = opuslib.Decoder(sample_rate, channels)
    pcm_frames = []
    frame_size = int(sample_rate * 60 / 1000)  # 60 ms -> 960 samples
    with open(input_file, "rb") as f:
        # Measure the file size once for the progress bar, then rewind.
        f.seek(0, 2)
        total_size = f.tell()
        f.seek(0)
        with tqdm(total=total_size, unit="B", unit_scale=True) as pbar:
            while True:
                header = f.read(4)
                if not header or len(header) < 4:
                    break  # EOF or truncated header
                pkt_type, reserved, opus_len = struct.unpack(">BBH", header)
                opus_data = f.read(opus_len)
                if len(opus_data) != opus_len:
                    break  # truncated payload
                pcm = decoder.decode(opus_data, frame_size)
                pcm_frames.append(np.frombuffer(pcm, dtype=np.int16))
                pbar.update(4 + opus_len)
    if not pcm_frames:
        raise ValueError("No valid audio data found")
    pcm_data = np.concatenate(pcm_frames)
    sf.write(output_file, pcm_data, sample_rate, subtype="PCM_16")
if __name__ == "__main__":
    # CLI entry: python convert_p3_to_audio.py <input.p3> <output.wav>
    if len(sys.argv) != 3:
        print("Usage: python convert_p3_to_audio.py <input.p3> <output.wav>")
        sys.exit(1)
    decode_p3_to_audio(sys.argv[1], sys.argv[2])
import struct
import sys
import opuslib
import numpy as np
from tqdm import tqdm
import soundfile as sf
def decode_p3_to_audio(input_file, output_file):
    """Decode a .p3 Opus stream back to a PCM audio file.

    p3 frame layout: [1B type][1B reserved][2B big-endian payload length]
    [Opus data], 60 ms frames, 16 kHz mono.
    """
    sample_rate = 16000
    channels = 1
    decoder = opuslib.Decoder(sample_rate, channels)
    frame_size = int(sample_rate * 60 / 1000)  # 60 ms of samples
    chunks = []
    with open(input_file, "rb") as fp:
        # Determine the total size once so tqdm can show real progress.
        fp.seek(0, 2)
        total_bytes = fp.tell()
        fp.seek(0)
        with tqdm(total=total_bytes, unit="B", unit_scale=True) as progress:
            while True:
                header = fp.read(4)
                if len(header) < 4:
                    break
                _pkt_type, _reserved, payload_len = struct.unpack(">BBH", header)
                payload = fp.read(payload_len)
                if len(payload) != payload_len:
                    break
                pcm_bytes = decoder.decode(payload, frame_size)
                chunks.append(np.frombuffer(pcm_bytes, dtype=np.int16))
                progress.update(4 + payload_len)
    if not chunks:
        raise ValueError("No valid audio data found")
    sf.write(output_file, np.concatenate(chunks), sample_rate, subtype="PCM_16")
if __name__ == "__main__":
    # CLI entry: python convert_p3_to_audio.py <input.p3> <output.wav>
    if len(sys.argv) != 3:
        print("Usage: python convert_p3_to_audio.py <input.p3> <output.wav>")
        sys.exit(1)
    decode_p3_to_audio(sys.argv[1], sys.argv[2])

View File

@@ -1,241 +1,241 @@
import tkinter as tk
from tkinter import filedialog, messagebox
import threading
import time
import opuslib
import struct
import numpy as np
import sounddevice as sd
import os
def play_p3_file(input_file, stop_event=None, pause_event=None):
    """
    Play a .p3 format audio file.

    p3 format: [1-byte type, 1-byte reserved, 2-byte length, Opus data]

    Args:
        input_file: path to the .p3 file.
        stop_event: optional threading.Event; when set, playback aborts.
        pause_event: optional threading.Event; while set, playback idles
            without consuming the file.
    """
    # Initialize the Opus decoder
    sample_rate = 16000  # sample rate fixed at 16000 Hz
    channels = 1  # mono
    decoder = opuslib.Decoder(sample_rate, channels)
    # Frame size (60 ms)
    frame_size = int(sample_rate * 60 / 1000)
    # Open the audio output stream
    stream = sd.OutputStream(
        samplerate=sample_rate,
        channels=channels,
        dtype='int16'
    )
    stream.start()
    try:
        with open(input_file, 'rb') as f:
            print(f"正在播放: {input_file}")
            while True:
                if stop_event and stop_event.is_set():
                    break
                if pause_event and pause_event.is_set():
                    # Paused: poll every 100 ms, keep file position
                    time.sleep(0.1)
                    continue
                # Read the 4-byte header
                header = f.read(4)
                if not header or len(header) < 4:
                    break
                # Parse the header (big-endian: type, reserved, payload length)
                packet_type, reserved, data_len = struct.unpack('>BBH', header)
                # Read the Opus payload
                opus_data = f.read(data_len)
                if not opus_data or len(opus_data) < data_len:
                    break
                # Decode the Opus payload to PCM
                pcm_data = decoder.decode(opus_data, frame_size)
                # Convert the bytes to a numpy array
                audio_array = np.frombuffer(pcm_data, dtype=np.int16)
                # Play the audio (blocks until the frame is queued)
                stream.write(audio_array)
    except KeyboardInterrupt:
        print("\n播放已停止")
    finally:
        # Always release the audio device
        stream.stop()
        stream.close()
        print("播放完成")
class P3PlayerApp:
    """Minimal Tkinter playlist player for .p3 files.

    Playback runs on a daemon thread (play_audio); the GUI thread signals
    it through two threading.Events: stop_event aborts playback and
    pause_event makes it idle without losing the file position.
    """

    def __init__(self, root):
        # root: the Tk top-level window.
        self.root = root
        self.root.title("P3 文件简易播放器")
        self.root.geometry("500x400")
        self.playlist = []        # paths of queued .p3 files
        self.current_index = 0    # index of the track being played
        self.is_playing = False   # a playback thread is active
        self.is_paused = False    # the playback thread is idling on pause_event
        self.stop_event = threading.Event()
        self.pause_event = threading.Event()
        self.loop_playback = tk.BooleanVar(value=False)  # state of the loop checkbox
        # Build the UI widgets
        self.create_widgets()

    def create_widgets(self):
        """Create and lay out all widgets: playlist box, buttons, status label."""
        # Playlist
        self.playlist_label = tk.Label(self.root, text="播放列表:")
        self.playlist_label.pack(pady=5)
        self.playlist_frame = tk.Frame(self.root)
        self.playlist_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)
        self.playlist_listbox = tk.Listbox(self.playlist_frame, selectmode=tk.SINGLE)
        self.playlist_listbox.pack(fill=tk.BOTH, expand=True)
        # Checkbox row and remove button
        self.checkbox_frame = tk.Frame(self.root)
        self.checkbox_frame.pack(pady=5)
        self.remove_button = tk.Button(self.checkbox_frame, text="移除文件", command=self.remove_files)
        self.remove_button.pack(side=tk.LEFT, padx=5)
        # Loop-playback checkbox
        self.loop_checkbox = tk.Checkbutton(self.checkbox_frame, text="循环播放", variable=self.loop_playback)
        self.loop_checkbox.pack(side=tk.LEFT, padx=5)
        # Transport buttons
        self.control_frame = tk.Frame(self.root)
        self.control_frame.pack(pady=10)
        self.add_button = tk.Button(self.control_frame, text="添加文件", command=self.add_file)
        self.add_button.grid(row=0, column=0, padx=5)
        self.play_button = tk.Button(self.control_frame, text="播放", command=self.play)
        self.play_button.grid(row=0, column=1, padx=5)
        self.pause_button = tk.Button(self.control_frame, text="暂停", command=self.pause)
        self.pause_button.grid(row=0, column=2, padx=5)
        self.stop_button = tk.Button(self.control_frame, text="停止", command=self.stop)
        self.stop_button.grid(row=0, column=3, padx=5)
        # Status label
        self.status_label = tk.Label(self.root, text="未在播放", fg="blue")
        self.status_label.pack(pady=10)

    def add_file(self):
        """Ask the user for one or more .p3 files and append them to the playlist."""
        files = filedialog.askopenfilenames(filetypes=[("P3 文件", "*.p3")])
        if files:
            self.playlist.extend(files)
            self.update_playlist()

    def update_playlist(self):
        """Re-render the listbox from self.playlist."""
        self.playlist_listbox.delete(0, tk.END)
        for file in self.playlist:
            self.playlist_listbox.insert(tk.END, os.path.basename(file))  # show file name only

    def update_status(self, status_text, color="blue"):
        """Update the status label text and color."""
        self.status_label.config(text=status_text, fg=color)

    def play(self):
        """Start playback, or resume if currently paused."""
        if not self.playlist:
            messagebox.showwarning("警告", "播放列表为空!")
            return
        if self.is_paused:
            # Resume from pause
            self.is_paused = False
            self.pause_event.clear()
            self.update_status(f"正在播放:{os.path.basename(self.playlist[self.current_index])}", "green")
            return
        if self.is_playing:
            return
        self.is_playing = True
        self.stop_event.clear()
        self.pause_event.clear()
        # Start from the selected track, or the first one when nothing is selected
        self.current_index = self.playlist_listbox.curselection()[0] if self.playlist_listbox.curselection() else 0
        self.play_thread = threading.Thread(target=self.play_audio, daemon=True)
        self.play_thread.start()
        self.update_status(f"正在播放:{os.path.basename(self.playlist[self.current_index])}", "green")

    def play_audio(self):
        """Playback-thread body: play tracks sequentially, honoring stop/pause/loop.

        NOTE(review): this worker thread touches Tk widgets directly
        (selection/status updates) — Tk is generally not thread-safe; confirm
        this is acceptable for the target platform.
        """
        while True:
            if self.stop_event.is_set():
                break
            if self.pause_event.is_set():
                time.sleep(0.1)
                continue
            # Check that the current index is valid
            if self.current_index >= len(self.playlist):
                if self.loop_playback.get():  # loop enabled
                    self.current_index = 0  # wrap to the first track
                else:
                    break  # otherwise stop playback
            file = self.playlist[self.current_index]
            self.playlist_listbox.selection_clear(0, tk.END)
            self.playlist_listbox.selection_set(self.current_index)
            self.playlist_listbox.activate(self.current_index)
            self.update_status(f"正在播放:{os.path.basename(self.playlist[self.current_index])}", "green")
            play_p3_file(file, self.stop_event, self.pause_event)
            if self.stop_event.is_set():
                break
            if not self.loop_playback.get():  # loop disabled
                break  # stop after the current file
            self.current_index += 1
            if self.current_index >= len(self.playlist):
                if self.loop_playback.get():  # loop enabled
                    self.current_index = 0  # wrap to the first track
        self.is_playing = False
        self.is_paused = False
        self.update_status("播放已停止", "red")

    def pause(self):
        """Toggle pause/resume while playing."""
        if self.is_playing:
            self.is_paused = not self.is_paused
            if self.is_paused:
                self.pause_event.set()
                self.update_status("播放已暂停", "orange")
            else:
                self.pause_event.clear()
                self.update_status(f"正在播放:{os.path.basename(self.playlist[self.current_index])}", "green")

    def stop(self):
        """Stop playback and reset the play/pause flags."""
        if self.is_playing or self.is_paused:
            self.is_playing = False
            self.is_paused = False
            self.stop_event.set()
            self.pause_event.clear()
            self.update_status("播放已停止", "red")

    def remove_files(self):
        """Remove the selected entries from the playlist."""
        selected_indices = self.playlist_listbox.curselection()
        if not selected_indices:
            messagebox.showwarning("警告", "请先选择要移除的文件!")
            return
        # Delete from the end so earlier indices stay valid
        for index in reversed(selected_indices):
            self.playlist.pop(index)
        self.update_playlist()
if __name__ == "__main__":
    # Launch the Tk main loop with the player UI.
    root = tk.Tk()
    app = P3PlayerApp(root)
    root.mainloop()
import tkinter as tk
from tkinter import filedialog, messagebox
import threading
import time
import opuslib
import struct
import numpy as np
import sounddevice as sd
import os
def play_p3_file(input_file, stop_event=None, pause_event=None):
    """
    Play a .p3 format audio file.

    p3 format: [1-byte type, 1-byte reserved, 2-byte length, Opus data]

    Args:
        input_file: path to the .p3 file.
        stop_event: optional threading.Event; when set, playback aborts.
        pause_event: optional threading.Event; while set, playback idles
            without consuming the file.
    """
    # Initialize the Opus decoder
    sample_rate = 16000  # sample rate fixed at 16000 Hz
    channels = 1  # mono
    decoder = opuslib.Decoder(sample_rate, channels)
    # Frame size (60 ms)
    frame_size = int(sample_rate * 60 / 1000)
    # Open the audio output stream
    stream = sd.OutputStream(
        samplerate=sample_rate,
        channels=channels,
        dtype='int16'
    )
    stream.start()
    try:
        with open(input_file, 'rb') as f:
            print(f"正在播放: {input_file}")
            while True:
                if stop_event and stop_event.is_set():
                    break
                if pause_event and pause_event.is_set():
                    # Paused: poll every 100 ms, keep file position
                    time.sleep(0.1)
                    continue
                # Read the 4-byte header
                header = f.read(4)
                if not header or len(header) < 4:
                    break
                # Parse the header (big-endian: type, reserved, payload length)
                packet_type, reserved, data_len = struct.unpack('>BBH', header)
                # Read the Opus payload
                opus_data = f.read(data_len)
                if not opus_data or len(opus_data) < data_len:
                    break
                # Decode the Opus payload to PCM
                pcm_data = decoder.decode(opus_data, frame_size)
                # Convert the bytes to a numpy array
                audio_array = np.frombuffer(pcm_data, dtype=np.int16)
                # Play the audio (blocks until the frame is queued)
                stream.write(audio_array)
    except KeyboardInterrupt:
        print("\n播放已停止")
    finally:
        # Always release the audio device
        stream.stop()
        stream.close()
        print("播放完成")
class P3PlayerApp:
    """Minimal Tkinter playlist player for .p3 files.

    Playback runs on a daemon thread (play_audio); the GUI thread signals
    it through two threading.Events: stop_event aborts playback and
    pause_event makes it idle without losing the file position.
    """

    def __init__(self, root):
        # root: the Tk top-level window.
        self.root = root
        self.root.title("P3 文件简易播放器")
        self.root.geometry("500x400")
        self.playlist = []        # paths of queued .p3 files
        self.current_index = 0    # index of the track being played
        self.is_playing = False   # a playback thread is active
        self.is_paused = False    # the playback thread is idling on pause_event
        self.stop_event = threading.Event()
        self.pause_event = threading.Event()
        self.loop_playback = tk.BooleanVar(value=False)  # state of the loop checkbox
        # Build the UI widgets
        self.create_widgets()

    def create_widgets(self):
        """Create and lay out all widgets: playlist box, buttons, status label."""
        # Playlist
        self.playlist_label = tk.Label(self.root, text="播放列表:")
        self.playlist_label.pack(pady=5)
        self.playlist_frame = tk.Frame(self.root)
        self.playlist_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)
        self.playlist_listbox = tk.Listbox(self.playlist_frame, selectmode=tk.SINGLE)
        self.playlist_listbox.pack(fill=tk.BOTH, expand=True)
        # Checkbox row and remove button
        self.checkbox_frame = tk.Frame(self.root)
        self.checkbox_frame.pack(pady=5)
        self.remove_button = tk.Button(self.checkbox_frame, text="移除文件", command=self.remove_files)
        self.remove_button.pack(side=tk.LEFT, padx=5)
        # Loop-playback checkbox
        self.loop_checkbox = tk.Checkbutton(self.checkbox_frame, text="循环播放", variable=self.loop_playback)
        self.loop_checkbox.pack(side=tk.LEFT, padx=5)
        # Transport buttons
        self.control_frame = tk.Frame(self.root)
        self.control_frame.pack(pady=10)
        self.add_button = tk.Button(self.control_frame, text="添加文件", command=self.add_file)
        self.add_button.grid(row=0, column=0, padx=5)
        self.play_button = tk.Button(self.control_frame, text="播放", command=self.play)
        self.play_button.grid(row=0, column=1, padx=5)
        self.pause_button = tk.Button(self.control_frame, text="暂停", command=self.pause)
        self.pause_button.grid(row=0, column=2, padx=5)
        self.stop_button = tk.Button(self.control_frame, text="停止", command=self.stop)
        self.stop_button.grid(row=0, column=3, padx=5)
        # Status label
        self.status_label = tk.Label(self.root, text="未在播放", fg="blue")
        self.status_label.pack(pady=10)

    def add_file(self):
        """Ask the user for one or more .p3 files and append them to the playlist."""
        files = filedialog.askopenfilenames(filetypes=[("P3 文件", "*.p3")])
        if files:
            self.playlist.extend(files)
            self.update_playlist()

    def update_playlist(self):
        """Re-render the listbox from self.playlist."""
        self.playlist_listbox.delete(0, tk.END)
        for file in self.playlist:
            self.playlist_listbox.insert(tk.END, os.path.basename(file))  # show file name only

    def update_status(self, status_text, color="blue"):
        """Update the status label text and color."""
        self.status_label.config(text=status_text, fg=color)

    def play(self):
        """Start playback, or resume if currently paused."""
        if not self.playlist:
            messagebox.showwarning("警告", "播放列表为空!")
            return
        if self.is_paused:
            # Resume from pause
            self.is_paused = False
            self.pause_event.clear()
            self.update_status(f"正在播放:{os.path.basename(self.playlist[self.current_index])}", "green")
            return
        if self.is_playing:
            return
        self.is_playing = True
        self.stop_event.clear()
        self.pause_event.clear()
        # Start from the selected track, or the first one when nothing is selected
        self.current_index = self.playlist_listbox.curselection()[0] if self.playlist_listbox.curselection() else 0
        self.play_thread = threading.Thread(target=self.play_audio, daemon=True)
        self.play_thread.start()
        self.update_status(f"正在播放:{os.path.basename(self.playlist[self.current_index])}", "green")

    def play_audio(self):
        """Playback-thread body: play tracks sequentially, honoring stop/pause/loop.

        NOTE(review): this worker thread touches Tk widgets directly
        (selection/status updates) — Tk is generally not thread-safe; confirm
        this is acceptable for the target platform.
        """
        while True:
            if self.stop_event.is_set():
                break
            if self.pause_event.is_set():
                time.sleep(0.1)
                continue
            # Check that the current index is valid
            if self.current_index >= len(self.playlist):
                if self.loop_playback.get():  # loop enabled
                    self.current_index = 0  # wrap to the first track
                else:
                    break  # otherwise stop playback
            file = self.playlist[self.current_index]
            self.playlist_listbox.selection_clear(0, tk.END)
            self.playlist_listbox.selection_set(self.current_index)
            self.playlist_listbox.activate(self.current_index)
            self.update_status(f"正在播放:{os.path.basename(self.playlist[self.current_index])}", "green")
            play_p3_file(file, self.stop_event, self.pause_event)
            if self.stop_event.is_set():
                break
            if not self.loop_playback.get():  # loop disabled
                break  # stop after the current file
            self.current_index += 1
            if self.current_index >= len(self.playlist):
                if self.loop_playback.get():  # loop enabled
                    self.current_index = 0  # wrap to the first track
        self.is_playing = False
        self.is_paused = False
        self.update_status("播放已停止", "red")

    def pause(self):
        """Toggle pause/resume while playing."""
        if self.is_playing:
            self.is_paused = not self.is_paused
            if self.is_paused:
                self.pause_event.set()
                self.update_status("播放已暂停", "orange")
            else:
                self.pause_event.clear()
                self.update_status(f"正在播放:{os.path.basename(self.playlist[self.current_index])}", "green")

    def stop(self):
        """Stop playback and reset the play/pause flags."""
        if self.is_playing or self.is_paused:
            self.is_playing = False
            self.is_paused = False
            self.stop_event.set()
            self.pause_event.clear()
            self.update_status("播放已停止", "red")

    def remove_files(self):
        """Remove the selected entries from the playlist."""
        selected_indices = self.playlist_listbox.curselection()
        if not selected_indices:
            messagebox.showwarning("警告", "请先选择要移除的文件!")
            return
        # Delete from the end so earlier indices stay valid
        for index in reversed(selected_indices):
            self.playlist.pop(index)
        self.update_playlist()
if __name__ == "__main__":
    # Launch the Tk main loop with the player UI.
    root = tk.Tk()
    app = P3PlayerApp(root)
    root.mainloop()

View File

@@ -1,71 +1,71 @@
# 播放p3格式的音频文件
import opuslib
import struct
import numpy as np
import sounddevice as sd
import argparse
def play_p3_file(input_file):
    """
    Play a .p3 format audio file.

    p3 format: [1-byte type, 1-byte reserved, 2-byte length, Opus data]
    """
    # Initialize the Opus decoder
    sample_rate = 16000  # sample rate fixed at 16000 Hz
    channels = 1  # mono
    decoder = opuslib.Decoder(sample_rate, channels)
    # Frame size (60 ms)
    frame_size = int(sample_rate * 60 / 1000)
    # Open the audio output stream
    stream = sd.OutputStream(
        samplerate=sample_rate,
        channels=channels,
        dtype='int16'
    )
    stream.start()
    try:
        with open(input_file, 'rb') as f:
            print(f"正在播放: {input_file}")
            while True:
                # Read the 4-byte header
                header = f.read(4)
                if not header or len(header) < 4:
                    break
                # Parse the header (big-endian: type, reserved, payload length)
                packet_type, reserved, data_len = struct.unpack('>BBH', header)
                # Read the Opus payload
                opus_data = f.read(data_len)
                if not opus_data or len(opus_data) < data_len:
                    break
                # Decode the Opus payload to PCM
                pcm_data = decoder.decode(opus_data, frame_size)
                # Convert the bytes to a numpy array
                audio_array = np.frombuffer(pcm_data, dtype=np.int16)
                # Play the audio
                stream.write(audio_array)
    except KeyboardInterrupt:
        print("\n播放已停止")
    finally:
        # Always release the audio device
        stream.stop()
        stream.close()
        print("播放完成")
def main():
    """CLI entry: parse the .p3 path argument and play it."""
    parser = argparse.ArgumentParser(description='播放p3格式的音频文件')
    parser.add_argument('input_file', help='输入的p3文件路径')
    args = parser.parse_args()
    play_p3_file(args.input_file)


if __name__ == "__main__":
    main()
# 播放p3格式的音频文件
import opuslib
import struct
import numpy as np
import sounddevice as sd
import argparse
def play_p3_file(input_file):
    """
    Play a .p3 format audio file.

    p3 format: [1-byte type, 1-byte reserved, 2-byte length, Opus data]
    """
    sample_rate = 16000  # fixed 16 kHz
    channels = 1  # mono
    decoder = opuslib.Decoder(sample_rate, channels)
    frame_size = int(sample_rate * 60 / 1000)  # 60 ms frames
    stream = sd.OutputStream(samplerate=sample_rate, channels=channels, dtype='int16')
    stream.start()
    try:
        with open(input_file, 'rb') as fp:
            print(f"正在播放: {input_file}")
            while True:
                # 4-byte big-endian header: type, reserved, payload length
                header = fp.read(4)
                if len(header) < 4:
                    break
                _packet_type, _reserved, payload_len = struct.unpack('>BBH', header)
                payload = fp.read(payload_len)
                if not payload or len(payload) < payload_len:
                    break
                # Decode one Opus frame and push the PCM to the sound device
                pcm = decoder.decode(payload, frame_size)
                stream.write(np.frombuffer(pcm, dtype=np.int16))
    except KeyboardInterrupt:
        print("\n播放已停止")
    finally:
        stream.stop()
        stream.close()
        print("播放完成")
def main():
    """CLI entry: parse the .p3 path argument and play it."""
    parser = argparse.ArgumentParser(description='播放p3格式的音频文件')
    parser.add_argument('input_file', help='输入的p3文件路径')
    args = parser.parse_args()
    play_p3_file(args.input_file)


if __name__ == "__main__":
    main()

View File

@@ -1,7 +1,7 @@
librosa>=0.9.2
opuslib>=3.0.1
numpy>=1.20.0
tqdm>=4.62.0
sounddevice>=0.4.4
pyloudnorm>=0.1.1
soundfile>=0.13.1
librosa>=0.9.2
opuslib>=3.0.1
numpy>=1.20.0
tqdm>=4.62.0
sounddevice>=0.4.4
pyloudnorm>=0.1.1
soundfile>=0.13.1

415
scripts/release.py Executable file → Normal file
View File

@@ -1,153 +1,262 @@
import sys
import os
import json
import zipfile
import argparse
# 切换到项目根目录
os.chdir(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
def get_board_type():
    """Return the BOARD_TYPE macro recorded in build/compile_commands.json.

    Scans for the entry compiling main.cc and extracts the value of the
    -DBOARD_TYPE=\\"...\\" define from its command line. Returns None when
    no main.cc entry exists.
    """
    with open("build/compile_commands.json") as fp:
        entries = json.load(fp)
    for entry in entries:
        if entry["file"].endswith("main.cc"):
            # extract -DBOARD_TYPE=xxx from the compiler command line
            cmd = entry["command"]
            return cmd.split("-DBOARD_TYPE=\\\"")[1].split("\\\"")[0].strip()
    return None
def get_project_version():
    """Return PROJECT_VER from the root CMakeLists.txt, or None if absent."""
    with open("CMakeLists.txt") as fp:
        for raw in fp:
            if not raw.startswith("set(PROJECT_VER"):
                continue
            # the version string sits between the first pair of quotes
            return raw.split("\"")[1].strip()
    return None
def merge_bin():
    """Run `idf.py merge-bin`; terminate the script on failure."""
    if os.system("idf.py merge-bin") != 0:
        print("merge bin failed")
        sys.exit(1)
def zip_bin(board_type, project_version):
    """Package build/merged-binary.bin into releases/v{version}_{board}.zip.

    Any existing archive with the same name is overwritten.
    """
    os.makedirs("releases", exist_ok=True)
    output_path = f"releases/v{project_version}_{board_type}.zip"
    if os.path.exists(output_path):
        os.remove(output_path)
    with zipfile.ZipFile(output_path, 'w', compression=zipfile.ZIP_DEFLATED) as zipf:
        zipf.write("build/merged-binary.bin", arcname="merged-binary.bin")
    print(f"zip bin to {output_path} done")
def release_current():
    """Package the firmware already built in ./build for the current board."""
    merge_bin()
    board_type = get_board_type()
    print("board type:", board_type)
    project_version = get_project_version()
    print("project version:", project_version)
    zip_bin(board_type, project_version)
def get_all_board_types():
    """Map CONFIG_BOARD_TYPE_* Kconfig symbols to board type names.

    Scans main/CMakeLists.txt for the pattern:
        if(CONFIG_BOARD_TYPE_XXX)
            set(BOARD_TYPE "xxx")

    Returns:
        dict mapping Kconfig symbol -> board type string.
    """
    board_configs = {}
    with open("main/CMakeLists.txt", encoding='utf-8') as f:
        lines = f.readlines()
    for i, line in enumerate(lines):
        # Look for an if(CONFIG_BOARD_TYPE_*) line
        if "if(CONFIG_BOARD_TYPE_" not in line:
            continue
        config_name = line.strip().split("if(")[1].split(")")[0]
        # Guard the look-ahead: the original indexed lines[i + 1]
        # unconditionally and raised IndexError when the if( was the
        # file's last line.
        if i + 1 >= len(lines):
            break
        # The next line should carry set(BOARD_TYPE "xxx")
        next_line = lines[i + 1].strip()
        if next_line.startswith("set(BOARD_TYPE"):
            board_type = next_line.split('"')[1]
            board_configs[config_name] = board_type
    return board_configs
def release(board_type, board_config, config_filename="config.json"):
    """Build and package every variant listed in a board's config file.

    Args:
        board_type: board directory name under main/boards.
        board_config: the CONFIG_BOARD_TYPE_* Kconfig symbol to enable.
        config_filename: per-board JSON config name (default: config.json).
    """
    config_path = f"main/boards/{board_type}/{config_filename}"
    if not config_path or not os.path.exists(config_path):
        print(f"跳过 {board_type} 因为 {config_filename} 不存在")
        return
    # Print Project Version
    project_version = get_project_version()
    print(f"Project Version: {project_version}", config_path)
    with open(config_path, "r") as f:
        config = json.load(f)
    target = config["target"]
    builds = config["builds"]
    for build in builds:
        name = build["name"]
        # Variant names must be prefixed with the board type
        if not name.startswith(board_type):
            raise ValueError(f"name {name} 必须以 {board_type} 开头")
        output_path = f"releases/v{project_version}_{name}.zip"
        if os.path.exists(output_path):
            # Already packaged in a previous run; skip
            print(f"跳过 {board_type} 因为 {output_path} 已存在")
            continue
        # sdkconfig lines to append: board symbol plus per-variant extras
        sdkconfig_append = [f"{board_config}=y"]
        for append in build.get("sdkconfig_append", []):
            sdkconfig_append.append(append)
        print(f"name: {name}")
        print(f"target: {target}")
        for append in sdkconfig_append:
            print(f"sdkconfig_append: {append}")
        # unset IDF_TARGET so idf.py set-target takes effect
        os.environ.pop("IDF_TARGET", None)
        # Call set-target
        if os.system(f"idf.py set-target {target}") != 0:
            print("set-target failed")
            sys.exit(1)
        # Append sdkconfig
        with open("sdkconfig", "a") as f:
            f.write("\n")
            for append in sdkconfig_append:
                f.write(f"{append}\n")
        # Build with macro BOARD_NAME defined to name
        if os.system(f"idf.py -DBOARD_NAME={name} build") != 0:
            print("build failed")
            sys.exit(1)
        # Call merge-bin
        if os.system("idf.py merge-bin") != 0:
            print("merge-bin failed")
            sys.exit(1)
        # Zip bin
        zip_bin(name, project_version)
        print("-" * 80)
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("board", nargs="?", default=None, help="板子类型或 all")
    parser.add_argument("-c", "--config", default="config.json", help="指定 config 文件名,默认 config.json")
    parser.add_argument("--list-boards", action="store_true", help="列出所有支持的 board 列表")
    parser.add_argument("--json", action="store_true", help="配合 --list-boards,以 JSON 格式输出")
    args = parser.parse_args()
    # --list-boards: print the supported boards and exit
    if args.list_boards:
        board_configs = get_all_board_types()
        boards = list(board_configs.values())
        if args.json:
            print(json.dumps(boards))
        else:
            for board in boards:
                print(board)
        sys.exit(0)
    if args.board:
        # Build the named board, or every board when "all" is given
        board_configs = get_all_board_types()
        found = False
        for board_config, board_type in board_configs.items():
            if args.board == 'all' or board_type == args.board:
                release(board_type, board_config, config_filename=args.config)
                found = True
        if not found:
            print(f"未找到板子类型: {args.board}")
            print("可用的板子类型:")
            for board_type in board_configs.values():
                print(f"  {board_type}")
    else:
        # No board given: just package the firmware already built in ./build
        release_current()
import sys
import os
import json
import zipfile
import argparse
from pathlib import Path
from typing import Optional
# Switch to project root directory
os.chdir(Path(__file__).resolve().parent.parent)
################################################################################
# Common utility functions
################################################################################
def get_board_type_from_compile_commands() -> Optional[str]:
    """Parse the current compiled BOARD_TYPE from build/compile_commands.json"""
    compile_file = Path("build/compile_commands.json")
    if not compile_file.exists():
        return None
    entries = json.loads(compile_file.read_text())
    marker = "-DBOARD_TYPE=\\\""
    for entry in entries:
        if not entry["file"].endswith("main.cc"):
            continue
        command = entry["command"]
        if marker in command:
            # value sits between the escaped quotes that follow the marker
            return command.split(marker)[1].split("\\\"")[0].strip()
    return None
def get_project_version() -> Optional[str]:
    """Read set(PROJECT_VER "x.y.z") from root CMakeLists.txt"""
    # Returns the quoted version string, or None when no PROJECT_VER line exists.
    with Path("CMakeLists.txt").open() as f:
        for line in f:
            if line.startswith("set(PROJECT_VER"):
                return line.split("\"")[1]
    return None
def merge_bin() -> None:
    """Run `idf.py merge-bin`; terminate the script on failure."""
    if os.system("idf.py merge-bin") != 0:
        print("merge-bin failed", file=sys.stderr)
        sys.exit(1)
def zip_bin(name: str, version: str) -> None:
    """Zip build/merged-binary.bin to releases/v{version}_{name}.zip"""
    out_dir = Path("releases")
    out_dir.mkdir(exist_ok=True)
    output_path = out_dir / f"v{version}_{name}.zip"
    # Overwrite any stale archive from a previous run
    if output_path.exists():
        output_path.unlink()
    with zipfile.ZipFile(output_path, "w", compression=zipfile.ZIP_DEFLATED) as zipf:
        zipf.write("build/merged-binary.bin", arcname="merged-binary.bin")
    print(f"zip bin to {output_path} done")
################################################################################
# board / variant related functions
################################################################################
_BOARDS_DIR = Path("main/boards")
def _collect_variants(config_filename: str = "config.json") -> list[dict[str, str]]:
    """Traverse all boards under main/boards, collect variant information.

    Return example:
        [{"board": "bread-compact-ml307", "name": "bread-compact-ml307"}, ...]
    """
    variants: list[dict[str, str]] = []
    for board_path in _BOARDS_DIR.iterdir():
        if not board_path.is_dir():
            continue
        # "common" holds shared sources, not a buildable board
        if board_path.name == "common":
            continue
        cfg_path = board_path / config_filename
        if not cfg_path.exists():
            print(f"[WARN] {cfg_path} does not exist, skip", file=sys.stderr)
            continue
        try:
            with cfg_path.open() as f:
                cfg = json.load(f)
            for build in cfg.get("builds", []):
                variants.append({"board": board_path.name, "name": build["name"]})
        except Exception as e:
            # Malformed config: report it and keep scanning the other boards
            print(f"[ERROR] 解析 {cfg_path} 失败: {e}", file=sys.stderr)
    return variants
def _parse_board_config_map() -> dict[str, str]:
    """Build the mapping of CONFIG_BOARD_TYPE_xxx and board_type from main/CMakeLists.txt"""
    mapping: dict[str, str] = {}
    lines = Path("main/CMakeLists.txt").read_text(encoding="utf-8").splitlines()
    # Walk line pairs: an if(CONFIG_BOARD_TYPE_*) line followed by its
    # set(BOARD_TYPE "...") line. A trailing if( with no follower is ignored.
    for current, following in zip(lines, lines[1:]):
        if "if(CONFIG_BOARD_TYPE_" not in current:
            continue
        config_name = current.strip().split("if(")[1].split(")")[0]
        follower = following.strip()
        if follower.startswith("set(BOARD_TYPE"):
            mapping[config_name] = follower.split('"')[1]
    return mapping
def _find_board_config(board_type: str) -> Optional[str]:
    """Find the corresponding CONFIG_BOARD_TYPE_xxx for the given board_type"""
    matches = (cfg for cfg, mapped in _parse_board_config_map().items() if mapped == board_type)
    return next(matches, None)
################################################################################
# Check board_type in CMakeLists
################################################################################
def _board_type_exists(board_type: str) -> bool:
    """Check that main/CMakeLists.txt declares this board via set(BOARD_TYPE "...")."""
    content = Path("main/CMakeLists.txt").read_text(encoding="utf-8")
    return f'set(BOARD_TYPE "{board_type}")' in content
################################################################################
# Compile implementation
################################################################################
def release(board_type: str, config_filename: str = "config.json", *, filter_name: Optional[str] = None) -> None:
    """Compile and package all/specified variants of the specified board_type

    Args:
        board_type: directory name under main/boards
        config_filename: config.json name (default: config.json)
        filter_name: if specified, only compile the build["name"] that matches
    """
    cfg_path = _BOARDS_DIR / board_type / config_filename
    if not cfg_path.exists():
        print(f"[WARN] {cfg_path} 不存在,跳过 {board_type}")
        return
    project_version = get_project_version()
    print(f"Project Version: {project_version} ({cfg_path})")
    with cfg_path.open() as f:
        cfg = json.load(f)
    target = cfg["target"]
    builds = cfg.get("builds", [])
    # Optionally narrow down to a single named variant
    if filter_name:
        builds = [b for b in builds if b["name"] == filter_name]
        if not builds:
            print(f"[ERROR] 未在 {board_type}{config_filename} 中找到变体 {filter_name}", file=sys.stderr)
            sys.exit(1)
    for build in builds:
        name = build["name"]
        # Variant names must be prefixed with the board type
        if not name.startswith(board_type):
            raise ValueError(f"build.name {name} 必须以 {board_type} 开头")
        output_path = Path("releases") / f"v{project_version}_{name}.zip"
        if output_path.exists():
            # Already packaged in a previous run; skip
            print(f"跳过 {name} 因为 {output_path} 已存在")
            continue
        # Process sdkconfig_append: board Kconfig symbol plus per-variant extras
        board_type_config = _find_board_config(board_type)
        sdkconfig_append = [f"{board_type_config}=y"]
        sdkconfig_append.extend(build.get("sdkconfig_append", []))
        print("-" * 80)
        print(f"name: {name}")
        print(f"target: {target}")
        for item in sdkconfig_append:
            print(f"sdkconfig_append: {item}")
        # Drop any inherited IDF_TARGET so set-target takes effect
        os.environ.pop("IDF_TARGET", None)
        # Call set-target
        if os.system(f"idf.py set-target {target}") != 0:
            print("set-target failed", file=sys.stderr)
            sys.exit(1)
        # Append sdkconfig
        with Path("sdkconfig").open("a") as f:
            f.write("\n")
            f.write("# Append by release.py\n")
            for append in sdkconfig_append:
                f.write(f"{append}\n")
        # Build with macro BOARD_NAME defined to name
        if os.system(f"idf.py -DBOARD_NAME={name} build") != 0:
            print("build failed")
            sys.exit(1)
        # merge-bin
        merge_bin()
        # Zip
        zip_bin(name, project_version)
################################################################################
# CLI entry
################################################################################
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("board", nargs="?", default=None, help="板子类型或 all")
    parser.add_argument("-c", "--config", default="config.json", help="指定 config 文件名,默认 config.json")
    parser.add_argument("--list-boards", action="store_true", help="列出所有支持的 board 及变体列表")
    parser.add_argument("--json", action="store_true", help="配合 --list-boards,以 JSON 格式输出")
    parser.add_argument("--name", help="指定变体名称,仅编译匹配的变体")
    args = parser.parse_args()
    # List mode
    if args.list_boards:
        variants = _collect_variants(config_filename=args.config)
        if args.json:
            print(json.dumps(variants))
        else:
            for v in variants:
                print(f"{v['board']}: {v['name']}")
        sys.exit(0)
    # Current directory firmware packaging mode (no board argument)
    if args.board is None:
        merge_bin()
        curr_board_type = get_board_type_from_compile_commands()
        if curr_board_type is None:
            print("未能从 compile_commands.json 解析 board_type", file=sys.stderr)
            sys.exit(1)
        project_ver = get_project_version()
        zip_bin(curr_board_type, project_ver)
        sys.exit(0)
    # Compile mode
    board_type_input: str = args.board
    name_filter: str | None = args.name
    # Check board_type in CMakeLists
    if board_type_input != "all" and not _board_type_exists(board_type_input):
        print(f"[ERROR] main/CMakeLists.txt 中未找到 board_type {board_type_input}", file=sys.stderr)
        sys.exit(1)
    variants_all = _collect_variants(config_filename=args.config)
    # Filter board_type list: every board for "all", otherwise just the one named
    target_board_types: set[str]
    if board_type_input == "all":
        target_board_types = {v["board"] for v in variants_all}
    else:
        target_board_types = {board_type_input}
    for bt in sorted(target_board_types):
        if not _board_type_exists(bt):
            print(f"[ERROR] main/CMakeLists.txt 中未找到 board_type {bt}", file=sys.stderr)
            sys.exit(1)
        cfg_path = _BOARDS_DIR / bt / args.config
        # Single-board mode: a missing config file aborts cleanly
        if bt == board_type_input and not cfg_path.exists():
            print(f"开发板 {bt} 未定义 {args.config} 配置文件,跳过")
            sys.exit(0)
        # The --name filter only applies to an explicitly named board
        release(bt, config_filename=args.config, filter_name=name_filter if bt == board_type_input else None)

View File

@@ -1,208 +1,208 @@
<!DOCTYPE html>
<html lang="zh">
<head>
<meta charset="UTF-8" />
<title>小智声波配网</title>
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<style>
body {
font-family: "Segoe UI", "PingFang SC", sans-serif;
background: #f0f2f5;
margin: 0;
padding: 2rem 1rem;
display: flex;
justify-content: center;
}
.card {
background: #fff;
padding: 2rem 1.5rem;
border-radius: 16px;
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.08);
max-width: 400px;
width: 100%;
}
h2 {
text-align: center;
margin-bottom: 2rem;
}
label {
font-weight: bold;
display: block;
margin: 1rem 0 0.3rem;
}
input[type="text"],
input[type="password"] {
width: 100%;
padding: 0.75rem;
font-size: 1rem;
border-radius: 8px;
border: 1px solid #ccc;
box-sizing: border-box;
}
input[type="checkbox"] {
margin-right: 0.5rem;
}
.checkbox-container {
margin-top: 1rem;
font-size: 0.95rem;
}
button {
width: 100%;
margin-top: 1rem;
padding: 0.8rem;
font-size: 1rem;
border: none;
border-radius: 8px;
background-color: #4a90e2;
color: #fff;
cursor: pointer;
transition: background-color 0.2s;
}
button:hover {
background-color: #357ab8;
}
button:active {
background-color: #2f6ea2;
}
audio {
margin-top: 1.5rem;
width: 100%;
outline: none;
}
</style>
</head>
<body>
<div class="card">
<h2>📶 小智声波配网</h2>
<label for="ssid">WiFi 名称</label>
<input id="ssid" type="text" value="" placeholder="请输入 WiFi 名称" />
<label for="pwd">WiFi 密码</label>
<input id="pwd" type="password" value="" placeholder="请输入 WiFi 密码" />
<div class="checkbox-container">
<label><input type="checkbox" id="loopCheck" checked /> 自动循环播放声波</label>
</div>
<button onclick="generate()">🎵 生成并播放声波</button>
<button onclick="stopPlay()">⏹️ 停止播放</button>
<audio id="player" controls></audio>
</div>
<script>
const MARK = 1800;
const SPACE = 1500;
const SAMPLE_RATE = 44100;
const BIT_RATE = 100;
const START_BYTES = [0x01, 0x02];
const END_BYTES = [0x03, 0x04];
let loopTimer = null;
function checksum(data) {
return data.reduce((sum, b) => (sum + b) & 0xff, 0);
}
function toBits(byte) {
const bits = [];
for (let i = 7; i >= 0; i--) bits.push((byte >> i) & 1);
return bits;
}
function afskModulate(bits) {
const samplesPerBit = SAMPLE_RATE / BIT_RATE;
const totalSamples = Math.floor(bits.length * samplesPerBit);
const buffer = new Float32Array(totalSamples);
for (let i = 0; i < bits.length; i++) {
const freq = bits[i] ? MARK : SPACE;
for (let j = 0; j < samplesPerBit; j++) {
const t = (i * samplesPerBit + j) / SAMPLE_RATE;
buffer[i * samplesPerBit + j] = Math.sin(2 * Math.PI * freq * t);
}
}
return buffer;
}
function floatTo16BitPCM(floatSamples) {
const buffer = new Uint8Array(floatSamples.length * 2);
for (let i = 0; i < floatSamples.length; i++) {
const s = Math.max(-1, Math.min(1, floatSamples[i]));
const val = s < 0 ? s * 0x8000 : s * 0x7fff;
buffer[i * 2] = val & 0xff;
buffer[i * 2 + 1] = (val >> 8) & 0xff;
}
return buffer;
}
function buildWav(pcm) {
const wavHeader = new Uint8Array(44);
const dataLen = pcm.length;
const fileLen = 36 + dataLen;
const writeStr = (offset, str) => {
for (let i = 0; i < str.length; i++) wavHeader[offset + i] = str.charCodeAt(i);
};
const write32 = (offset, value) => {
wavHeader[offset] = value & 0xff;
wavHeader[offset + 1] = (value >> 8) & 0xff;
wavHeader[offset + 2] = (value >> 16) & 0xff;
wavHeader[offset + 3] = (value >> 24) & 0xff;
};
const write16 = (offset, value) => {
wavHeader[offset] = value & 0xff;
wavHeader[offset + 1] = (value >> 8) & 0xff;
};
writeStr(0, 'RIFF');
write32(4, fileLen);
writeStr(8, 'WAVE');
writeStr(12, 'fmt ');
write32(16, 16);
write16(20, 1);
write16(22, 1);
write32(24, SAMPLE_RATE);
write32(28, SAMPLE_RATE * 2);
write16(32, 2);
write16(34, 16);
writeStr(36, 'data');
write32(40, dataLen);
return new Blob([wavHeader, pcm], { type: 'audio/wav' });
}
function generate() {
stopPlay();
const ssid = document.getElementById('ssid').value.trim();
const pwd = document.getElementById('pwd').value.trim();
const dataStr = ssid + '\n' + pwd;
const textBytes = Array.from(new TextEncoder().encode(dataStr));
const fullBytes = [...START_BYTES, ...textBytes, checksum(textBytes), ...END_BYTES];
let bits = [];
fullBytes.forEach((b) => (bits = bits.concat(toBits(b))));
const floatBuf = afskModulate(bits);
const pcmBuf = floatTo16BitPCM(floatBuf);
const wavBlob = buildWav(pcmBuf);
const audio = document.getElementById('player');
audio.src = URL.createObjectURL(wavBlob);
audio.load();
audio.play();
// 修改了这里:使用 'ended' 事件来实现循环播放
if (document.getElementById('loopCheck').checked) {
audio.onended = function() {
audio.currentTime = 0; // 从头开始
audio.play(); // 重新播放
};
}
}
function stopPlay() {
const audio = document.getElementById('player');
audio.pause();
audio.onended = null; // 清除事件监听
}
</script>
</body>
</html>
<!DOCTYPE html>
<html lang="zh">
<head>
<meta charset="UTF-8" />
<title>小智声波配网</title>
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<style>
body {
font-family: "Segoe UI", "PingFang SC", sans-serif;
background: #f0f2f5;
margin: 0;
padding: 2rem 1rem;
display: flex;
justify-content: center;
}
.card {
background: #fff;
padding: 2rem 1.5rem;
border-radius: 16px;
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.08);
max-width: 400px;
width: 100%;
}
h2 {
text-align: center;
margin-bottom: 2rem;
}
label {
font-weight: bold;
display: block;
margin: 1rem 0 0.3rem;
}
input[type="text"],
input[type="password"] {
width: 100%;
padding: 0.75rem;
font-size: 1rem;
border-radius: 8px;
border: 1px solid #ccc;
box-sizing: border-box;
}
input[type="checkbox"] {
margin-right: 0.5rem;
}
.checkbox-container {
margin-top: 1rem;
font-size: 0.95rem;
}
button {
width: 100%;
margin-top: 1rem;
padding: 0.8rem;
font-size: 1rem;
border: none;
border-radius: 8px;
background-color: #4a90e2;
color: #fff;
cursor: pointer;
transition: background-color 0.2s;
}
button:hover {
background-color: #357ab8;
}
button:active {
background-color: #2f6ea2;
}
audio {
margin-top: 1.5rem;
width: 100%;
outline: none;
}
</style>
</head>
<body>
<div class="card">
<h2>📶 小智声波配网</h2>
<label for="ssid">WiFi 名称</label>
<input id="ssid" type="text" value="" placeholder="请输入 WiFi 名称" />
<label for="pwd">WiFi 密码</label>
<input id="pwd" type="password" value="" placeholder="请输入 WiFi 密码" />
<div class="checkbox-container">
<label><input type="checkbox" id="loopCheck" checked /> 自动循环播放声波</label>
</div>
<button onclick="generate()">🎵 生成并播放声波</button>
<button onclick="stopPlay()">⏹️ 停止播放</button>
<audio id="player" controls></audio>
</div>
<script>
const MARK = 1800;
const SPACE = 1500;
const SAMPLE_RATE = 44100;
const BIT_RATE = 100;
const START_BYTES = [0x01, 0x02];
const END_BYTES = [0x03, 0x04];
let loopTimer = null;
function checksum(data) {
return data.reduce((sum, b) => (sum + b) & 0xff, 0);
}
function toBits(byte) {
const bits = [];
for (let i = 7; i >= 0; i--) bits.push((byte >> i) & 1);
return bits;
}
function afskModulate(bits) {
const samplesPerBit = SAMPLE_RATE / BIT_RATE;
const totalSamples = Math.floor(bits.length * samplesPerBit);
const buffer = new Float32Array(totalSamples);
for (let i = 0; i < bits.length; i++) {
const freq = bits[i] ? MARK : SPACE;
for (let j = 0; j < samplesPerBit; j++) {
const t = (i * samplesPerBit + j) / SAMPLE_RATE;
buffer[i * samplesPerBit + j] = Math.sin(2 * Math.PI * freq * t);
}
}
return buffer;
}
function floatTo16BitPCM(floatSamples) {
const buffer = new Uint8Array(floatSamples.length * 2);
for (let i = 0; i < floatSamples.length; i++) {
const s = Math.max(-1, Math.min(1, floatSamples[i]));
const val = s < 0 ? s * 0x8000 : s * 0x7fff;
buffer[i * 2] = val & 0xff;
buffer[i * 2 + 1] = (val >> 8) & 0xff;
}
return buffer;
}
function buildWav(pcm) {
const wavHeader = new Uint8Array(44);
const dataLen = pcm.length;
const fileLen = 36 + dataLen;
const writeStr = (offset, str) => {
for (let i = 0; i < str.length; i++) wavHeader[offset + i] = str.charCodeAt(i);
};
const write32 = (offset, value) => {
wavHeader[offset] = value & 0xff;
wavHeader[offset + 1] = (value >> 8) & 0xff;
wavHeader[offset + 2] = (value >> 16) & 0xff;
wavHeader[offset + 3] = (value >> 24) & 0xff;
};
const write16 = (offset, value) => {
wavHeader[offset] = value & 0xff;
wavHeader[offset + 1] = (value >> 8) & 0xff;
};
writeStr(0, 'RIFF');
write32(4, fileLen);
writeStr(8, 'WAVE');
writeStr(12, 'fmt ');
write32(16, 16);
write16(20, 1);
write16(22, 1);
write32(24, SAMPLE_RATE);
write32(28, SAMPLE_RATE * 2);
write16(32, 2);
write16(34, 16);
writeStr(36, 'data');
write32(40, dataLen);
return new Blob([wavHeader, pcm], { type: 'audio/wav' });
}
function generate() {
stopPlay();
const ssid = document.getElementById('ssid').value.trim();
const pwd = document.getElementById('pwd').value.trim();
const dataStr = ssid + '\n' + pwd;
const textBytes = Array.from(new TextEncoder().encode(dataStr));
const fullBytes = [...START_BYTES, ...textBytes, checksum(textBytes), ...END_BYTES];
let bits = [];
fullBytes.forEach((b) => (bits = bits.concat(toBits(b))));
const floatBuf = afskModulate(bits);
const pcmBuf = floatTo16BitPCM(floatBuf);
const wavBlob = buildWav(pcmBuf);
const audio = document.getElementById('player');
audio.src = URL.createObjectURL(wavBlob);
audio.load();
audio.play();
// 修改了这里:使用 'ended' 事件来实现循环播放
if (document.getElementById('loopCheck').checked) {
audio.onended = function() {
audio.currentTime = 0; // 从头开始
audio.play(); // 重新播放
};
}
}
function stopPlay() {
const audio = document.getElementById('player');
audio.pause();
audio.onended = null; // 清除事件监听
}
</script>
</body>
</html>

View File

@@ -0,0 +1,110 @@
# SPIFFS Assets Builder
这个脚本用于构建 ESP32 项目的 SPIFFS 资源分区,将各种资源文件打包成可在设备上使用的格式。
## 功能特性
- 处理唤醒网络模型 (WakeNet Model)
- 集成文本字体文件
- 处理表情符号图片集合
- 自动生成资源索引文件
- 打包生成最终的 `assets.bin` 文件
## 依赖要求
- Python 3.6+
- 相关资源文件
## 使用方法
### 基本语法
```bash
./build.py --wakenet_model <wakenet_model_dir> \
--text_font <text_font_file> \
--emoji_collection <emoji_collection_dir>
```
### 参数说明
| 参数 | 类型 | 必需 | 说明 |
|------|------|------|------|
| `--wakenet_model` | 目录路径 | 否 | 唤醒网络模型目录路径 |
| `--text_font` | 文件路径 | 否 | 文本字体文件路径 |
| `--emoji_collection` | 目录路径 | 否 | 表情符号图片集合目录路径 |
### 使用示例
```bash
# 完整参数示例
./build.py \
--wakenet_model ../../managed_components/espressif__esp-sr/model/wakenet_model/wn9_nihaoxiaozhi_tts \
--text_font ../../components/xiaozhi-fonts/build/font_puhui_common_20_4.bin \
--emoji_collection ../../components/xiaozhi-fonts/build/emojis_64/
# 仅处理字体文件
./build.py --text_font ../../components/xiaozhi-fonts/build/font_puhui_common_20_4.bin
# 仅处理表情符号
./build.py --emoji_collection ../../components/xiaozhi-fonts/build/emojis_64/
```
## 工作流程
1. **创建构建目录结构**
- `build/` - 主构建目录
- `build/assets/` - 资源文件目录
- `build/output/` - 输出文件目录
2. **处理唤醒网络模型**
- 复制模型文件到构建目录
- 使用 `pack_model.py` 生成 `srmodels.bin`
- 将生成的模型文件复制到资源目录
3. **处理文本字体**
- 复制字体文件到资源目录
- 支持 `.bin` 格式的字体文件
4. **处理表情符号集合**
- 扫描指定目录中的图片文件
- 支持 `.png``.gif` 格式
- 自动生成表情符号索引
5. **生成配置文件**
- `index.json` - 资源索引文件
- `config.json` - 构建配置文件
6. **打包最终资源**
- 使用 `spiffs_assets_gen.py` 生成 `assets.bin`
- 复制到构建根目录
## 输出文件
构建完成后,会在 `build/` 目录下生成以下文件:
- `assets/` - 所有资源文件
- `assets.bin` - 最终的 SPIFFS 资源文件
- `config.json` - 构建配置
- `output/` - 中间输出文件
## 支持的资源格式
- **模型文件**: `.bin` (通过 pack_model.py 处理)
- **字体文件**: `.bin`
- **图片文件**: `.png`, `.gif`
- **配置文件**: `.json`
## 错误处理
脚本包含完善的错误处理机制:
- 检查源文件/目录是否存在
- 验证子进程执行结果
- 提供详细的错误信息和警告
## 注意事项
1. 确保所有依赖的 Python 脚本都在同一目录下
2. 资源文件路径使用绝对路径或相对于脚本目录的路径
3. 构建过程会清理之前的构建文件
4. 生成的 `assets.bin` 文件大小受 SPIFFS 分区大小限制

View File

@@ -0,0 +1,400 @@
#!/usr/bin/env python3
"""
Build the spiffs assets partition
Usage:
./build.py --wakenet_model <wakenet_model_dir> \
--text_font <text_font_file> \
--emoji_collection <emoji_collection_dir>
Example:
./build.py --wakenet_model ../../managed_components/espressif__esp-sr/model/wakenet_model/wn9_nihaoxiaozhi_tts \
--text_font ../../components/xiaozhi-fonts/build/font_puhui_common_20_4.bin \
--emoji_collection ../../components/xiaozhi-fonts/build/emojis_64/
"""
import os
import sys
import shutil
import argparse
import subprocess
import json
from pathlib import Path
def ensure_dir(directory):
    """Create *directory* (including parents) if it does not already exist."""
    Path(directory).mkdir(parents=True, exist_ok=True)
def copy_file(src, dst):
    """Copy one file (metadata preserved); warn instead of raising if missing."""
    if not os.path.exists(src):
        print(f"Warning: Source file does not exist: {src}")
        return
    shutil.copy2(src, dst)
    print(f"Copied: {src} -> {dst}")
def copy_directory(src, dst):
    """Recursively copy a directory tree; warn instead of raising if missing."""
    if not os.path.exists(src):
        print(f"Warning: Source directory does not exist: {src}")
        return
    shutil.copytree(src, dst, dirs_exist_ok=True)
    print(f"Copied directory: {src} -> {dst}")
def process_wakenet_model(wakenet_model_dir, build_dir, assets_dir):
    """Pack the wakenet model directory into srmodels.bin and stage it.

    Returns the staged filename ("srmodels.bin") on success, or None when no
    model directory was given or packing failed.
    """
    if not wakenet_model_dir:
        return None
    # Copy input directory to build directory (fresh each run).
    wakenet_build_dir = os.path.join(build_dir, "wakenet_model")
    if os.path.exists(wakenet_build_dir):
        shutil.rmtree(wakenet_build_dir)
    copy_directory(wakenet_model_dir, os.path.join(wakenet_build_dir, os.path.basename(wakenet_model_dir)))
    # Use pack_model.py to generate srmodels.bin.
    # pack_model.py writes its output inside the -m directory, so the result
    # lands at wakenet_build_dir/srmodels.bin.
    srmodels_output = os.path.join(wakenet_build_dir, "srmodels.bin")
    try:
        subprocess.run([
            sys.executable, "pack_model.py",
            "-m", wakenet_build_dir,
            "-o", "srmodels.bin"
        ], check=True, cwd=os.path.dirname(__file__))
        print(f"Generated: {srmodels_output}")
        # Copy srmodels.bin to the assets staging directory.
        copy_file(srmodels_output, os.path.join(assets_dir, "srmodels.bin"))
        return "srmodels.bin"
    except subprocess.CalledProcessError as e:
        # Best-effort: a failed pack disables the srmodels entry in index.json.
        print(f"Error: Failed to generate srmodels.bin: {e}")
        return None
def process_text_font(text_font_file, assets_dir):
    """Stage the text font file into the assets dir; return its basename.

    Returns None when no font file was given.
    """
    if not text_font_file:
        return None
    font_filename = os.path.basename(text_font_file)
    copy_file(text_font_file, os.path.join(assets_dir, font_filename))
    return font_filename
def process_emoji_collection(emoji_collection_dir, assets_dir):
    """Stage every .png/.gif under the emoji dir; return [{name, file}, ...]."""
    if not emoji_collection_dir:
        return []
    emoji_list = []
    for root, _dirs, files in os.walk(emoji_collection_dir):
        for filename in files:
            # Only PNG and GIF images are recognized as emoji frames.
            if not filename.lower().endswith(('.png', '.gif')):
                continue
            copy_file(os.path.join(root, filename), os.path.join(assets_dir, filename))
            stem, _ext = os.path.splitext(filename)
            emoji_list.append({"name": stem, "file": filename})
    return emoji_list
def load_emoji_config(emoji_collection_dir):
    """Load the per-board emote configuration from emote.json.

    The file is a JSON list of objects; entries carrying an "emote" key are
    re-indexed into a dict keyed by emote name. Returns {} when the file is
    missing or unreadable, so callers can treat the config as best-effort.
    """
    config_path = os.path.join(emoji_collection_dir, "emote.json")
    if not os.path.exists(config_path):
        print(f"Warning: Config file not found: {config_path}")
        return {}
    try:
        with open(config_path, 'r', encoding='utf-8') as f:
            config_data = json.load(f)
        # Convert list format to dict for easy lookup by emote name.
        config_dict = {}
        for item in config_data:
            if "emote" in item:
                config_dict[item["emote"]] = item
        return config_dict
    except Exception as e:
        # Deliberately broad: any parse/IO error degrades to an empty config.
        print(f"Error loading config file {config_path}: {e}")
        return {}
def process_board_emoji_collection(emoji_collection_dir, target_board_dir, assets_dir):
    """Stage per-board emote EAF files listed in the board's emote.json.

    Each config entry must carry a "src" filename located under
    *emoji_collection_dir*; existing files are copied into *assets_dir*,
    missing ones are still indexed but flagged with eaf.lack = True.
    Returns a list of {"name", "file"[, "eaf"]} entries for index.json.
    """
    if not emoji_collection_dir:
        return []
    emoji_config = load_emoji_config(target_board_dir)
    print(f"Loaded emoji config with {len(emoji_config)} entries")
    emoji_list = []
    for emote_name, config in emoji_config.items():
        # Entries without a source file cannot be staged at all.
        if "src" not in config:
            print(f"Error: No src field found for emote '{emote_name}' in config")
            continue
        eaf_file_path = os.path.join(emoji_collection_dir, config["src"])
        file_exists = os.path.exists(eaf_file_path)
        if not file_exists:
            print(f"Warning: EAF file not found for emote '{emote_name}': {eaf_file_path}")
        else:
            # Copy eaf file to assets directory
            copy_file(eaf_file_path, os.path.join(assets_dir, config["src"]))
        # Create emoji entry with src as file (merge file and src)
        emoji_entry = {
            "name": emote_name,
            "file": config["src"]  # Use src as the actual file
        }
        # Optional per-emote attributes forwarded into the index.
        eaf_properties = {}
        if not file_exists:
            eaf_properties["lack"] = True
        if "loop" in config:
            eaf_properties["loop"] = config["loop"]
        if "fps" in config:
            eaf_properties["fps"] = config["fps"]
        if eaf_properties:
            emoji_entry["eaf"] = eaf_properties
        status = "MISSING" if not file_exists else "OK"
        eaf_info = emoji_entry.get('eaf', {})
        print(f"emote '{emote_name}': file='{emoji_entry['file']}', status={status}, lack={eaf_info.get('lack', False)}, loop={eaf_info.get('loop', 'none')}, fps={eaf_info.get('fps', 'none')}")
        emoji_list.append(emoji_entry)
    print(f"Successfully processed {len(emoji_list)} emotes from config")
    return emoji_list
def process_board_icon_collection(icon_collection_dir, assets_dir):
    """Stage board icon assets (.bin files plus listen.eaf) into *assets_dir*.

    Returns a list of {"name": <stem>, "file": <filename>} entries.
    (Docstring fixed: this handles icons, not the emoji collection; the
    original `('.bin')` was a plain string in parentheses, not a tuple,
    so it is written as '.bin' here to avoid the misleading syntax.)
    """
    if not icon_collection_dir:
        return []
    icon_list = []
    for root, dirs, files in os.walk(icon_collection_dir):
        for file in files:
            if file.lower().endswith('.bin') or file.lower() == 'listen.eaf':
                src_file = os.path.join(root, file)
                dst_file = os.path.join(assets_dir, file)
                copy_file(src_file, dst_file)
                filename_without_ext = os.path.splitext(file)[0]
                icon_list.append({
                    "name": filename_without_ext,
                    "file": file
                })
    return icon_list
def process_board_layout(layout_json_file, assets_dir):
    """Read the board's layout.json and normalise each element.

    *layout_json_file* may be the JSON file itself or a directory containing
    layout.json. *assets_dir* is only logged here; nothing is copied.
    Returns a list of {name, align, x, y[, width, height]} dicts ([] on any
    error, keeping the build best-effort).
    """
    if not layout_json_file:
        print(f"Warning: Layout json file not provided")
        return []
    print(f"Processing layout_json: {layout_json_file}")
    print(f"assets_dir: {assets_dir}")
    # Accept either a directory (look for layout.json inside) or a file path.
    if os.path.isdir(layout_json_file):
        layout_json_path = os.path.join(layout_json_file, "layout.json")
        if not os.path.exists(layout_json_path):
            print(f"Warning: layout.json not found in directory: {layout_json_file}")
            return []
        layout_json_file = layout_json_path
    elif not os.path.isfile(layout_json_file):
        print(f"Warning: Layout json file not found: {layout_json_file}")
        return []
    try:
        with open(layout_json_file, 'r', encoding='utf-8') as f:
            layout_data = json.load(f)
        # Layout data is now directly an array; fall back to a legacy
        # {"layout": [...]} wrapper for older files.
        layout_items = layout_data if isinstance(layout_data, list) else layout_data.get("layout", [])
        processed_layout = []
        for item in layout_items:
            # Required fields get safe defaults; width/height stay optional.
            processed_item = {
                "name": item.get("name", ""),
                "align": item.get("align", ""),
                "x": item.get("x", 0),
                "y": item.get("y", 0)
            }
            if "width" in item:
                processed_item["width"] = item["width"]
            if "height" in item:
                processed_item["height"] = item["height"]
            processed_layout.append(processed_item)
        print(f"Processed {len(processed_layout)} layout elements")
        return processed_layout
    except Exception as e:
        # Broad by design: malformed layout degrades to "no layout".
        print(f"Error reading/processing layout.json: {e}")
        return []
def process_board_collection(target_board_dir, res_path, assets_dir):
    """Stage board emojis, icons and layout; return (emojis, icons, layout).

    All three lists are empty when either the resource directory or the
    board directory is missing.
    """
    if not (os.path.exists(res_path) and os.path.exists(target_board_dir)):
        print(f"Warning: EAF directory not found: {res_path} or {target_board_dir}")
        return [], [], []
    return (
        process_board_emoji_collection(res_path, target_board_dir, assets_dir),
        process_board_icon_collection(res_path, assets_dir),
        process_board_layout(target_board_dir, assets_dir),
    )
def generate_index_json(assets_dir, srmodels, text_font, emoji_collection, icon_collection, layout_json):
    """Write assets/index.json describing every staged asset group.

    Only groups that were actually produced (truthy) get a key, so the
    firmware can probe for presence by key existence.
    """
    index_data = {"version": 1}
    optional_groups = [
        ("srmodels", srmodels),
        ("text_font", text_font),
        ("emoji_collection", emoji_collection),
        ("icon_collection", icon_collection),
        ("layout", layout_json),
    ]
    for key, value in optional_groups:
        if value:
            index_data[key] = value
    index_path = os.path.join(assets_dir, "index.json")
    with open(index_path, 'w', encoding='utf-8') as f:
        json.dump(index_data, f, indent=4, ensure_ascii=False)
    print(f"Generated: {index_path}")
def generate_config_json(build_dir, assets_dir):
    """Write build/config.json consumed by spiffs_assets_gen.py; return its path.

    Note: *assets_dir* is accepted for call-site symmetry but not used; all
    paths are derived from this script's own directory.
    """
    # All paths in the config are absolute, rooted at the script directory.
    workspace_dir = os.path.abspath(os.path.join(os.path.dirname(__file__)))
    config_data = {
        "include_path": os.path.join(workspace_dir, "build/include"),
        "assets_path": os.path.join(workspace_dir, "build/assets"),
        "image_file": os.path.join(workspace_dir, "build/output/assets.bin"),
        "lvgl_ver": "9.3.0",
        "assets_size": "0x400000",
        "support_format": ".png, .gif, .jpg, .bin, .json, .eaf",
        "name_length": "32",
        "split_height": "0",
    }
    # Every optional conversion format is disabled for this build.
    for flag in ("qoi", "spng", "sjpg", "sqoi", "raw", "raw_dither", "raw_bgr"):
        config_data[f"support_{flag}"] = False
    config_path = os.path.join(build_dir, "config.json")
    with open(config_path, 'w', encoding='utf-8') as f:
        json.dump(config_data, f, indent=4, ensure_ascii=False)
    print(f"Generated: {config_path}")
    return config_path
def main():
    """CLI entry: stage all requested assets and pack them into assets.bin.

    Pipeline: clean build/assets -> stage model/font/emoji (or board
    collection) -> write index.json + config.json -> run spiffs_assets_gen.py
    -> copy the packed image to build/assets.bin.
    """
    parser = argparse.ArgumentParser(description='Build the spiffs assets partition')
    parser.add_argument('--wakenet_model', help='Path to wakenet model directory')
    parser.add_argument('--text_font', help='Path to text font file')
    parser.add_argument('--emoji_collection', help='Path to emoji collection directory')
    parser.add_argument('--res_path', help='Path to res directory')
    parser.add_argument('--target_board', help='Path to target board directory')
    args = parser.parse_args()
    # Get script directory
    script_dir = os.path.dirname(os.path.abspath(__file__))
    # Set directory paths
    build_dir = os.path.join(script_dir, "build")
    assets_dir = os.path.join(build_dir, "assets")
    # Start from a clean staging directory so stale assets never leak in.
    if os.path.exists(assets_dir):
        shutil.rmtree(assets_dir)
    # Ensure directories exist
    ensure_dir(build_dir)
    ensure_dir(assets_dir)
    print("Starting to build SPIFFS assets partition...")
    # Process each parameter
    srmodels = process_wakenet_model(args.wakenet_model, build_dir, assets_dir)
    text_font = process_text_font(args.text_font, assets_dir)
    # Board mode (--target_board) supersedes the plain emoji collection mode.
    if(args.target_board):
        emoji_collection, icon_collection, layout_json = process_board_collection(args.target_board, args.res_path, assets_dir)
    else:
        emoji_collection = process_emoji_collection(args.emoji_collection, assets_dir)
        icon_collection = []
        layout_json = []
    # Generate index.json
    generate_index_json(assets_dir, srmodels, text_font, emoji_collection, icon_collection, layout_json)
    # Generate config.json
    config_path = generate_config_json(build_dir, assets_dir)
    # Use spiffs_assets_gen.py to package final build/assets.bin
    try:
        subprocess.run([
            sys.executable, "spiffs_assets_gen.py",
            "--config", config_path
        ], check=True, cwd=script_dir)
        print("Successfully packaged assets.bin")
    except subprocess.CalledProcessError as e:
        print(f"Error: Failed to package assets.bin: {e}")
        sys.exit(1)
    # Copy build/output/assets.bin to build/assets.bin
    shutil.copy(os.path.join(build_dir, "output", "assets.bin"), os.path.join(build_dir, "assets.bin"))
    print("Build completed!")
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,187 @@
#!/usr/bin/env python3
"""
Build multiple spiffs assets partitions with different parameter combinations
This script calls build.py with different combinations of:
- wakenet_models
- text_fonts
- emoji_collections
And generates assets.bin files with names like:
wn9_nihaoxiaozhi_tts-font_puhui_common_20_4-emojis_32.bin
"""
import os
import sys
import shutil
import subprocess
import argparse
from pathlib import Path
def ensure_dir(directory):
    """Make sure *directory* exists, creating parent directories as needed."""
    if not os.path.isdir(directory):
        os.makedirs(directory, exist_ok=True)
def get_file_path(base_dir, filename):
    """Return the full path for *filename* under *base_dir*.

    "none" maps to None (that resource is skipped for the combination).
    Emoji collections ("emojis_*") are directory names used as-is; every
    other resource name refers to a "<name>.bin" file.

    NOTE(review): the original contained a corrupted literal
    (f"(unknown).bin") where the filename should be interpolated; restored
    as f"{filename}.bin" -- confirm against callers.
    """
    if filename == "none":
        return None
    if filename.startswith("emojis_"):
        return os.path.join(base_dir, filename)
    return os.path.join(base_dir, f"{filename}.bin")
def build_assets(wakenet_model, text_font, emoji_collection, target_board, build_dir, final_dir):
    """Invoke build.py for one parameter combination and archive its assets.bin.

    Each parameter may be the string "none" to omit that resource. Returns
    True when build.py succeeded and the renamed output landed in
    *final_dir*, False otherwise.
    """
    # Prepare arguments for build.py
    cmd = [sys.executable, "build.py"]
    if wakenet_model != "none":
        wakenet_path = os.path.join("../../managed_components/espressif__esp-sr/model/wakenet_model", wakenet_model)
        cmd.extend(["--wakenet_model", wakenet_path])
    if text_font != "none":
        # NOTE(review): this font path ("78__xiaozhi-fonts/cbin") differs from
        # the "xiaozhi-fonts/build" directory used for emoji collections below
        # -- confirm it is intentional and exists in the repo.
        text_font_path = os.path.join("../../components/78__xiaozhi-fonts/cbin", f"{text_font}.bin")
        cmd.extend(["--text_font", text_font_path])
    if emoji_collection != "none":
        emoji_path = os.path.join("../../components/xiaozhi-fonts/build", emoji_collection)
        cmd.extend(["--emoji_collection", emoji_path])
    if target_board != "none":
        res_path = os.path.join("../../managed_components/espressif2022__esp_emote_gfx/emoji_large", "")
        cmd.extend(["--res_path", res_path])
        target_board_path = os.path.join("../../main/boards/", f"{target_board}")
        cmd.extend(["--target_board", target_board_path])
    print(f"\n正在构建: {wakenet_model}-{text_font}-{emoji_collection}-{target_board}")
    print(f"执行命令: {' '.join(cmd)}")
    try:
        # Run build.py
        result = subprocess.run(cmd, check=True, cwd=os.path.dirname(__file__))
        # Generate output filename: board builds are keyed by board name,
        # otherwise by emoji collection.
        if(target_board != "none"):
            output_name = f"{wakenet_model}-{text_font}-{target_board}.bin"
        else:
            output_name = f"{wakenet_model}-{text_font}-{emoji_collection}.bin"
        # Copy generated assets.bin to final directory with new name
        src_path = os.path.join(build_dir, "assets.bin")
        dst_path = os.path.join(final_dir, output_name)
        if os.path.exists(src_path):
            shutil.copy2(src_path, dst_path)
            print(f"✓ 成功生成: {output_name}")
            return True
        else:
            print(f"✗ 错误: 未找到生成的 assets.bin 文件")
            return False
    except subprocess.CalledProcessError as e:
        print(f"✗ 构建失败: {e}")
        return False
    except Exception as e:
        # Catch-all so one bad combination does not abort the whole matrix.
        print(f"✗ 未知错误: {e}")
        return False
def main():
    """Enumerate parameter combinations and build an assets.bin for each.

    Two modes: 'emoji_collections' crosses models x fonts x emoji sets;
    'emoji_target_boards' crosses models x fonts x boards. Results are
    collected under build/final/ and summarised at the end.
    """
    # Parse command line arguments
    parser = argparse.ArgumentParser(description='构建多个 SPIFFS assets 分区')
    parser.add_argument('--mode',
                        choices=['emoji_collections', 'emoji_target_boards'],
                        default='emoji_collections',
                        help='选择运行模式: emoji_collections 或 emoji_target_boards (默认: emoji_collections)')
    args = parser.parse_args()
    # Configuration: "none" entries mean "omit this resource".
    wakenet_models = [
        "none",
        "wn9_nihaoxiaozhi_tts",
        "wn9s_nihaoxiaozhi"
    ]
    text_fonts = [
        "none",
        "font_puhui_common_14_1",
        "font_puhui_common_16_4",
        "font_puhui_common_20_4",
        "font_puhui_common_30_4",
    ]
    emoji_collections = [
        "none",
        "emojis_32",
        "emojis_64",
    ]
    emoji_target_boards = [
        "esp-box-3",
        "echoear",
    ]
    # Get script directory
    script_dir = os.path.dirname(os.path.abspath(__file__))
    # Set directory paths
    build_dir = os.path.join(script_dir, "build")
    final_dir = os.path.join(build_dir, "final")
    # Ensure directories exist
    ensure_dir(build_dir)
    ensure_dir(final_dir)
    print("开始构建多个 SPIFFS assets 分区...")
    print(f"运行模式: {args.mode}")
    print(f"输出目录: {final_dir}")
    # Track successful builds
    successful_builds = 0
    if args.mode == 'emoji_collections':
        # Calculate total combinations for emoji_collections mode
        total_combinations = len(wakenet_models) * len(text_fonts) * len(emoji_collections)
        # Build all combinations with emoji_collections
        for wakenet_model in wakenet_models:
            for text_font in text_fonts:
                for emoji_collection in emoji_collections:
                    if build_assets(wakenet_model, text_font, emoji_collection, "none", build_dir, final_dir):
                        successful_builds += 1
    elif args.mode == 'emoji_target_boards':
        # Calculate total combinations for emoji_target_boards mode
        total_combinations = len(wakenet_models) * len(text_fonts) * len(emoji_target_boards)
        # Build all combinations with emoji_target_boards
        for wakenet_model in wakenet_models:
            for text_font in text_fonts:
                for emoji_target_board in emoji_target_boards:
                    if build_assets(wakenet_model, text_font, "none", emoji_target_board, build_dir, final_dir):
                        successful_builds += 1
    print(f"\n构建完成!")
    print(f"成功构建: {successful_builds}/{total_combinations}")
    print(f"输出文件位置: {final_dir}")
    # List generated files with their sizes for a quick sanity check.
    if os.path.exists(final_dir):
        files = [f for f in os.listdir(final_dir) if f.endswith('.bin')]
        if files:
            print("\n生成的文件:")
            for file in sorted(files):
                file_size = os.path.getsize(os.path.join(final_dir, file))
                print(f"  {file} ({file_size:,} bytes)")
        else:
            print("\n未找到生成的 .bin 文件")
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,123 @@
import os
import struct
import argparse
def struct_pack_string(string, max_len=None):
    """Pack an ASCII string into bytes, NUL-padded to *max_len*.

    If max_len is None the string is packed with no padding (the original
    docstring claimed len(string) + 1, which contradicted the code);
    otherwise len(string) must be <= max_len and the remainder is filled
    with b'\\x00'. Characters outside the signed-byte range raise
    struct.error, matching the original 'b' format.

    Fixes: the original returned None for an empty string without max_len,
    and crashed (TypeError) for an empty string with padding.
    """
    if max_len is None:
        max_len = len(string)
    else:
        assert len(string) <= max_len
    pad = max_len - len(string)
    # 'b' (signed byte) per character preserves the original range check;
    # struct.pack('x') is a single NUL pad byte.
    packed = b''.join(struct.pack('b', ord(ch)) for ch in string)
    return packed + struct.pack('x') * pad
def read_data(filename):
    """Read a file and return its raw bytes (e.g. model index or mndata)."""
    with open(filename, "rb") as fh:
        return fh.read()
def pack_models(model_path, out_file="srmodels.bin"):
    """
    Pack all models into one binary file by the following format:
    {
        model_num: int
        model1_info: model_info_t
        model2_info: model_info_t
        ...
        model1_index,model1_data,model1_MODEL_INFO
        model1_index,model1_data,model1_MODEL_INFO
        ...
    }model_pack_t
    {
        model_name: char[32]
        file_number: int
        file1_name: char[32]
        file1_start: int
        file1_len: int
        file2_name: char[32]
        file2_start: int   // data_len = info_start - data_start
        file2_len: int
        ...
    }model_info_t
    model_path: the path of models
    out_file: the output binary filename (written inside model_path)
    """
    models = {}
    file_num = 0
    model_num = 0
    # First pass: read every file of every model subdirectory into memory.
    # NOTE(review): os.walk recurses, so nested subdirectories would also be
    # registered as "models" -- confirm model_path is a flat directory tree.
    for root, dirs, _ in os.walk(model_path):
        for model_name in dirs:
            models[model_name] = {}
            model_dir = os.path.join(root, model_name)
            model_num += 1
            for _, _, files in os.walk(model_dir):
                for file_name in files:
                    file_num += 1
                    file_path = os.path.join(model_dir, file_name)
                    models[model_name][file_name] = read_data(file_path)
    model_num = len(models)
    # Header size: u32 model count + per-model (32-byte name + u32 file count)
    # + per-file (32-byte name + u32 start offset + u32 length).
    header_len = 4 + model_num*(32+4) + file_num*(32+4+4)
    out_bin = struct.pack('I', model_num)  # model number
    data_bin = None
    for key in models:
        model_bin = struct_pack_string(key, 32)  # + model name
        model_bin += struct.pack('I', len(models[key]))  # + file number in this model
        for file_name in models[key]:
            model_bin += struct_pack_string(file_name, 32)  # + file name
            if data_bin == None:
                # The very first file starts right after the fixed-size header.
                model_bin += struct.pack('I', header_len)
                data_bin = models[key][file_name]
                model_bin += struct.pack('I', len(models[key][file_name]))
                # print(file_name, header_len, len(models[key][file_name]), len(data_bin))
            else:
                # Subsequent files start at header_len + bytes already queued.
                model_bin += struct.pack('I', header_len+len(data_bin))
                # print(file_name, header_len+len(data_bin), len(models[key][file_name]))
                data_bin += models[key][file_name]
                model_bin += struct.pack('I', len(models[key][file_name]))
        out_bin += model_bin
    # Sanity check: the metadata we emitted must match the computed header size.
    assert len(out_bin) == header_len
    if data_bin != None:
        out_bin += data_bin
    # Output is always placed inside the model directory itself.
    out_file = os.path.join(model_path, out_file)
    with open(out_file, "wb") as f:
        f.write(out_bin)
if __name__ == "__main__":
    # CLI: pack every model directory under --model_path into one binary blob.
    parser = argparse.ArgumentParser(description='Model package tool')
    parser.add_argument('-m', '--model_path', help="the path of model files")
    parser.add_argument('-o', '--out_file', default="srmodels.bin", help="the path of binary file")
    args = parser.parse_args()
    pack_models(model_path=args.model_path, out_file=args.out_file)

View File

@@ -0,0 +1,647 @@
# SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD
# SPDX-License-Identifier: Apache-2.0
import io
import os
import argparse
import json
import shutil
import math
import sys
import time
import numpy as np
import importlib
import subprocess
import urllib.request
from PIL import Image
from datetime import datetime
from dataclasses import dataclass
from typing import List
from pathlib import Path
from packaging import version
sys.dont_write_bytecode = True
GREEN = '\033[1;32m'
RED = '\033[1;31m'
RESET = '\033[0m'
@dataclass
class AssetCopyConfig:
    """Options for staging assets before packing.

    The *_enable flags appear to mirror the "support_*" switches written to
    config.json by build.py (spng/sjpg/qoi/sqoi/raw) -- TODO confirm exact
    converter semantics against the asset generator.
    """
    assets_path: str            # source directory of input assets
    target_path: str            # destination directory for staged assets
    spng_enable: bool
    sjpg_enable: bool
    qoi_enable: bool
    sqoi_enable: bool
    row_enable: bool
    support_format: List[str]   # accepted file extensions
    split_height: int           # image split height; "0" config value suggests no split -- TODO confirm
@dataclass
class PackModelsConfig:
    """Inputs for the final packing step (paths taken from config.json).

    Field meanings inferred from the config.json keys of the same names --
    TODO confirm against the consumer of this config.
    """
    target_path: str    # directory holding processed assets
    include_path: str   # where the generated mmap_*.h header is written
    image_file: str     # output path of the packed binary image
    assets_path: str    # original assets directory
    name_length: int    # stored asset-name length limit
def generate_header_filename(path):
    """Return the generated-header file name derived from *path*'s basename."""
    return f'mmap_generate_{os.path.basename(path)}.h'
def compute_checksum(data):
    """Sum all bytes of *data* and keep the low 16 bits as the checksum."""
    return sum(data) & 0xFFFF
def sort_key(filename):
    """Sort key ordering files by extension first, then by base name."""
    root, ext = os.path.splitext(filename)
    return ext, root
def download_v8_script(convert_path):
    """
    Ensure the lvgl_image_converter repository exists at *convert_path*.

    Clones the repository when the directory is missing, then checks out the
    pinned commit. Exits the process on an empty path or any git failure.

    Parameters:
    - convert_path (str): Directory where lvgl_image_converter should live.
    """
    # Guard clause: an empty path is a hard configuration error.
    if not convert_path:
        print('Error: convert_path is NULL')
        sys.exit(1)
    quiet = dict(stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True)
    if not os.path.exists(convert_path):
        os.makedirs(convert_path, exist_ok=True)
        try:
            subprocess.run(
                ['git', 'clone', 'https://github.com/W-Mai/lvgl_image_converter.git', convert_path],
                **quiet
            )
        except subprocess.CalledProcessError as err:
            print(f'Git clone failed: {err}')
            sys.exit(1)
    # Always pin the working tree to the known-good commit.
    try:
        subprocess.run(
            ['git', 'checkout', '9174634e9dcc1b21a63668969406897aad650f35'],
            cwd=convert_path,
            **quiet
        )
    except subprocess.CalledProcessError as err:
        print(f'Failed to checkout to the specific commit: {err}')
        sys.exit(1)
def download_v9_script(url: str, destination: str) -> None:
    """
    Download a Python script from *url* to *destination*.

    If the destination file already exists it is left untouched. Exits the
    process with status 1 on any download or filesystem error.

    Parameters:
    - url (str): URL to download the script from.
    - destination (str): Local path to save the downloaded script.
    """
    # urllib.error is only reachable through a side effect of importing
    # urllib.request; import it explicitly so the except clauses are reliable.
    import urllib.error

    file_path = Path(destination)
    if file_path.is_file():
        return  # already downloaded
    if file_path.exists():
        # Exists but is not a regular file (e.g. a directory): cannot write it.
        print(f'Error: destination {destination} exists and is not a file')
        sys.exit(1)
    try:
        # Create the parent directories if they do not exist.
        file_path.parent.mkdir(parents=True, exist_ok=True)
        with urllib.request.urlopen(url) as response, open(file_path, 'wb') as out_file:
            out_file.write(response.read())
    except urllib.error.HTTPError as e:
        print(f'HTTP Error: {e.code} - {e.reason} when accessing {url}')
        sys.exit(1)
    except urllib.error.URLError as e:
        print(f'URL Error: {e.reason} when accessing {url}')
        sys.exit(1)
    except Exception as e:
        print(f'An unexpected error occurred: {e}')
        sys.exit(1)
def split_image(im, block_size, input_dir, ext, convert_to_qoi):
    """
    Split image *im* into horizontal blocks of *block_size* pixels.

    Each block is saved into *input_dir* as "<index><ext>"; when
    *convert_to_qoi* is true each saved block is additionally converted to a
    .qoi file and the intermediate image file is removed.

    Returns:
        (width, height, splits): original image size and number of blocks.
    """
    width, height = im.size
    # A falsy block size (0) means "do not split": emit a single block.
    splits = math.ceil(height / block_size) if block_size else 1

    # Import the QOI converter once, and only when it is actually needed
    # (previously re-imported on every loop iteration regardless of the flag).
    qoi_module = importlib.import_module('qoi-conv.qoi') if convert_to_qoi else None

    for i in range(splits):
        top = i * block_size
        bottom = (i + 1) * block_size if i < splits - 1 else height
        crop = im.crop((0, top, width, bottom))
        output_path = os.path.join(input_dir, str(i) + ext)
        crop.save(output_path, quality=100)

        if convert_to_qoi:
            with Image.open(output_path) as img:
                # QOI encoding expects RGBA pixel data.
                if img.mode != 'RGBA':
                    img = img.convert('RGBA')
                img_data = np.asarray(img)
            out_path = qoi_module.replace_extension(output_path, 'qoi')
            qoi_module.Qoi().save(out_path, img_data)
            os.remove(output_path)
    return width, height, splits
def create_header(width, height, splits, split_height, lenbuf, ext):
    """Build the split-image container header for .sjpg/.spng/.sqoi output."""
    magic_by_ext = {'.jpg': '_SJPG__', '.png': '_SPNG__', '.qoi': '_SQOI__'}
    header = bytearray()
    magic = magic_by_ext.get(ext.lower())
    if magic is not None:
        header += magic.encode('UTF-8')
    # NUL-framed version string.
    header += '\x00V1.00\x00'.encode('UTF-8')
    # Geometry and split bookkeeping, all little-endian 16-bit values.
    for value in (width, height, splits, split_height):
        header += value.to_bytes(2, byteorder='little')
    # One 16-bit length per split block.
    for item_len in lenbuf:
        header += item_len.to_bytes(2, byteorder='little')
    return header
def save_image(output_file_path, header, split_data):
    """Write *split_data* to *output_file_path*, prefixed by *header* if given."""
    payload = split_data if header is None else header + split_data
    with open(output_file_path, 'wb') as out:
        out.write(payload)
def handle_lvgl_version_v9(input_file: str, input_dir: str,
                           input_filename: str, convert_path: str) -> None:
    """
    Convert one image to LVGL binary format for LVGL >= 9.0.

    Downloads the official LVGLImage.py script (once) into *convert_path*
    and invokes it as a subprocess. Reads the color format from the
    module-level ``config_data`` dict (key 'support_raw_cf'). Exits the
    process on conversion failure.

    Parameters:
    - input_file (str): Path to the input image file.
    - input_dir (str): Directory of the input file (output goes here too).
    - input_filename (str): Name of the input file (for log messages).
    - convert_path (str): Path for conversion scripts and outputs.
    """
    convert_file = os.path.join(convert_path, 'LVGLImage.py')
    lvgl_image_url = 'https://raw.githubusercontent.com/lvgl/lvgl/master/scripts/LVGLImage.py'
    download_v9_script(url=lvgl_image_url, destination=convert_file)
    lvgl_script = Path(convert_file)
    # Use the running interpreter instead of a bare 'python', which may
    # resolve to a different (or missing) interpreter outside the venv.
    cmd = [
        sys.executable,
        str(lvgl_script),
        '--ofmt', 'BIN',
        '--cf', config_data['support_raw_cf'],
        '--compress', 'NONE',
        '--output', str(input_dir),
        input_file
    ]
    try:
        subprocess.run(
            cmd,
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True
        )
        print(f'Completed {input_filename} -> BIN')
    except subprocess.CalledProcessError as e:
        print('An error occurred while executing LVGLImage.py:')
        print(e.stderr)
        sys.exit(e.returncode)
def handle_lvgl_version_v8(input_file: str, input_dir: str, input_filename: str, convert_path: str) -> None:
    """
    Handle conversion for supported LVGL versions (<= 9.0).

    Parameters:
    - input_file (str): Path to the input image file.
    - input_dir (str): Directory of the input file.
    - input_filename (str): Name of the input file.
    - convert_path (str): Path for conversion scripts and outputs.
    """
    # Clone/checkout the pinned lvgl_image_converter repository if needed.
    download_v8_script(convert_path=convert_path)
    # Make the cloned repo importable so 'lv_img_conv' can be loaded.
    if convert_path not in sys.path:
        sys.path.append(convert_path)
    try:
        import lv_img_conv
    except ImportError as e:
        print(f"Failed to import 'lv_img_conv' from '{convert_path}': {e}")
        sys.exit(1)
    try:
        # Conversion options come from the module-level config_data dict
        # (populated from the JSON config file in __main__).
        lv_img_conv.conv_one_file(
            root=Path(input_dir),
            filepath=Path(input_file),
            f=config_data['support_raw_ff'],
            cf=config_data['support_raw_cf'],
            ff='BIN',
            dither=config_data['support_raw_dither'],
            bgr_mode=config_data['support_raw_bgr'],
        )
        print(f'Completed {input_filename} -> BIN')
    except KeyError as e:
        print(f'Missing configuration key: {e}')
        sys.exit(1)
    except Exception as e:
        print(f'An error occurred during conversion: {e}')
        sys.exit(1)
def process_image(input_file, height_str, output_extension, convert_to_qoi=False):
    """
    Split *input_file* and pack it as .sjpg, .spng, or .sqoi.

    Parameters:
    - input_file (str): Path of the image to convert.
    - height_str (str|int): Split height in pixels; 0 means "no split".
    - output_extension (str): Extension of the packed output container.
    - convert_to_qoi (bool): Convert each split block to QOI first.

    Exits the process with status 1 on a bad height or unreadable image.
    """
    try:
        split_height = int(height_str)
        if split_height < 0:
            raise ValueError
    except ValueError:
        print('Error: Height must be a positive integer')
        sys.exit(1)

    input_dir, input_filename = os.path.split(input_file)
    base_filename, ext = os.path.splitext(input_filename)

    try:
        im = Image.open(input_file)
    except Exception as e:
        print('Error:', e)
        # Exit non-zero so callers/build systems see the failure
        # (previously exited with status 0 on error).
        sys.exit(1)

    width, height, splits = split_image(im, split_height, input_dir, ext, convert_to_qoi)

    # Concatenate the split blocks and record each block's length.
    split_data = bytearray()
    lenbuf = []
    if convert_to_qoi:
        ext = '.qoi'
    for i in range(splits):
        block_path = os.path.join(input_dir, str(i) + ext)
        with open(block_path, 'rb') as f:
            block = f.read()
        split_data += block
        lenbuf.append(len(block))
        os.remove(block_path)

    header = None
    if splits == 1 and convert_to_qoi:
        # A single QOI block needs no container: emit a plain .qoi file.
        output_file_path = os.path.join(input_dir, base_filename + ext)
    else:
        header = create_header(width, height, splits, split_height, lenbuf, ext)
        output_file_path = os.path.join(input_dir, base_filename + output_extension)

    save_image(output_file_path, header, split_data)
    print('Completed', input_filename, '->', os.path.basename(output_file_path))
def convert_image_to_qoi(input_file, height_str):
    """Pack *input_file* into a .sqoi container, converting blocks to QOI."""
    process_image(input_file, height_str, output_extension='.sqoi', convert_to_qoi=True)
def convert_image_to_simg(input_file, height_str):
    """Pack *input_file* into a split-image container (.sjpg for JPEGs, .spng otherwise)."""
    ext = os.path.splitext(input_file)[1].lower()
    out_ext = '.sjpg' if ext == '.jpg' else '.spng'
    process_image(input_file, height_str, out_ext, convert_to_qoi=False)
def convert_image_to_raw(input_file: str) -> None:
    """
    Convert an image to raw binary format compatible with LVGL.

    Dispatches to the v9 (LVGLImage.py) or v8 (lvgl_image_converter) flow
    based on the 'lvgl_ver' entry of the module-level ``config_data`` dict.

    Parameters:
    - input_file (str): Path to the input image file.

    Raises:
    - FileNotFoundError: If required scripts are not found.
    - subprocess.CalledProcessError: If the external conversion script fails.
    - KeyError: If required keys are missing in config_data.
    """
    input_dir, input_filename = os.path.split(input_file)
    # ext is computed but not used below.
    _, ext = os.path.splitext(input_filename)
    # Conversion scripts are cached in a subdirectory next to the input file.
    convert_path = os.path.join(os.path.dirname(input_file), 'lvgl_image_converter')

    lvgl_ver_str = config_data.get('lvgl_ver', '9.0.0')
    try:
        lvgl_version = version.parse(lvgl_ver_str)
    except version.InvalidVersion:
        print(f'Invalid LVGL version format: {lvgl_ver_str}')
        sys.exit(1)

    # LVGL 9+ uses the upstream LVGLImage.py script; older versions use the
    # pinned lvgl_image_converter repository.
    if lvgl_version >= version.parse('9.0.0'):
        handle_lvgl_version_v9(
            input_file=input_file,
            input_dir=input_dir,
            input_filename=input_filename,
            convert_path=convert_path
        )
    else:
        handle_lvgl_version_v8(
            input_file=input_file,
            input_dir=input_dir,
            input_filename=input_filename,
            convert_path=convert_path
        )
def pack_assets(config: PackModelsConfig):
    """
    Pack models based on the provided configuration.

    Concatenates every file in config.target_path (each prefixed with a
    0x5A5A marker) behind a fixed-width lookup table of
    name/size/offset/width/height records, writes the result to
    config.image_file, and emits a C header enumerating the packed files.
    """
    target_path = config.target_path
    assets_include_path = config.include_path
    out_file = config.image_file
    assets_path = config.assets_path
    max_name_len = config.name_length
    merged_data = bytearray()
    file_info_list = []
    # Never pack the config file or the cloned converter repository.
    skip_files = ['config.json', 'lvgl_image_converter']
    file_list = sorted(os.listdir(target_path), key=sort_key)
    for filename in file_list:
        if filename in skip_files:
            continue
        file_path = os.path.join(target_path, filename)
        file_name = os.path.basename(file_path)
        file_size = os.path.getsize(file_path)
        try:
            # Plain image files: read the dimensions directly.
            img = Image.open(file_path)
            width, height = img.size
        except Exception as e:
            # print("Error:", e)
            _, file_extension = os.path.splitext(file_path)
            if file_extension.lower() in ['.sjpg', '.spng', '.sqoi']:
                # Split-image containers store width/height at byte offset 14
                # (after the 7-byte magic and 7-byte version fields written
                # by create_header).
                offset = 14
                with open(file_path, 'rb') as f:
                    f.seek(offset)
                    width_bytes = f.read(2)
                    height_bytes = f.read(2)
                    width = int.from_bytes(width_bytes, byteorder='little')
                    height = int.from_bytes(height_bytes, byteorder='little')
            else:
                # Unknown binary payloads carry no dimensions.
                width, height = 0, 0
        file_info_list.append((file_name, len(merged_data), file_size, width, height))
        # Add 0x5A5A prefix to merged_data
        merged_data.extend(b'\x5A' * 2)
        with open(file_path, 'rb') as bin_file:
            bin_data = bin_file.read()
            merged_data.extend(bin_data)
    total_files = len(file_info_list)
    # Build the fixed-width lookup table: name, size, offset, width, height.
    mmap_table = bytearray()
    for file_name, offset, file_size, width, height in file_info_list:
        if len(file_name) > int(max_name_len):
            print(f'\033[1;33mWarn:\033[0m "{file_name}" exceeds {max_name_len} bytes and will be truncated.')
        # Names are NUL-padded (and truncated) to the configured fixed width.
        fixed_name = file_name.ljust(int(max_name_len), '\0')[:int(max_name_len)]
        mmap_table.extend(fixed_name.encode('utf-8'))
        mmap_table.extend(file_size.to_bytes(4, byteorder='little'))
        mmap_table.extend(offset.to_bytes(4, byteorder='little'))
        mmap_table.extend(width.to_bytes(2, byteorder='little'))
        mmap_table.extend(height.to_bytes(2, byteorder='little'))
    combined_data = mmap_table + merged_data
    combined_checksum = compute_checksum(combined_data)
    combined_data_length = len(combined_data).to_bytes(4, byteorder='little')
    # Image layout: [file count][checksum][payload length][table + payload].
    header_data = total_files.to_bytes(4, byteorder='little') + combined_checksum.to_bytes(4, byteorder='little')
    final_data = header_data + combined_data_length + combined_data
    with open(out_file, 'wb') as output_bin:
        output_bin.write(final_data)
    # Emit the generated C header describing the packed asset table.
    os.makedirs(assets_include_path, exist_ok=True)
    current_year = datetime.now().year
    asset_name = os.path.basename(assets_path)
    file_path = os.path.join(assets_include_path, f'mmap_generate_{asset_name}.h')
    with open(file_path, 'w') as output_header:
        output_header.write('/*\n')
        output_header.write(' * SPDX-FileCopyrightText: 2022-{} Espressif Systems (Shanghai) CO LTD\n'.format(current_year))
        output_header.write(' *\n')
        output_header.write(' * SPDX-License-Identifier: Apache-2.0\n')
        output_header.write(' */\n\n')
        output_header.write('/**\n')
        output_header.write(' * @file\n')
        output_header.write(" * @brief This file was generated by esp_mmap_assets, don't modify it\n")
        output_header.write(' */\n\n')
        output_header.write('#pragma once\n\n')
        output_header.write("#include \"esp_mmap_assets.h\"\n\n")
        output_header.write(f'#define MMAP_{asset_name.upper()}_FILES {total_files}\n')
        output_header.write(f'#define MMAP_{asset_name.upper()}_CHECKSUM 0x{combined_checksum:04X}\n\n')
        output_header.write(f'enum MMAP_{asset_name.upper()}_LISTS {{\n')
        for i, (file_name, _, _, _, _) in enumerate(file_info_list):
            enum_name = file_name.replace('.', '_')
            output_header.write(f' MMAP_{asset_name.upper()}_{enum_name.upper()} = {i}, /*!< {file_name} */\n')
        output_header.write('};\n')
    print(f'All bin files have been merged into {os.path.basename(out_file)}')
def copy_assets(config: AssetCopyConfig):
    """
    Copy eligible assets into ``config.target_path``, converting as configured.

    Files whose suffix matches ``config.support_format`` are copied; .jpg/.png
    files are then optionally converted to split/QOI containers (the copied
    original is removed after conversion), and any remaining image is
    converted to LVGL raw binary when ``config.row_enable`` is set.
    Non-matching files are reported and skipped.
    """
    format_tuple = tuple(config.support_format)
    assets_path = config.assets_path
    target_path = config.target_path

    # Per-extension conversion candidates in priority order; this mapping is
    # loop-invariant, so build it once instead of once per file.
    conversion_map = {
        '.jpg': [
            (config.sjpg_enable, convert_image_to_simg),
            (config.qoi_enable, convert_image_to_qoi),
        ],
        '.png': [
            (config.spng_enable, convert_image_to_simg),
            (config.qoi_enable, convert_image_to_qoi),
        ],
    }

    for filename in os.listdir(assets_path):
        # str.endswith accepts a tuple of suffixes.
        if not filename.endswith(format_tuple):
            # Report the actual file that was skipped (previously the message
            # printed a placeholder instead of the file name).
            print(f'No match found for file: {filename}, format_tuple: {format_tuple}')
            continue
        source_file = os.path.join(assets_path, filename)
        target_file = os.path.join(target_path, filename)
        shutil.copyfile(source_file, target_file)

        file_ext = os.path.splitext(filename)[1].lower()
        converted = False
        for enable_flag, convert_func in conversion_map.get(file_ext, []):
            if enable_flag:
                convert_func(target_file, config.split_height)
                os.remove(target_file)
                converted = True
                break
        if not converted and config.row_enable:
            convert_image_to_raw(target_file)
            os.remove(target_file)
def process_assets_build(config_data):
    """
    Build flow: clean the staging directory, copy/convert the assets, pack
    them into a single image, and verify the result fits the asset partition.

    Parameters:
    - config_data (dict): Parsed JSON configuration (paths, format flags,
      partition size, LVGL version, etc.).
    """
    assets_path = config_data['assets_path']
    image_file = config_data['image_file']
    # The packed image's directory doubles as the conversion staging area.
    target_path = os.path.dirname(image_file)
    include_path = config_data['include_path']
    name_length = config_data['name_length']
    split_height = config_data['split_height']
    support_format = [fmt.strip() for fmt in config_data['support_format'].split(',')]
    copy_config = AssetCopyConfig(
        assets_path=assets_path,
        target_path=target_path,
        spng_enable=config_data['support_spng'],
        sjpg_enable=config_data['support_sjpg'],
        qoi_enable=config_data['support_qoi'],
        sqoi_enable=config_data['support_sqoi'],
        row_enable=config_data['support_raw'],
        support_format=support_format,
        split_height=split_height
    )
    pack_config = PackModelsConfig(
        target_path=target_path,
        include_path=include_path,
        image_file=image_file,
        assets_path=assets_path,
        name_length=name_length
    )
    # Echo the effective configuration.
    print('--support_format:', support_format)
    if '.jpg' in support_format or '.png' in support_format:
        print('--support_spng:', copy_config.spng_enable)
        print('--support_sjpg:', copy_config.sjpg_enable)
        print('--support_qoi:', copy_config.qoi_enable)
        print('--support_raw:', copy_config.row_enable)
    if copy_config.sqoi_enable:
        print('--support_sqoi:', copy_config.sqoi_enable)
    if copy_config.spng_enable or copy_config.sjpg_enable or copy_config.sqoi_enable:
        print('--split_height:', copy_config.split_height)
    if copy_config.row_enable:
        print('--lvgl_version:', config_data['lvgl_ver'])
    # Start from an empty staging directory.
    if not os.path.exists(target_path):
        os.makedirs(target_path, exist_ok=True)
    for filename in os.listdir(target_path):
        file_path = os.path.join(target_path, filename)
        if os.path.isfile(file_path) or os.path.islink(file_path):
            os.unlink(file_path)
        elif os.path.isdir(file_path):
            shutil.rmtree(file_path)
    copy_assets(copy_config)
    pack_assets(pack_config)
    # NOTE(review): image_file looks like a full path already; this join is a
    # no-op only when image_file is absolute — confirm against the config.
    total_size = os.path.getsize(os.path.join(target_path, image_file))
    recommended_size = math.ceil(total_size / 1024)
    # assets_size is a hex string (e.g. "0x100000") in the config.
    partition_size = math.ceil(int(config_data['assets_size'], 16))
    print(f'{"Total size:":<30} {GREEN}{total_size / 1024:>8.2f}K ({total_size}){RESET}')
    print(f'{"Partition size:":<30} {GREEN}{partition_size / 1024:>8.2f}K ({partition_size}){RESET}')
    if int(config_data['assets_size'], 16) <= total_size:
        print(f'Recommended partition size: {GREEN}{recommended_size}K{RESET}')
        print(f'{RED}Error:Binary size exceeds partition size.{RESET}')
        sys.exit(1)
def process_assets_merge(config_data):
    """Append the packed asset image to the application binary in place."""
    app_bin_path = config_data['app_bin_path']
    image_file = config_data['image_file']
    target_path = os.path.dirname(image_file)
    combined_bin_path = os.path.join(target_path, 'combined.bin')
    append_bin_path = os.path.join(target_path, image_file)

    app_size = os.path.getsize(app_bin_path)
    asset_size = os.path.getsize(append_bin_path)
    total_size = asset_size + app_size
    recommended_size = math.ceil(total_size / 1024)
    partition_size = math.ceil(int(config_data['assets_size'], 16))

    # Report all sizes in the same aligned, colorized format.
    for label, size in (('Asset size:', asset_size),
                        ('App size:', app_size),
                        ('Total size:', total_size),
                        ('Partition size:', partition_size)):
        print(f'{label:<30} {GREEN}{size / 1024:>8.2f}K ({size}){RESET}')

    if total_size > partition_size:
        print(f'Recommended partition size: {GREEN}{recommended_size}K{RESET}')
        print(f'{RED}Error:Binary size exceeds partition size.{RESET}')
        sys.exit(1)

    # Concatenate app + assets into a scratch file, then replace the app binary.
    with open(combined_bin_path, 'wb') as combined_bin:
        with open(app_bin_path, 'rb') as app_bin:
            combined_bin.write(app_bin.read())
        with open(append_bin_path, 'rb') as img_bin:
            combined_bin.write(img_bin.read())
    shutil.move(combined_bin_path, app_bin_path)
    print(f'Append bin created: {os.path.basename(app_bin_path)}')
if __name__ == '__main__':
    # Parse CLI arguments, load the JSON config, then either merge the packed
    # assets into the app binary or run the full build-and-pack flow.
    cli = argparse.ArgumentParser(description='Move and Pack assets.')
    cli.add_argument('--config', required=True, help='Path to the configuration file')
    cli.add_argument('--merge', action='store_true', help='Merge assets with app binary')
    args = cli.parse_args()

    # config_data stays module-global: the conversion helpers read it directly.
    with open(args.config, 'r') as cfg:
        config_data = json.load(cfg)

    if args.merge:
        process_assets_merge(config_data)
    else:
        process_assets_build(config_data)

View File

@@ -1,247 +1,247 @@
#! /usr/bin/env python3
from dotenv import load_dotenv
load_dotenv()
import os
import struct
import zipfile
import oss2
import json
import requests
from requests.exceptions import RequestException
# 切换到项目根目录
os.chdir(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
def get_chip_id_string(chip_id):
    """Map an ESP chip-id value to its chip name; raises KeyError if unknown."""
    names = {
        0x0000: "esp32",
        0x0002: "esp32s2",
        0x0005: "esp32c3",
        0x0009: "esp32s3",
        0x000C: "esp32c2",
        0x000D: "esp32c6",
        0x0010: "esp32h2",
        0x0011: "esp32c5",
        0x0012: "esp32p4",
        0x0017: "esp32c5",
    }
    return names[chip_id]
def get_flash_size(flash_size):
    """Map a flash-size field value (0..7) to its size in bytes (1MB..128MB)."""
    MB = 1024 * 1024
    # The field encodes a power of two starting at 1 MB.
    sizes = {code: (1 << code) * MB for code in range(8)}
    return sizes[flash_size]
def get_app_desc(data):
    """
    Parse an ESP app-descriptor structure from *data*.

    Raises Exception when the magic word is wrong; returns a dict with the
    project name, version, compile time, IDF version and ELF SHA-256.
    """
    magic, = struct.unpack("<I", data[0x00:0x04])
    if magic != 0xabcd5432:
        raise Exception("Invalid app desc magic")

    def _text(start, end):
        # Fixed-size NUL-padded UTF-8 field.
        return data[start:end].decode("utf-8").strip('\0')

    time = _text(0x50, 0x60)
    date = _text(0x60, 0x70)
    return {
        "name": _text(0x30, 0x50),
        "version": _text(0x10, 0x30),
        "compile_time": date + "T" + time,
        "idf_version": _text(0x70, 0x90),
        "elf_sha256": data[0x90:0xb0].hex(),
    }
def get_board_name(folder):
    """Derive the board name from a release folder name (e.g. 'v1.2_board')."""
    basename = os.path.basename(folder)
    if basename.startswith("v0.2"):
        return "bread-simple"
    if basename.startswith(("v0.3", "v0.4", "v0.5", "v0.6")):
        # Early releases encoded the board in free-form text.
        if "ML307" in basename:
            return "bread-compact-ml307"
        if "WiFi" in basename:
            return "bread-compact-wifi"
        if "KevinBox1" in basename:
            return "kevin-box-1"
    if basename.startswith(("v0.7", "v0.8", "v0.9", "v1.", "v2.")):
        # Modern releases are named "<tag>_<board>".
        return basename.split("_")[1]
    raise Exception(f"Unknown board name: {basename}")
def find_app_partition(data):
    """
    Scan the partition table (at flash offset 0x8000) for the first app entry.

    Returns a dict with the app partition's offset, size and label, or None
    when no entry of type 0x00 with the 0xAA50 magic is found.
    """
    table_start = 0x8000
    table_end = table_start + 0x4000
    entry_size = 0x20
    for entry in range(table_start, table_end, entry_size):
        # Each valid entry starts with the magic bytes AA 50.
        if data[entry] != 0xaa or data[entry + 1] != 0x50:
            continue
        # Entry type 0x00 marks an app partition.
        if data[entry + 2] != 0x00:
            continue
        offset, size = struct.unpack("<II", data[entry + 4:entry + 12])
        # The 16 bytes after offset/size hold the NUL-padded label.
        label = data[entry + 12:entry + 28].decode("utf-8").strip('\0')
        print(f"found app partition at 0x{entry:08x}, offset: 0x{offset:08x}, size: 0x{size:08x}, label: {label}")
        return {
            "offset": offset,
            "size": size,
            "label": label,
        }
    return None
def read_binary(dir_path):
    """
    Extract firmware metadata from <dir_path>/merged-binary.bin.

    Locates the app partition inside the merged flash image, validates the
    ESP image format, computes the true image size from the segment table,
    writes the bare app image to <dir_path>/xiaozhi.bin, and returns a dict
    describing the firmware (chip, flash size, board, app descriptor, size).
    Returns None on any validation failure.
    """
    merged_bin_path = os.path.join(dir_path, "merged-binary.bin")
    merged_bin_data = open(merged_bin_path, "rb").read()
    # find app partition
    app_partition = find_app_partition(merged_bin_data)
    if app_partition is None:
        print("no app partition found")
        return
    app_data = merged_bin_data[app_partition["offset"]:app_partition["offset"] + app_partition["size"]]
    # check magic (an ESP image header starts with 0xE9)
    if app_data[0] != 0xE9:
        print("not a valid image")
        return
    # get flash size (upper nibble of byte 3) and chip id (byte 0xC)
    flash_size = get_flash_size(app_data[0x3] >> 4)
    chip_id = get_chip_id_string(app_data[0xC])
    # Walk the segment table to compute the real image size: byte 1 holds the
    # segment count, segments start after the 0x18-byte header, and each
    # segment has an 8-byte header whose second word is the payload length.
    segment_count = app_data[0x1]
    segments = []
    offset = 0x18
    image_size = 0x18
    for i in range(segment_count):
        segment_size = struct.unpack("<I", app_data[offset + 4:offset + 8])[0]
        image_size += 8 + segment_size
        offset += 8
        segment_data = app_data[offset:offset + segment_size]
        offset += segment_size
        segments.append(segment_data)
        assert offset < len(app_data), "offset is out of bounds"
    # add checksum size, then pad the image to a 16-byte boundary
    image_size += 1
    image_size = (image_size + 15) & ~15
    # hash appended (byte 0x17 flags a trailing 32-byte SHA-256)
    if app_data[0x17] == 1:
        image_size += 32
    print(f"image size: {image_size}")
    # verify the remaining data are all 0xFF (erased-flash padding)
    for i in range(image_size, len(app_data)):
        if app_data[i] != 0xFF:
            print(f"Failed to verify image, data at 0x{i:08x} is not 0xFF")
            return
    image_data = app_data[:image_size]
    # extract bin file (skipped when it already exists)
    bin_path = os.path.join(dir_path, "xiaozhi.bin")
    if not os.path.exists(bin_path):
        print("extract bin file to", bin_path)
        open(bin_path, "wb").write(image_data)
    # The app desc is in the first segment
    desc = get_app_desc(segments[0])
    return {
        "chip_id": chip_id,
        "flash_size": flash_size,
        "board": get_board_name(dir_path),
        "application": desc,
        "firmware_size": image_size,
    }
def extract_zip(zip_path, extract_path):
    """Extract *zip_path* into *extract_path*, creating the directory if needed."""
    if not os.path.exists(extract_path):
        os.makedirs(extract_path)
    print(f"Extracting {zip_path} to {extract_path}")
    with zipfile.ZipFile(zip_path, 'r') as archive:
        archive.extractall(extract_path)
def upload_dir_to_oss(source_dir, target_dir):
    """
    Upload every file in *source_dir* to OSS under *target_dir* (flat, non-recursive).

    Credentials and bucket settings come from the OSS_* environment variables.
    """
    auth = oss2.Auth(os.environ['OSS_ACCESS_KEY_ID'], os.environ['OSS_ACCESS_KEY_SECRET'])
    bucket = oss2.Bucket(auth, os.environ['OSS_ENDPOINT'], os.environ['OSS_BUCKET_NAME'])
    for filename in os.listdir(source_dir):
        oss_key = os.path.join(target_dir, filename)
        print('uploading', oss_key)
        # Close each file handle after upload (previously leaked).
        with open(os.path.join(source_dir, filename), 'rb') as payload:
            bucket.put_object(oss_key, payload)
def post_info_to_server(info):
    """
    Upload firmware release info to the versions server.

    Reads VERSIONS_SERVER_URL and VERSIONS_TOKEN from the environment,
    POSTs *info* as JSON (pre-serialized under a 'jsonData' field), and
    re-raises any failure after logging it.

    Args:
        info: dict of firmware info; must contain a 'tag' key.
    """
    try:
        # Server endpoint and bearer token come from the environment.
        server_url = os.environ.get('VERSIONS_SERVER_URL')
        server_token = os.environ.get('VERSIONS_TOKEN')
        if not server_url or not server_token:
            raise Exception("Missing SERVER_URL or TOKEN in environment variables")
        # Prepare the request headers and payload.
        headers = {
            'Authorization': f'Bearer {server_token}',
            'Content-Type': 'application/json'
        }
        # The server expects the payload serialized as a string in 'jsonData'.
        response = requests.post(
            server_url,
            headers=headers,
            json={'jsonData': json.dumps(info)}
        )
        # Raise on 4xx/5xx status codes.
        response.raise_for_status()
        print(f"Successfully uploaded version info for tag: {info['tag']}")
    except RequestException as e:
        # Prefer the server-supplied error message when a response exists.
        # NOTE(review): e.response may be None for connection errors, and
        # .json() can itself raise if the body is not JSON — confirm.
        if hasattr(e.response, 'json'):
            error_msg = e.response.json().get('error', str(e))
        else:
            error_msg = str(e)
        print(f"Failed to upload version info: {error_msg}")
        raise
    except Exception as e:
        print(f"Error uploading version info: {str(e)}")
        raise
def main():
    """
    Process every release zip under ./releases: extract it, read the firmware
    metadata, upload the files to OSS, and post the info to the server.

    A tag is considered already processed when its info.json exists; in that
    case only the server upload is repeated.
    """
    release_dir = "releases"
    # look for zip files startswith "v"
    for name in os.listdir(release_dir):
        if name.startswith("v") and name.endswith(".zip"):
            tag = name[:-4]
            folder = os.path.join(release_dir, tag)
            info_path = os.path.join(folder, "info.json")
            if not os.path.exists(info_path):
                # First time seeing this tag: extract and analyze it.
                if not os.path.exists(folder):
                    os.makedirs(folder)
                    extract_zip(os.path.join(release_dir, name), folder)
                info = read_binary(folder)
                target_dir = os.path.join("firmwares", tag)
                info["tag"] = tag
                info["url"] = os.path.join(os.environ['OSS_BUCKET_URL'], target_dir, "xiaozhi.bin")
                open(info_path, "w").write(json.dumps(info, indent=4))
                # upload all file to oss
                upload_dir_to_oss(folder, target_dir)
            # read info.json
            info = json.load(open(info_path))
            # post info.json to server
            post_info_to_server(info)
if __name__ == "__main__":
#! /usr/bin/env python3
from dotenv import load_dotenv
load_dotenv()
import os
import struct
import zipfile
import oss2
import json
import requests
from requests.exceptions import RequestException
# 切换到项目根目录
os.chdir(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
def get_chip_id_string(chip_id):
return {
0x0000: "esp32",
0x0002: "esp32s2",
0x0005: "esp32c3",
0x0009: "esp32s3",
0x000C: "esp32c2",
0x000D: "esp32c6",
0x0010: "esp32h2",
0x0011: "esp32c5",
0x0012: "esp32p4",
0x0017: "esp32c5",
}[chip_id]
def get_flash_size(flash_size):
MB = 1024 * 1024
return {
0x00: 1 * MB,
0x01: 2 * MB,
0x02: 4 * MB,
0x03: 8 * MB,
0x04: 16 * MB,
0x05: 32 * MB,
0x06: 64 * MB,
0x07: 128 * MB,
}[flash_size]
def get_app_desc(data):
magic = struct.unpack("<I", data[0x00:0x04])[0]
if magic != 0xabcd5432:
raise Exception("Invalid app desc magic")
version = data[0x10:0x30].decode("utf-8").strip('\0')
project_name = data[0x30:0x50].decode("utf-8").strip('\0')
time = data[0x50:0x60].decode("utf-8").strip('\0')
date = data[0x60:0x70].decode("utf-8").strip('\0')
idf_ver = data[0x70:0x90].decode("utf-8").strip('\0')
elf_sha256 = data[0x90:0xb0].hex()
return {
"name": project_name,
"version": version,
"compile_time": date + "T" + time,
"idf_version": idf_ver,
"elf_sha256": elf_sha256,
}
def get_board_name(folder):
basename = os.path.basename(folder)
if basename.startswith("v0.2"):
return "bread-simple"
if basename.startswith("v0.3") or basename.startswith("v0.4") or basename.startswith("v0.5") or basename.startswith("v0.6"):
if "ML307" in basename:
return "bread-compact-ml307"
elif "WiFi" in basename:
return "bread-compact-wifi"
elif "KevinBox1" in basename:
return "kevin-box-1"
if basename.startswith("v0.7") or basename.startswith("v0.8") or basename.startswith("v0.9") or basename.startswith("v1.") or basename.startswith("v2."):
return basename.split("_")[1]
raise Exception(f"Unknown board name: {basename}")
def find_app_partition(data):
partition_begin = 0x8000
partition_end = partition_begin + 0x4000
# find the first parition with type 0x00
for i in range(partition_begin, partition_end, 0x20):
# magic is aa 50
if data[i] == 0xaa and data[i + 1] == 0x50:
# type is app
if data[i + 2] == 0x00:
# read offset and size
offset = struct.unpack("<I", data[i + 4:i + 8])[0]
size = struct.unpack("<I", data[i + 8:i + 12])[0]
# then 16 bytes is label
label = data[i + 12:i + 28].decode("utf-8").strip('\0')
print(f"found app partition at 0x{i:08x}, offset: 0x{offset:08x}, size: 0x{size:08x}, label: {label}")
return {
"offset": offset,
"size": size,
"label": label,
}
return None
def read_binary(dir_path):
merged_bin_path = os.path.join(dir_path, "merged-binary.bin")
merged_bin_data = open(merged_bin_path, "rb").read()
# find app partition
app_partition = find_app_partition(merged_bin_data)
if app_partition is None:
print("no app partition found")
return
app_data = merged_bin_data[app_partition["offset"]:app_partition["offset"] + app_partition["size"]]
# check magic
if app_data[0] != 0xE9:
print("not a valid image")
return
# get flash size
flash_size = get_flash_size(app_data[0x3] >> 4)
chip_id = get_chip_id_string(app_data[0xC])
# get segments
segment_count = app_data[0x1]
segments = []
offset = 0x18
image_size = 0x18
for i in range(segment_count):
segment_size = struct.unpack("<I", app_data[offset + 4:offset + 8])[0]
image_size += 8 + segment_size
offset += 8
segment_data = app_data[offset:offset + segment_size]
offset += segment_size
segments.append(segment_data)
assert offset < len(app_data), "offset is out of bounds"
# add checksum size
image_size += 1
image_size = (image_size + 15) & ~15
# hash appended
if app_data[0x17] == 1:
image_size += 32
print(f"image size: {image_size}")
# verify the remaining data are all 0xFF
for i in range(image_size, len(app_data)):
if app_data[i] != 0xFF:
print(f"Failed to verify image, data at 0x{i:08x} is not 0xFF")
return
image_data = app_data[:image_size]
# extract bin file
bin_path = os.path.join(dir_path, "xiaozhi.bin")
if not os.path.exists(bin_path):
print("extract bin file to", bin_path)
open(bin_path, "wb").write(image_data)
# The app desc is in the first segment
desc = get_app_desc(segments[0])
return {
"chip_id": chip_id,
"flash_size": flash_size,
"board": get_board_name(dir_path),
"application": desc,
"firmware_size": image_size,
}
def extract_zip(zip_path, extract_path):
if not os.path.exists(extract_path):
os.makedirs(extract_path)
print(f"Extracting {zip_path} to {extract_path}")
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
zip_ref.extractall(extract_path)
def upload_dir_to_oss(source_dir, target_dir):
auth = oss2.Auth(os.environ['OSS_ACCESS_KEY_ID'], os.environ['OSS_ACCESS_KEY_SECRET'])
bucket = oss2.Bucket(auth, os.environ['OSS_ENDPOINT'], os.environ['OSS_BUCKET_NAME'])
for filename in os.listdir(source_dir):
oss_key = os.path.join(target_dir, filename)
print('uploading', oss_key)
bucket.put_object(oss_key, open(os.path.join(source_dir, filename), 'rb'))
def post_info_to_server(info):
"""
将固件信息发送到服务器
Args:
info: 包含固件信息的字典
"""
try:
# 从环境变量获取服务器URL和token
server_url = os.environ.get('VERSIONS_SERVER_URL')
server_token = os.environ.get('VERSIONS_TOKEN')
if not server_url or not server_token:
raise Exception("Missing SERVER_URL or TOKEN in environment variables")
# 准备请求头和数据
headers = {
'Authorization': f'Bearer {server_token}',
'Content-Type': 'application/json'
}
# 发送POST请求
response = requests.post(
server_url,
headers=headers,
json={'jsonData': json.dumps(info)}
)
# 检查响应状态
response.raise_for_status()
print(f"Successfully uploaded version info for tag: {info['tag']}")
except RequestException as e:
if hasattr(e.response, 'json'):
error_msg = e.response.json().get('error', str(e))
else:
error_msg = str(e)
print(f"Failed to upload version info: {error_msg}")
raise
except Exception as e:
print(f"Error uploading version info: {str(e)}")
raise
def main():
release_dir = "releases"
# look for zip files startswith "v"
for name in os.listdir(release_dir):
if name.startswith("v") and name.endswith(".zip"):
tag = name[:-4]
folder = os.path.join(release_dir, tag)
info_path = os.path.join(folder, "info.json")
if not os.path.exists(info_path):
if not os.path.exists(folder):
os.makedirs(folder)
extract_zip(os.path.join(release_dir, name), folder)
info = read_binary(folder)
target_dir = os.path.join("firmwares", tag)
info["tag"] = tag
info["url"] = os.path.join(os.environ['OSS_BUCKET_URL'], target_dir, "xiaozhi.bin")
open(info_path, "w").write(json.dumps(info, indent=4))
# upload all file to oss
upload_dir_to_oss(folder, target_dir)
# read info.json
info = json.load(open(info_path))
# post info.json to server
post_info_to_server(info)
if __name__ == "__main__":
main()