add some code

This commit is contained in:
2025-09-05 13:25:11 +08:00
parent 9ff0a99e7a
commit 3cf1229a85
8911 changed files with 2535396 additions and 0 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,45 @@
# LVGL图片转换工具
这个目录包含两个用于处理和转换图片为LVGL格式的Python脚本
## 1. LVGLImage (LVGLImage.py)
引用自LVGL[官方repo](https://github.com/lvgl/lvgl)的转换脚本[LVGLImage.py](https://github.com/lvgl/lvgl/blob/master/scripts/LVGLImage.py)
## 2. LVGL图片转换工具 (lvgl_tools_gui.py)
调用`LVGLImage.py`将图片批量转换为LVGL图片格式
可用于修改小智的默认表情,具体修改教程[在这里](https://www.bilibili.com/video/BV12FQkYeEJ3/)
### 特性
- 图形化操作,界面更友好
- 支持批量转换图片
- 自动识别图片格式并选择最佳的颜色格式转换
- 多分辨率支持
### 使用方法
创建虚拟环境
```bash
# 创建 venv
python -m venv venv
# 激活环境
source venv/bin/activate # Linux/Mac
venv\Scripts\activate # Windows
```
安装依赖
```bash
pip install -r requirements.txt
```
运行转换工具
```bash
# 激活环境
source venv/bin/activate # Linux/Mac
venv\Scripts\activate # Windows
# 运行
python lvgl_tools_gui.py
```

View File

@@ -0,0 +1,253 @@
import tkinter as tk
from tkinter import ttk, filedialog, messagebox
from PIL import Image
import os
import tempfile
import sys
from LVGLImage import LVGLImage, ColorFormat, CompressMethod
# User-facing usage instructions; displayed verbatim in a message box by
# ImageConverterApp.show_help(). This is runtime text, not documentation.
HELP_TEXT = """LVGL图片转换工具使用说明
1. 添加文件:点击“添加文件”按钮选择需要转换的图片,支持批量导入
2. 移除文件:在列表中选中文件前的复选框“[ ]”(选中后会变成“[√]”),点击“移除选中”可删除选定文件
3. 设置分辨率选择需要的分辨率如128x128
建议根据自己的设备的屏幕分辨率来选择。过大和过小都会影响显示效果。
4. 颜色格式:选择“自动识别”会根据图片是否透明自动选择,或手动指定
除非你了解这个选项,否则建议使用自动识别,不然可能会出现一些意想不到的问题……
5. 压缩方式选择NONE或RLE压缩
除非你了解这个选项否则建议保持默认NONE不压缩
6. 输出目录:设置转换后文件的保存路径
默认为程序所在目录下的output文件夹
7. 转换:点击“转换全部”或“转换选中”开始转换
"""
class ImageConverterApp:
    """Tkinter GUI that batch-converts images into LVGL C arrays.

    Files are listed in a Treeview whose first column acts as a text
    checkbox ("[ ]" / "[√]"); the full path of each file is stored in the
    row's tag. Conversion resizes each image, writes a resized PNG next to
    the generated ``.c`` file and delegates the C-array generation to
    ``LVGLImage``.
    """

    def __init__(self, root):
        """Build the UI on *root* and mirror stdout into the log pane."""
        self.root = root
        self.root.title("LVGL图片转换工具")
        self.root.geometry("750x650")
        # Tk variables backing the settings widgets.
        self.output_dir = tk.StringVar(value=os.path.abspath("output"))
        self.resolution = tk.StringVar(value="128x128")
        self.color_format = tk.StringVar(value="自动识别")
        self.compress_method = tk.StringVar(value="NONE")
        self.create_widgets()
        self.redirect_output()

    def create_widgets(self):
        """Create and lay out every widget of the main window."""
        # --- conversion settings ---
        settings_frame = ttk.LabelFrame(self.root, text="转换设置")
        settings_frame.grid(row=0, column=0, padx=10, pady=5, sticky="ew")
        ttk.Label(settings_frame, text="分辨率:").grid(row=0, column=0, padx=2)
        ttk.Combobox(
            settings_frame, textvariable=self.resolution,
            values=["512x512", "256x256", "128x128", "64x64", "32x32"], width=8,
        ).grid(row=0, column=1, padx=2)
        ttk.Label(settings_frame, text="颜色格式:").grid(row=0, column=2, padx=2)
        ttk.Combobox(
            settings_frame, textvariable=self.color_format,
            values=["自动识别", "RGB565", "RGB565A8"], width=10,
        ).grid(row=0, column=3, padx=2)
        ttk.Label(settings_frame, text="压缩方式:").grid(row=0, column=4, padx=2)
        ttk.Combobox(
            settings_frame, textvariable=self.compress_method,
            values=["NONE", "RLE"], width=8,
        ).grid(row=0, column=5, padx=2)

        # --- file selection ---
        file_frame = ttk.LabelFrame(self.root, text="选取文件")
        file_frame.grid(row=1, column=0, padx=10, pady=5, sticky="nsew")
        btn_frame = ttk.Frame(file_frame)
        btn_frame.pack(fill=tk.X, pady=2)
        ttk.Button(btn_frame, text="添加文件", command=self.select_files).pack(side=tk.LEFT, padx=2)
        ttk.Button(btn_frame, text="移除选中", command=self.remove_selected).pack(side=tk.LEFT, padx=2)
        ttk.Button(btn_frame, text="清空列表", command=self.clear_files).pack(side=tk.LEFT, padx=2)
        self.tree = ttk.Treeview(file_frame, columns=("selected", "filename"),
                                 show="headings", height=10)
        self.tree.heading("selected", text="选择", anchor=tk.W)
        self.tree.heading("filename", text="文件名", anchor=tk.W)
        self.tree.column("selected", width=60, anchor=tk.W)
        self.tree.column("filename", width=600, anchor=tk.W)
        self.tree.pack(fill=tk.BOTH, expand=True)
        self.tree.bind("<ButtonRelease-1>", self.on_tree_click)

        # --- output directory ---
        output_frame = ttk.LabelFrame(self.root, text="输出目录")
        output_frame.grid(row=2, column=0, padx=10, pady=5, sticky="ew")
        ttk.Entry(output_frame, textvariable=self.output_dir, width=60).pack(side=tk.LEFT, padx=5)
        ttk.Button(output_frame, text="浏览", command=self.select_output_dir).pack(side=tk.RIGHT, padx=5)

        # --- convert / help buttons ---
        convert_frame = ttk.Frame(self.root)
        convert_frame.grid(row=3, column=0, padx=10, pady=10)
        ttk.Button(convert_frame, text="转换全部文件",
                   command=lambda: self.start_conversion(True)).pack(side=tk.LEFT, padx=5)
        ttk.Button(convert_frame, text="转换选中文件",
                   command=lambda: self.start_conversion(False)).pack(side=tk.LEFT, padx=5)
        ttk.Button(convert_frame, text="帮助", command=self.show_help).pack(side=tk.RIGHT, padx=5)

        # --- log pane with its "clear" button ---
        log_frame = ttk.LabelFrame(self.root, text="日志")
        log_frame.grid(row=4, column=0, padx=10, pady=5, sticky="nsew")
        log_btn_frame = ttk.Frame(log_frame)
        log_btn_frame.pack(fill=tk.X, side=tk.BOTTOM)
        ttk.Button(log_btn_frame, text="清空日志", command=self.clear_log).pack(side=tk.RIGHT, padx=5, pady=2)
        self.log_text = tk.Text(log_frame, height=15)
        self.log_text.pack(fill=tk.BOTH, expand=True)

        # The file list (row 1) and the log (row 4) absorb extra space.
        self.root.columnconfigure(0, weight=1)
        self.root.rowconfigure(1, weight=1)
        self.root.rowconfigure(4, weight=1)

    def clear_log(self):
        """Remove all text from the log pane."""
        self.log_text.delete(1.0, tk.END)

    def show_help(self):
        """Show the usage instructions in a message box."""
        messagebox.showinfo("帮助", HELP_TEXT)

    def redirect_output(self):
        """Replace ``sys.stdout`` with a tee that also writes to the log pane."""

        class StdoutRedirector:
            """File-like object forwarding writes to a Text widget and stdout."""

            def __init__(self, text_widget):
                self.text_widget = text_widget
                self.original_stdout = sys.stdout

            def write(self, message):
                self.text_widget.insert(tk.END, message)
                self.text_widget.see(tk.END)
                self.original_stdout.write(message)

            def flush(self):
                self.original_stdout.flush()

        sys.stdout = StdoutRedirector(self.log_text)

    def on_tree_click(self, event):
        """Toggle the text checkbox when the first column of a row is clicked."""
        if self.tree.identify("region", event.x, event.y) != "cell":
            return
        col = self.tree.identify_column(event.x)
        item = self.tree.identify_row(event.y)
        if col == "#1":  # the "selected" column
            values = self.tree.item(item, "values")
            new_val = "[√]" if values[0] == "[ ]" else "[ ]"
            self.tree.item(item, values=(new_val, values[1]))

    def select_output_dir(self):
        """Let the user pick the output directory."""
        path = filedialog.askdirectory()
        if path:
            self.output_dir.set(path)

    def select_files(self):
        """Ask for image files and append them (unchecked) to the list."""
        # Patterns are given as a tuple: Tk expects a list of patterns, and a
        # single ';'-joined string is not matched on every platform.
        patterns = ("*.png", "*.jpg", "*.jpeg", "*.bmp", "*.gif")
        files = filedialog.askopenfilenames(filetypes=[("图片文件", patterns)])
        for f in files:
            # The full path travels in the row tag; only the basename is shown.
            self.tree.insert("", tk.END, values=("[ ]", os.path.basename(f)), tags=(f,))

    def remove_selected(self):
        """Delete every row whose checkbox is ticked."""
        to_remove = [
            item for item in self.tree.get_children()
            if self.tree.item(item, "values")[0] == "[√]"
        ]
        for item in reversed(to_remove):
            self.tree.delete(item)

    def clear_files(self):
        """Delete every row from the file list."""
        for item in self.tree.get_children():
            self.tree.delete(item)

    @staticmethod
    def _parse_resolution(text):
        """Parse a ``"<width>x<height>"`` string into a pair of positive ints.

        Raises:
            ValueError: if *text* is not of that form or a side is not positive.
        """
        width_text, sep, height_text = text.strip().lower().partition('x')
        if not sep:
            raise ValueError(f"invalid resolution: {text!r}")
        width, height = int(width_text), int(height_text)
        if width <= 0 or height <= 0:
            raise ValueError(f"invalid resolution: {text!r}")
        return width, height

    def start_conversion(self, convert_all):
        """Collect the files to convert, validate the settings and convert.

        Args:
            convert_all: convert every listed file when true, otherwise only
                the rows whose checkbox is ticked.
        """
        input_files = [
            self.tree.item(item, "tags")[0]
            for item in self.tree.get_children()
            if convert_all or self.tree.item(item, "values")[0] == "[√]"
        ]
        if not input_files:
            msg = "没有找到可转换的文件" if convert_all else "没有选中任何文件"
            messagebox.showwarning("警告", msg)
            return
        # The combobox is editable, so the value must be validated.
        try:
            width, height = self._parse_resolution(self.resolution.get())
        except ValueError:
            messagebox.showerror("错误", "分辨率格式无效, 应为 宽x高, 例如 128x128")
            return
        os.makedirs(self.output_dir.get(), exist_ok=True)
        compress = CompressMethod.RLE if self.compress_method.get() == "RLE" else CompressMethod.NONE
        self.convert_images(input_files, width, height, compress)

    def convert_images(self, input_files, width, height, compress):
        """Convert each file to a resized PNG plus an LVGL ``.c`` array.

        A failure on one file is logged and does not stop the batch.

        Args:
            input_files: paths of the images to convert.
            width: target width in pixels.
            height: target height in pixels.
            compress: ``CompressMethod`` passed to ``LVGLImage.to_c_array``.
        """
        success_count = 0
        total_files = len(input_files)
        for file_path in input_files:
            temp_path = None
            try:
                print(f"正在处理: {os.path.basename(file_path)}")
                with Image.open(file_path) as img:
                    img = img.resize((width, height), Image.Resampling.LANCZOS)
                    color_format_str = self.color_format.get()
                    if color_format_str == "自动识别":
                        # Use the alpha-carrying format only if the image has transparency.
                        use_alpha = img.mode in ('RGBA', 'LA') or (
                            img.mode == 'P' and 'transparency' in img.info)
                    else:
                        use_alpha = color_format_str == "RGB565A8"
                    if use_alpha:
                        img = img.convert('RGBA')
                        cf = ColorFormat.RGB565A8
                    else:
                        img = img.convert('RGB')
                        cf = ColorFormat.RGB565
                    # Keep a copy of the resized image next to the C file.
                    base_name = os.path.splitext(os.path.basename(file_path))[0]
                    output_image_path = os.path.join(
                        self.output_dir.get(), f"{base_name}_{width}x{height}.png")
                    img.save(output_image_path, 'PNG')
                    # LVGLImage reads from a path; reserve a temp name, close
                    # the handle, then write the PNG to it.
                    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmpfile:
                        temp_path = tmpfile.name
                    img.save(temp_path, 'PNG')
                    lvgl_img = LVGLImage().from_png(temp_path, cf=cf)
                    output_c_path = os.path.join(self.output_dir.get(), f"{base_name}.c")
                    lvgl_img.to_c_array(output_c_path, compress=compress)
                    success_count += 1
                    print(f"成功转换: {base_name}.c\n")
            except Exception as e:  # per-file boundary: log and continue the batch
                print(f"转换失败: {str(e)}\n")
            finally:
                # Remove the temp file even when the conversion failed.
                if temp_path is not None:
                    try:
                        os.unlink(temp_path)
                    except OSError:
                        pass
        print(f"转换完成! 成功 {success_count}/{total_files} 个文件\n")
if __name__ == "__main__":
    # Script entry point: create the Tk root, attach the app, run the loop.
    main_window = tk.Tk()
    app = ImageConverterApp(main_window)
    main_window.mainloop()

View File

@@ -0,0 +1,3 @@
lz4==4.4.4
Pillow==11.3.0
pypng==0.20220715.0

View File

@@ -0,0 +1,280 @@
"""
实时AFSK解调器 - 基于Goertzel算法
"""
import numpy as np
from collections import deque
class TraceGoertzel:
    """Goertzel single-frequency amplitude estimator."""

    def __init__(self, freq: float, n: int):
        """Precompute the recurrence coefficients.

        Args:
            freq: normalized frequency (target frequency / sampling frequency).
            n: window size used to normalize the amplitude.
        """
        self.freq = freq
        self.n = n
        omega = 2.0 * np.pi * freq
        self.k = int(freq * n)
        self.w = omega
        self.cw = np.cos(omega)
        self.sw = np.sin(omega)
        self.c = 2.0 * self.cw
        # Only the two most recent recurrence values are needed.
        self.zs = deque([0.0, 0.0], maxlen=2)

    def reset(self):
        """Restore the recurrence state to two zeros."""
        self.zs.clear()
        self.zs.extend((0.0, 0.0))

    def __call__(self, xs):
        """Run the recurrence over *xs* from a clean state.

        Args:
            xs: iterable of samples.

        Returns:
            The amplitude computed after the last sample (see ``amp``).
        """
        self.reset()
        state = self.zs
        for sample in xs:
            older, newest = state  # state holds [S[n-2], S[n-1]]
            # S[n] = x[n] + C * S[n-1] - S[n-2]
            state.append(float(sample + self.c * newest - older))
        return self.amp

    @property
    def amp(self) -> float:
        """Amplitude derived from the two stored recurrence values."""
        newest, older = self.zs[-1], self.zs[-2]
        in_phase = self.cw * newest - older
        quadrature = self.sw * newest
        return np.sqrt(in_phase**2 + quadrature**2) / (self.n / 2.0)
class PairGoertzel:
    """Two-tone demodulator built from a pair of Goertzel detectors."""

    def __init__(self, f_sample: int, f_space: int, f_mark: int,
                 bit_rate: int, win_size: int):
        """Set up one detector per tone and the shared sample window.

        Args:
            f_sample: sampling frequency.
            f_space: space frequency (usually bit 0).
            f_mark: mark frequency (usually bit 1).
            bit_rate: bit rate; must divide ``f_sample`` exactly.
            win_size: Goertzel window size, also the input buffer capacity.
        """
        assert f_sample % bit_rate == 0, "采样频率必须是比特率的整数倍"
        self.Fs, self.F0, self.F1 = f_sample, f_space, f_mark
        self.bit_rate = bit_rate
        # Number of samples spanned by one bit.
        self.n_per_bit = int(f_sample // bit_rate)
        # Normalized tone frequencies.
        f0 = f_space / f_sample
        f1 = f_mark / f_sample
        self.g0 = TraceGoertzel(freq=f0, n=win_size)
        self.g1 = TraceGoertzel(freq=f1, n=win_size)
        # Sliding window over the most recent win_size samples.
        self.in_buffer = deque(maxlen=win_size)
        self.out_count = 0
        print(f"PairGoertzel initialized: f0={f0:.6f}, f1={f1:.6f}, win_size={win_size}, n_per_bit={self.n_per_bit}")

    def __call__(self, s: float):
        """Push one sample; emit a measurement once per bit period.

        Args:
            s: sample value.

        Returns:
            ``(amp0, amp1, p1_prob)``. Between bit boundaries this is
            ``(0, 0, None)``; on a boundary it holds the space amplitude,
            the mark amplitude and ``amp1 / (amp0 + amp1 + 1e-8)``.
        """
        self.in_buffer.append(s)
        self.out_count += 1
        if self.out_count < self.n_per_bit:
            return 0, 0, None
        space_amp = self.g0(self.in_buffer)
        mark_amp = self.g1(self.in_buffer)
        mark_prob = mark_amp / (space_amp + mark_amp + 1e-8)
        self.out_count = 0
        return space_amp, mark_amp, mark_prob
class RealTimeAFSKDecoder:
"""实时AFSK解码器 - 基于起始帧触发"""
def __init__(self, f_sample: int = 16000, mark_freq: int = 1800,
space_freq: int = 1500, bitrate: int = 100,
s_goertzel: int = 9, threshold: float = 0.5):
"""
初始化实时AFSK解码器
Args:
f_sample: 采样频率
mark_freq: Mark频率
space_freq: Space频率
bitrate: 比特率
s_goertzel: Goertzel窗口大小系数 (win_size = f_sample // mark_freq * s_goertzel)
threshold: 判决门限
"""
self.f_sample = f_sample
self.mark_freq = mark_freq
self.space_freq = space_freq
self.bitrate = bitrate
self.threshold = threshold
# 计算窗口大小 - 与参考代码一致
win_size = int(f_sample / mark_freq * s_goertzel)
# 初始化解调器
self.demodulator = PairGoertzel(f_sample, space_freq, mark_freq,
bitrate, win_size)
# 帧定义 - 与参考代码一致
self.start_bytes = b'\x01\x02'
self.end_bytes = b'\x03\x04'
self.start_bits = "".join(format(int(x), '08b') for x in self.start_bytes)
self.end_bits = "".join(format(int(x), '08b') for x in self.end_bytes)
# 状态机
self.state = "idle" # idle / entering
# 存储解调结果
self.buffer_prelude:deque = deque(maxlen=len(self.start_bits)) # 判断是否启动
self.indicators = [] # 存储概率序列
self.signal_bits = "" # 存储比特序列
self.text_cache = ""
# 解码结果
self.decoded_messages = []
self.total_bits_received = 0
print(f"Decoder initialized: win_size={win_size}")
print(f"Start frame: {self.start_bits} (from {self.start_bytes.hex()})")
print(f"End frame: {self.end_bits} (from {self.end_bytes.hex()})")
def process_audio(self, samples: np.array) -> str:
"""
处理音频数据并返回解码文本
Args:
audio_data: 音频字节数据 (16-bit PCM)
Returns:
新解码的文本
"""
new_text = ""
# 逐个处理采样点
for sample in samples:
amp0, amp1, p1_prob = self.demodulator(sample)
# 如果有概率输出,记录并判决
if p1_prob is not None:
bit = '1' if p1_prob > self.threshold else '0'
match self.state:
case "idle":
self.buffer_prelude.append(bit)
pass
case "entering":
self.buffer_prelude.append(bit)
self.signal_bits += bit
self.total_bits_received += 1
case _:
pass
self.indicators.append(p1_prob)
# 检查状态机
if self.state == "idle" and "".join(self.buffer_prelude) == self.start_bits:
self.state = "entering"
self.text_cache = ""
self.signal_bits = "" # 清空比特序列
self.buffer_prelude.clear()
elif self.state == "entering" and ("".join(self.buffer_prelude) == self.end_bits or len(self.signal_bits) >= 256):
self.state = "idle"
self.buffer_prelude.clear()
# 每收集一定数量的比特后尝试解码
if len(self.signal_bits) >= 8:
text = self._decode_bits_to_text(self.signal_bits)
if len(text) > len(self.text_cache):
new_text = text[len(self.text_cache) - len(text):]
self.text_cache = text
return new_text
def _decode_bits_to_text(self, bits: str) -> str:
"""
将比特串解码为文本
Args:
bits: 比特串
Returns:
解码出的文本
"""
if len(bits) < 8:
return ""
decoded_text = ""
byte_count = len(bits) // 8
for i in range(byte_count):
# 提取8位
byte_bits = bits[i*8:(i+1)*8]
# 位转字节
byte_val = int(byte_bits, 2)
# 尝试解码为ASCII字符
if 32 <= byte_val <= 126: # 可打印ASCII字符
decoded_text += chr(byte_val)
elif byte_val == 0: # NULL字符忽略
continue
else:
# 非可打印字符pass以十六进制显示
pass
# decoded_text += f"\\x{byte_val:02X}"
return decoded_text
def clear(self):
"""清空解码状态"""
self.indicators = []
self.signal_bits = ""
self.decoded_messages = []
self.total_bits_received = 0
print("解码器状态已清空")
def get_stats(self) -> dict:
"""获取解码统计信息"""
return {
'prelude_bits': "".join(self.buffer_prelude),
"state": self.state,
'total_chars': sum(len(msg) for msg in self.text_cache),
'buffer_bits': len(self.signal_bits),
'mark_freq': self.mark_freq,
'space_freq': self.space_freq,
'bitrate': self.bitrate,
'threshold': self.threshold,
}

View File

@@ -0,0 +1,444 @@
import sys
import numpy as np
import asyncio
import wave
from collections import deque
import qasync
import matplotlib
matplotlib.use('qtagg')
from matplotlib.backends.backend_qtagg import FigureCanvasQTAgg as FigureCanvas
from matplotlib.backends.backend_qtagg import NavigationToolbar2QT as NavigationToolbar # noqa: F401
from matplotlib.figure import Figure
from PyQt6.QtWidgets import (QApplication, QMainWindow, QVBoxLayout, QWidget,
QHBoxLayout, QLineEdit, QPushButton, QLabel, QTextEdit)
from PyQt6.QtCore import QTimer
# 导入解码器
from demod import RealTimeAFSKDecoder
class UDPServerProtocol(asyncio.DatagramProtocol):
    """Datagram protocol that queues payload bytes from a single sender."""

    def __init__(self, data_queue):
        """Store the destination queue; no client is bound yet."""
        self.client_address = None
        self.data_queue: deque = data_queue

    def connection_made(self, transport):
        """Remember the transport handed over by the event loop."""
        self.transport = transport

    def datagram_received(self, data, addr):
        """Queue *data* if it comes from the bound client.

        The first sender seen becomes the bound client; datagrams from any
        other address are reported and discarded.
        """
        if self.client_address is None:
            self.client_address = addr
            print(f"接受来自 {addr} 的连接")
        if addr != self.client_address:
            print(f"忽略来自未知地址 {addr} 的数据")
            return
        # Extending with a bytes object appends one int per byte.
        self.data_queue.extend(data)
class MatplotlibWidget(QWidget):
    """Widget plotting received audio and feeding it to the AFSK decoder.

    Raw bytes arrive in ``wave_data``; a timer periodically drains them,
    decodes them as little-endian 16-bit samples scaled by 1/32768, passes
    them to the decoder and redraws the waveform and spectrum plots.
    """

    def __init__(self, parent=None):
        """Create the figure, both plots, the refresh timer and the decoder."""
        super().__init__(parent)
        self.figure = Figure()
        # FigureCanvas is the QWidget that hosts the figure.
        self.canvas = FigureCanvas(self.figure)
        # The navigation toolbar is disabled; assign
        # NavigationToolbar(self.canvas, self) here to enable it.
        self.toolbar = None
        layout = QVBoxLayout()
        if self.toolbar is not None:  # never hand a null widget to the layout
            layout.addWidget(self.toolbar)
        layout.addWidget(self.canvas)
        self.setLayout(layout)

        self.freq = 16000       # sampling frequency
        self.time_window = 20   # size factor for both buffers below
        # Incoming bytes (two per sample), drained by update_plot().
        self.wave_data = deque(maxlen=self.freq * self.time_window * 2)
        # Decoded samples kept for plotting and saving.
        self.signals = deque(maxlen=self.freq * self.time_window)

        self.ax1 = self.figure.add_subplot(2, 1, 1)
        self.ax2 = self.figure.add_subplot(2, 1, 2)
        # Time-domain plot.
        self.ax1.set_title('Real-time Audio Waveform')
        self.ax1.set_xlabel('Sample Index')
        self.ax1.set_ylabel('Amplitude')
        self.line_time, = self.ax1.plot([], [])
        self.ax1.grid(True, alpha=0.3)
        # Frequency-domain plot.
        self.ax2.set_title('Real-time Frequency Spectrum')
        self.ax2.set_xlabel('Frequency (Hz)')
        self.ax2.set_ylabel('Magnitude')
        self.line_freq, = self.ax2.plot([], [])
        self.ax2.grid(True, alpha=0.3)
        self.figure.tight_layout()

        # Refresh every 100 ms once started.
        self.timer = QTimer(self)
        self.timer.setInterval(100)
        self.timer.timeout.connect(self.update_plot)

        self.decoder = RealTimeAFSKDecoder(
            f_sample=self.freq,
            mark_freq=1800,
            space_freq=1500,
            bitrate=100,
            s_goertzel=9,
            threshold=0.5
        )
        # Optional callable invoked with each piece of newly decoded text.
        self.decode_callback = None

    def start_plotting(self):
        """Start the periodic refresh."""
        self.timer.start()

    def stop_plotting(self):
        """Stop the periodic refresh."""
        self.timer.stop()

    def update_plot(self):
        """Drain pending audio, decode it and redraw both plots."""
        self._consume_pending_audio()
        if len(self.signals) == 0:
            return
        signal = np.array(self.signals)
        self._update_waveform(signal)
        if len(signal) > 1:
            self._update_spectrum(signal)
        self.canvas.draw()

    def _consume_pending_audio(self):
        """Move complete 16-bit samples from ``wave_data`` to the decoder and plot buffer."""
        if len(self.wave_data) < 2:
            return
        # Take an even number of bytes; a trailing odd byte waits for its pair.
        even = len(self.wave_data) // 2 * 2
        print(f"length of wave_data: {len(self.wave_data)}")
        drained = bytearray(self.wave_data.popleft() for _ in range(even))
        signal = np.frombuffer(drained, dtype='<i2') / 32768
        # process_audio returns only the text decoded from these samples.
        decoded_text_new = self.decoder.process_audio(signal)
        if decoded_text_new and self.decode_callback:
            self.decode_callback(decoded_text_new)
        self.signals.extend(signal.tolist())

    def _update_waveform(self, signal):
        """Show *signal* in the time-domain plot and rescale its axes."""
        self.line_time.set_data(np.arange(len(signal)), signal)
        self.ax1.set_xlim(0, len(signal))
        y_min, y_max = np.min(signal), np.max(signal)
        if y_min != y_max:
            margin = (y_max - y_min) * 0.1
            self.ax1.set_ylim(y_min - margin, y_max + margin)
        else:
            self.ax1.set_ylim(-1, 1)

    def _update_spectrum(self, signal):
        """Show the FFT magnitude of *signal* (non-negative frequencies) and rescale."""
        fft_signal = np.abs(np.fft.fft(signal))
        frequencies = np.fft.fftfreq(len(signal), 1/self.freq)
        positive_freq_idx = frequencies >= 0
        freq_positive = frequencies[positive_freq_idx]
        fft_positive = fft_signal[positive_freq_idx]
        self.line_freq.set_data(freq_positive, fft_positive)
        # Only 0..4000 Hz (capped at Nyquist) is shown to keep the plot readable.
        max_freq_show = min(4000, self.freq // 2)
        self.ax2.set_xlim(0, max_freq_show)
        fft_max = np.max(fft_positive[freq_positive <= max_freq_show])
        if fft_max > 0:
            self.ax2.set_ylim(0, fft_max * 1.1)
        else:
            self.ax2.set_ylim(0, 1)
class MainWindow(QMainWindow):
    """Main window: plots, UDP listen controls and the decoded-text panel."""

    def __init__(self):
        """Build the window layout and wire up buttons, timers and callbacks."""
        super().__init__()
        self.setWindowTitle("Acoustic Check")
        self.setGeometry(100, 100, 1000, 800)
        main_widget = QWidget()
        self.setCentralWidget(main_widget)
        main_layout = QVBoxLayout(main_widget)

        # Plot area.
        self.matplotlib_widget = MatplotlibWidget()
        main_layout.addWidget(self.matplotlib_widget)

        # Control panel: address/port, listen toggle, status, save.
        control_panel = QWidget()
        control_layout = QHBoxLayout(control_panel)
        control_layout.addWidget(QLabel("监听地址:"))
        self.address_input = QLineEdit("0.0.0.0")
        self.address_input.setFixedWidth(120)
        control_layout.addWidget(self.address_input)
        control_layout.addWidget(QLabel("端口:"))
        self.port_input = QLineEdit("8000")
        self.port_input.setFixedWidth(80)
        control_layout.addWidget(self.port_input)
        self.listen_button = QPushButton("开始监听")
        self.listen_button.clicked.connect(self.toggle_listening)
        control_layout.addWidget(self.listen_button)
        self.status_label = QLabel("状态: 未连接")
        control_layout.addWidget(self.status_label)
        self.data_label = QLabel("接收数据: 0 bytes")
        control_layout.addWidget(self.data_label)
        self.save_button = QPushButton("保存音频")
        self.save_button.clicked.connect(self.save_audio)
        self.save_button.setEnabled(False)
        control_layout.addWidget(self.save_button)
        control_layout.addStretch()
        main_layout.addWidget(control_panel)

        # Decoded-text panel.
        decode_panel = QWidget()
        decode_layout = QVBoxLayout(decode_panel)
        decode_title = QLabel("实时AFSK解码结果:")
        decode_title.setStyleSheet("font-weight: bold; font-size: 14px;")
        decode_layout.addWidget(decode_title)
        self.decode_text = QTextEdit()
        self.decode_text.setMaximumHeight(150)
        self.decode_text.setReadOnly(True)
        self.decode_text.setStyleSheet("font-family: 'Courier New', monospace; font-size: 12px;")
        decode_layout.addWidget(self.decode_text)
        decode_control_layout = QHBoxLayout()
        self.clear_decode_button = QPushButton("清空解码")
        self.clear_decode_button.clicked.connect(self.clear_decode_text)
        decode_control_layout.addWidget(self.clear_decode_button)
        self.decode_stats_label = QLabel("解码统计: 0 bits, 0 chars")
        decode_control_layout.addWidget(self.decode_stats_label)
        decode_control_layout.addStretch()
        decode_layout.addLayout(decode_control_layout)
        main_layout.addWidget(decode_panel)

        # Newly decoded text is delivered through this callback.
        self.matplotlib_widget.decode_callback = self.on_decode_text

        self.udp_transport = None
        self.is_listening = False

        # Statistics labels are refreshed once per second while listening.
        self.stats_timer = QTimer(self)
        self.stats_timer.setInterval(1000)
        self.stats_timer.timeout.connect(self.update_stats)

    def on_decode_text(self, new_text: str):
        """Append *new_text* to the decoded-text view, keeping the last 1000 characters."""
        if not new_text:
            return
        updated_text = self.decode_text.toPlainText() + new_text
        if len(updated_text) > 1000:
            updated_text = updated_text[-1000:]
        self.decode_text.setPlainText(updated_text)
        # Keep the view scrolled to the end.
        cursor = self.decode_text.textCursor()
        cursor.movePosition(cursor.MoveOperation.End)
        self.decode_text.setTextCursor(cursor)

    def clear_decode_text(self):
        """Clear the decoded-text view and the decoder state."""
        self.decode_text.clear()
        if hasattr(self.matplotlib_widget, 'decoder'):
            self.matplotlib_widget.decoder.clear()
        self.decode_stats_label.setText("解码统计: 0 bits, 0 chars")

    def update_decode_stats(self):
        """Refresh the decoder statistics label."""
        if hasattr(self.matplotlib_widget, 'decoder'):
            stats = self.matplotlib_widget.decoder.get_stats()
            stats_text = (
                f"前置: {stats['prelude_bits']} , 已接收{stats['total_chars']} chars, "
                f"缓冲: {stats['buffer_bits']} bits, 状态: {stats['state']}"
            )
            self.decode_stats_label.setText(stats_text)

    def toggle_listening(self):
        """Start listening if stopped, stop if listening."""
        if not self.is_listening:
            self.start_listening()
        else:
            self.stop_listening()

    async def start_listening_async(self):
        """Open the UDP endpoint; on failure roll the UI back to stopped."""
        try:
            address = self.address_input.text().strip()
            port = int(self.port_input.text().strip())
            loop = asyncio.get_running_loop()
            self.udp_transport, protocol = await loop.create_datagram_endpoint(
                lambda: UDPServerProtocol(self.matplotlib_widget.wave_data),
                local_addr=(address, port)
            )
            self.status_label.setText(f"状态: 监听中 ({address}:{port})")
            print(f"UDP服务器启动, 监听 {address}:{port}")
        except Exception as e:
            self.status_label.setText(f"状态: 启动失败 - {str(e)}")
            print(f"UDP服务器启动失败: {e}")
            self.is_listening = False
            self.listen_button.setText("开始监听")
            self.address_input.setEnabled(True)
            self.port_input.setEnabled(True)
            # start_listening() already started these timers; stop them so
            # nothing keeps polling while no socket is open.
            self.matplotlib_widget.stop_plotting()
            self.stats_timer.stop()

    def start_listening(self):
        """Validate the port, switch the UI to listening and open the socket."""
        try:
            int(self.port_input.text().strip())
        except ValueError:
            self.status_label.setText("状态: 端口号必须是数字")
            return
        self.is_listening = True
        self.listen_button.setText("停止监听")
        self.address_input.setEnabled(False)
        self.port_input.setEnabled(False)
        self.save_button.setEnabled(True)
        self.matplotlib_widget.wave_data.clear()
        self.matplotlib_widget.start_plotting()
        self.stats_timer.start()
        # The endpoint is opened asynchronously on the event loop.
        loop = asyncio.get_event_loop()
        loop.create_task(self.start_listening_async())

    def stop_listening(self):
        """Close the socket, stop the timers and reset the UI."""
        self.is_listening = False
        self.listen_button.setText("开始监听")
        self.address_input.setEnabled(True)
        self.port_input.setEnabled(True)
        if self.udp_transport:
            self.udp_transport.close()
            self.udp_transport = None
        self.matplotlib_widget.stop_plotting()
        self.matplotlib_widget.wave_data.clear()
        self.stats_timer.stop()
        self.status_label.setText("状态: 已停止")

    def update_stats(self):
        """Refresh the sample counter and the decoder statistics."""
        data_size = len(self.matplotlib_widget.signals)
        self.data_label.setText(f"接收数据: {data_size} 采样")
        self.update_decode_stats()

    def save_audio(self):
        """Write the buffered samples to ``received_audio.wav`` as 16-bit mono PCM."""
        if len(self.matplotlib_widget.signals) > 0:
            try:
                samples = np.asarray(self.matplotlib_widget.signals, dtype=np.float64)
                # The buffer holds floats scaled by 1/32768; convert back to
                # int16 so the bytes match the 2-byte sample width below.
                pcm = np.clip(np.round(samples * 32768), -32768, 32767).astype('<i2')
                with wave.open("received_audio.wav", "wb") as wf:
                    wf.setnchannels(1)
                    wf.setsampwidth(2)
                    wf.setframerate(self.matplotlib_widget.freq)
                    wf.writeframes(pcm.tobytes())
                self.status_label.setText("状态: 音频已保存为 received_audio.wav")
                print("音频已保存为 received_audio.wav")
            except Exception as e:
                self.status_label.setText(f"状态: 保存失败 - {str(e)}")
                print(f"保存音频失败: {e}")
        else:
            self.status_label.setText("状态: 没有足够的数据可保存")
async def main():
    """Create the Qt application and run it on a qasync event loop.

    Returns when the Qt event loop ends (e.g. the window is closed).
    """
    app = QApplication(sys.argv)
    # Install a Qt-backed asyncio loop so asyncio tasks run inside Qt.
    loop = qasync.QEventLoop(app)
    asyncio.set_event_loop(loop)
    window = MainWindow()
    window.show()
    try:
        with loop:
            try:
                # run_forever() blocks until Qt exits and returns a plain
                # value, not an awaitable, so it must not be awaited.
                loop.run_forever()
            finally:
                # Close the socket while the loop is still open.
                if window.udp_transport:
                    window.udp_transport.close()
    except KeyboardInterrupt:
        print("程序被用户中断")

View File

@@ -0,0 +1,18 @@
#!/usr/bin/env python3
"""
音频实时监听与绘图系统主程序
基于Qt GUI + Matplotlib + UDP接收 + AFSK解码字符串
"""
import sys
import asyncio
from graphic import main
if __name__ == '__main__':
    # Run the GUI; Ctrl-C is a normal exit, any other error exits with status 1.
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print("程序被用户中断")
    except Exception as exc:
        print(f"程序执行出错: {exc}")
        sys.exit(1)

View File

@@ -0,0 +1,23 @@
# 声波测试
该gui用于接收小智设备通过`udp`回传的`pcm`并转换为时域/频域显示, 可以保存窗口长度的声音, 用于判断噪音频率分布和测试声波传输ascii的准确度。
固件测试需要打开`USE_AUDIO_DEBUGGER`, 并设置好`AUDIO_DEBUG_UDP_SERVER`是本机地址.
声波`demod`可以通过`sonic_wifi_config.html`或者上传至`PinMe`的[小智声波配网](https://iqf7jnhi.pinit.eth.limo)来输出声波测试
# 声波解码测试记录
> `✓`代表在I2S DIN接收原始PCM信号时就能成功解码, `△`代表需要降噪或额外操作可稳定解码, `X`代表降噪后效果也不好(可能能解部分但非常不稳定)。
> 个别ADC需要I2C配置阶段做更精细的降噪调整, 由于设备不通用暂只按照boards内提供的config测试
| 设备 | ADC | MIC | 效果 | 备注 |
| ---- | ---- | --- | --- | ---- |
| bread-compact | INMP441 | 集成MEMS MIC | ✓ | |
| atk-dnesp32s3-box | ES8311 | | ✓ |
| magiclick-2p5 | ES8311 | | ✓ |
| lichuang-dev | ES7210 | | △ | 测试时需要关掉INPUT_REFERENCE
| kevin-box-2 | ES7210 | | △ | 测试时需要关掉INPUT_REFERENCE
| m5stack-core-s3 | ES7210 | | △ | 测试时需要关掉INPUT_REFERENCE
| xmini-c3 | ES8311 | | △ | 需降噪
| atoms3r-echo-base | ES8311 | | △ | 需降噪
| atk-dnesp32s3-box0 | ES8311 | | X | 能接收且解码, 但是丢包率很高
| movecall-moji-esp32s3 | ES8311 | | X | 能接收且解码, 但是丢包率很高

View File

@@ -0,0 +1,4 @@
matplotlib==3.10.5
numpy==2.3.2
PyQt6==6.9.1
qasync==0.27.1

View File

@@ -0,0 +1,54 @@
import socket
import wave
import argparse
'''
Create a UDP socket and bind it to the server's IP:8000.
Listen for incoming messages and print them to the console.
Save the audio to a WAV file.
'''
def main(samplerate, channels):
    """Record UDP datagrams received on 0.0.0.0:8000 into a WAV file.

    Every datagram payload is appended as 16-bit PCM frames to
    ``"<samplerate>_<channels>.wav"`` until Ctrl-C is pressed.

    Args:
        samplerate: frame rate written to the WAV header.
        channels: channel count written to the WAV header.
    """
    filename = f"{samplerate}_{channels}.wav"
    # Context managers release the socket and the file even when bind() or
    # the WAV setup fails.
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as server_socket:
        server_socket.bind(('0.0.0.0', 8000))
        with wave.open(filename, "wb") as wav_file:
            wav_file.setnchannels(channels)
            wav_file.setsampwidth(2)  # 2 bytes per sample (16-bit)
            wav_file.setframerate(samplerate)
            print(f"Start saving audio from 0.0.0.0:8000 to {filename}...")
            try:
                while True:
                    message, address = server_socket.recvfrom(8000)
                    wav_file.writeframes(message)
                    print(f"Received {len(message)} bytes from {address}")
            except KeyboardInterrupt:
                print("\nStopping recording...")
    print(f"WAV file '{filename}' saved successfully")
if __name__ == "__main__":
    # Command-line entry point: parse the WAV parameters and start recording.
    arg_parser = argparse.ArgumentParser(description='UDP音频数据接收器保存为WAV文件')
    arg_parser.add_argument('--samplerate', '-s', type=int, default=16000,
                            help='采样率 (默认: 16000)')
    arg_parser.add_argument('--channels', '-c', type=int, default=2,
                            help='声道数 (默认: 2)')
    options = arg_parser.parse_args()
    main(options.samplerate, options.channels)

2
scripts/flash.sh Executable file
View File

@@ -0,0 +1,2 @@
#!/bin/sh
# Write the merged v0.9.9 bread-compact-wifi image at flash offset 0 through
# /dev/ttyACM0 at 2000000 baud. The image path is relative to the current
# working directory.
esptool.py -p /dev/ttyACM0 -b 2000000 write_flash 0 ../releases/v0.9.9_bread-compact-wifi/merged-binary.bin

187
scripts/gen_lang.py Normal file
View File

@@ -0,0 +1,187 @@
#!/usr/bin/env python3
import argparse
import json
import os
# str.format() template for the generated C++ header. Placeholders:
# {lang_code}, {lang_code_for_font}, {strings}, {sounds}; literal braces
# of the C++ code are doubled. The text below is emitted verbatim.
HEADER_TEMPLATE = """// Auto-generated language config
// Language: {lang_code} with en-US fallback
#pragma once
#include <string_view>
#ifndef {lang_code_for_font}
#define {lang_code_for_font} // 預設語言
#endif
namespace Lang {{
// 语言元数据
constexpr const char* CODE = "{lang_code}";
// 字符串资源 (en-US as fallback for missing keys)
namespace Strings {{
{strings}
}}
// 音效资源 (en-US as fallback for missing audio files)
namespace Sounds {{
{sounds}
}}
}}
"""
def load_base_language(assets_dir):
    """Load the en-US language data used as the fallback baseline.

    Returns the parsed ``locales/en-US/language.json`` under *assets_dir*,
    or ``{'strings': {}}`` when the file is missing or is not valid JSON.
    """
    empty_baseline = {'strings': {}}
    json_path = os.path.join(assets_dir, 'locales', 'en-US', 'language.json')
    if not os.path.exists(json_path):
        print("Warning: en-US base language file not found, fallback mechanism disabled")
        return empty_baseline
    try:
        with open(json_path, 'r', encoding='utf-8') as handle:
            parsed = json.load(handle)
    except json.JSONDecodeError as err:
        print(f"Warning: Failed to parse en-US language file: {err}")
        return empty_baseline
    print(f"Loaded base language en-US with {len(parsed.get('strings', {}))} strings")
    return parsed
def get_sound_files(directory):
    """Return the names of the ``.ogg`` files in *directory* ([] if it does not exist)."""
    if os.path.exists(directory):
        return [entry for entry in os.listdir(directory) if entry.endswith('.ogg')]
    return []
def _sound_declaration(base_name):
    """Return the C++ snippet exposing one embedded ``.ogg`` as a string_view."""
    return f'''
extern const char ogg_{base_name}_start[] asm("_binary_{base_name}_ogg_start");
extern const char ogg_{base_name}_end[] asm("_binary_{base_name}_ogg_end");
static const std::string_view OGG_{base_name.upper()} {{
static_cast<const char*>(ogg_{base_name}_start),
static_cast<size_t>(ogg_{base_name}_end - ogg_{base_name}_start)
}};'''


def generate_header(lang_code, output_path):
    """Generate the language config header for *lang_code* at *output_path*.

    The assets directory is derived from *output_path*: if the file lives
    in a directory named ``assets`` that directory is used, otherwise an
    ``assets`` directory next to the file. Strings from en-US are used as a
    baseline and overridden by the selected language; sound declarations
    cover the ``.ogg`` files of en-US, the selected language and ``common``.

    Args:
        lang_code: language code such as ``zh-CN``.
        output_path: path of the header to write.

    Raises:
        FileNotFoundError: the language JSON file does not exist.
        ValueError: the JSON lacks the ``language`` or ``strings`` key.
    """
    # output_path is usually main/assets/lang_config.h
    main_dir = os.path.dirname(output_path)
    if os.path.basename(main_dir) == 'assets':
        main_dir = os.path.dirname(main_dir)
    assets_dir = os.path.join(main_dir, 'assets')
    input_path = os.path.join(assets_dir, 'locales', lang_code, 'language.json')
    print(f"Processing language: {lang_code}")
    print(f"Input file path: {input_path}")
    print(f"Output file path: {output_path}")
    if not os.path.exists(input_path):
        raise FileNotFoundError(f"Language file not found: {input_path}")
    with open(input_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    if 'language' not in data or 'strings' not in data:
        raise ValueError("Invalid JSON structure")

    # Strings: en-US baseline overridden by the selected language.
    base_data = load_base_language(assets_dir)
    base_strings = base_data.get('strings', {})
    user_strings = data['strings']
    merged_strings = base_strings.copy()
    merged_strings.update(user_strings)
    base_count = len(base_strings)
    user_count = len(user_strings)
    total_count = len(merged_strings)
    fallback_count = total_count - user_count
    print(f"Language {lang_code} string statistics:")
    print(f" - Base language (en-US): {base_count} strings")
    print(f" - User language: {user_count} strings")
    print(f" - Total: {total_count} strings")
    if fallback_count > 0:
        print(f" - Fallback to en-US: {fallback_count} strings")
    strings = []
    for key, value in merged_strings.items():
        # A raw quote or line break would terminate/break the C string literal.
        value = value.replace('"', '\\"').replace('\n', '\\n')
        strings.append(f' constexpr const char* {key.upper()} = "{value}";')

    # Sounds: en-US baseline plus the selected language plus common sounds.
    current_lang_dir = os.path.join(assets_dir, 'locales', lang_code)
    base_lang_dir = os.path.join(assets_dir, 'locales', 'en-US')
    common_dir = os.path.join(assets_dir, 'common')
    base_sounds = get_sound_files(base_lang_dir)
    current_sounds = get_sound_files(current_lang_dir)
    common_sounds = get_sound_files(common_dir)
    base_sound_count = len(base_sounds)
    user_sound_count = len(current_sounds)
    common_sound_count = len(common_sounds)
    sound_fallback_count = len(set(base_sounds) - set(current_sounds))
    print(f"Language {lang_code} sound statistics:")
    print(f" - Base language (en-US): {base_sound_count} sounds")
    print(f" - User language: {user_sound_count} sounds")
    print(f" - Common sounds: {common_sound_count} sounds")
    if sound_fallback_count > 0:
        print(f" - Sound fallback to en-US: {sound_fallback_count} sounds")
    # One declaration per distinct base name: the same name appearing in
    # several directories would otherwise be declared twice.
    sound_names = {
        os.path.splitext(file)[0]
        for file in (*base_sounds, *current_sounds, *common_sounds)
    }
    sounds = [_sound_declaration(name) for name in sound_names]

    content = HEADER_TEMPLATE.format(
        lang_code=lang_code,
        lang_code_for_font=lang_code.replace('-', '_').lower(),
        strings="\n".join(sorted(strings)),
        sounds="\n".join(sorted(sounds))
    )
    # dirname is '' for a bare filename, and os.makedirs('') raises.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(content)
if __name__ == "__main__":
    # Command-line entry point: read the language code and the output path,
    # then generate the header through generate_header().
    parser = argparse.ArgumentParser(description="Generate language configuration header file with en-US fallback")
    parser.add_argument("--language", required=True, help="Language code (e.g: zh-CN, en-US, ja-JP)")
    parser.add_argument("--output", required=True, help="Output header file path")
    args = parser.parse_args()

    try:
        generate_header(args.language, args.output)
        print(f"Successfully generated language config file: {args.output}")
    except Exception as e:
        # Top-level boundary: report the failure and signal it via the exit status.
        print(f"Error: {e}")
        # The bare `exit` builtin is injected by the `site` module for
        # interactive use and is absent under `python -S`; raising SystemExit
        # yields the same exit status without that dependency.
        raise SystemExit(1)

3
scripts/mp3_to_ogg.sh Normal file
View File

@@ -0,0 +1,3 @@
#!/bin/sh
# mp3_to_ogg.sh <input_mp3_file> <output_ogg_file>
# Encodes the input as 16 kbit/s, mono, 16 kHz Opus with 60 ms frames.
# Both arguments are quoted so paths containing spaces or glob characters
# reach ffmpeg as a single argument each.
ffmpeg -i "$1" -c:a libopus -b:a 16k -ac 1 -ar 16000 -frame_duration 60 "$2"

View File

@@ -0,0 +1,29 @@
# ogg_covertor 小智AI OGG 批量转换器
本脚本为 OGG 批量转换工具，支持将输入的音频文件转换为小智可使用的 OGG 格式。
基于 Python 第三方库 `ffmpeg-python` 实现。
支持 OGG 和音频之间的互转、响度调节等功能。
# 创建并激活虚拟环境
```bash
# 创建虚拟环境
python -m venv venv
# 激活虚拟环境
source venv/bin/activate # Mac/Linux
venv\Scripts\activate # Windows
```
# 安装依赖
请在虚拟环境中执行
```bash
pip install ffmpeg-python
```
# 运行脚本
```bash
python ogg_covertor.py
```

View File

@@ -0,0 +1,230 @@
import tkinter as tk
from tkinter import ttk, filedialog, messagebox
import os
import threading
import sys
import ffmpeg
class AudioConverterApp:
    """Tkinter GUI that batch-converts audio files to and from Xiaozhi OGG (Opus) files.

    The window provides a conversion-mode switch, optional loudness
    normalisation, a checkable file list, an output-directory picker and a
    log pane mirroring everything printed to stdout.  Conversions run on a
    background thread and are delegated to ffmpeg through ``ffmpeg-python``.
    """

    def __init__(self, master):
        """Build the UI inside *master* (the Tk root window) and hook stdout to the log pane."""
        self.master = master
        master.title("小智AI OGG音频批量转换工具")
        master.geometry("680x600")  # initial window size

        # State shared between the widgets and the conversion workers.
        self.mode = tk.StringVar(value="audio_to_ogg")
        self.output_dir = tk.StringVar()
        self.output_dir.set(os.path.abspath("output"))
        self.enable_loudnorm = tk.BooleanVar(value=True)
        self.target_lufs = tk.DoubleVar(value=-16.0)

        # Create the widgets, then mirror stdout into the log widget.
        self.create_widgets()
        self.redirect_output()

    def create_widgets(self):
        """Create and lay out every widget of the main window."""
        # Conversion mode selection.
        mode_frame = ttk.LabelFrame(self.master, text="转换模式")
        mode_frame.grid(row=0, column=0, padx=10, pady=5, sticky="ew")

        ttk.Radiobutton(mode_frame, text="音频转到OGG", variable=self.mode,
                        value="audio_to_ogg", command=self.toggle_settings,
                        width=12).grid(row=0, column=0, padx=5)
        ttk.Radiobutton(mode_frame, text="OGG转回音频", variable=self.mode,
                        value="ogg_to_audio", command=self.toggle_settings,
                        width=12).grid(row=0, column=1, padx=5)

        # Loudness settings (only shown in audio -> OGG mode, see toggle_settings).
        self.loudnorm_frame = ttk.Frame(self.master)
        self.loudnorm_frame.grid(row=1, column=0, padx=10, pady=5, sticky="ew")

        ttk.Checkbutton(self.loudnorm_frame, text="启用响度调整",
                        variable=self.enable_loudnorm, width=15
                        ).grid(row=0, column=0, padx=2)
        ttk.Entry(self.loudnorm_frame, textvariable=self.target_lufs,
                  width=6).grid(row=0, column=1, padx=2)
        ttk.Label(self.loudnorm_frame, text="LUFS").grid(row=0, column=2, padx=2)

        # Input file selection.
        file_frame = ttk.LabelFrame(self.master, text="输入文件")
        file_frame.grid(row=2, column=0, padx=10, pady=5, sticky="nsew")

        # File list action buttons.
        ttk.Button(file_frame, text="选择文件", command=self.select_files,
                   width=12).grid(row=0, column=0, padx=5, pady=2)
        ttk.Button(file_frame, text="移除选中", command=self.remove_selected,
                   width=12).grid(row=0, column=1, padx=5, pady=2)
        ttk.Button(file_frame, text="清空列表", command=self.clear_files,
                   width=12).grid(row=0, column=2, padx=5, pady=2)

        # File list: a Treeview whose first column acts as a text checkbox.
        self.tree = ttk.Treeview(file_frame, columns=("selected", "filename"),
                                 show="headings", height=8)
        self.tree.heading("selected", text="选中", anchor=tk.W)
        self.tree.heading("filename", text="文件名", anchor=tk.W)
        self.tree.column("selected", width=60, anchor=tk.W)
        self.tree.column("filename", width=600, anchor=tk.W)
        self.tree.grid(row=1, column=0, columnspan=3, sticky="nsew", padx=5, pady=2)
        self.tree.bind("<ButtonRelease-1>", self.on_tree_click)

        # Output directory.
        output_frame = ttk.LabelFrame(self.master, text="输出目录")
        output_frame.grid(row=3, column=0, padx=10, pady=5, sticky="ew")

        ttk.Entry(output_frame, textvariable=self.output_dir, width=60
                  ).grid(row=0, column=0, padx=5, sticky="ew")
        ttk.Button(output_frame, text="浏览", command=self.select_output_dir,
                   width=8).grid(row=0, column=1, padx=5)

        # Conversion buttons.
        button_frame = ttk.Frame(self.master)
        button_frame.grid(row=4, column=0, padx=10, pady=10, sticky="ew")

        ttk.Button(button_frame, text="转换全部文件", command=lambda: self.start_conversion(True),
                   width=15).pack(side=tk.LEFT, padx=5)
        ttk.Button(button_frame, text="转换选中文件", command=lambda: self.start_conversion(False),
                   width=15).pack(side=tk.LEFT, padx=5)

        # Log area.
        log_frame = ttk.LabelFrame(self.master, text="日志")
        log_frame.grid(row=5, column=0, padx=10, pady=5, sticky="nsew")

        self.log_text = tk.Text(log_frame, height=14, width=80)
        self.log_text.pack(fill=tk.BOTH, expand=True)

        # Let the file list and the log pane absorb extra space on resize.
        self.master.columnconfigure(0, weight=1)
        self.master.rowconfigure(2, weight=1)
        self.master.rowconfigure(5, weight=3)
        file_frame.columnconfigure(0, weight=1)
        file_frame.rowconfigure(1, weight=1)

    def toggle_settings(self):
        """Show the loudness controls in audio -> OGG mode and hide them otherwise."""
        if self.mode.get() == "audio_to_ogg":
            self.loudnorm_frame.grid()
        else:
            self.loudnorm_frame.grid_remove()

    def select_files(self):
        """Ask the user for input files and append them, unchecked, to the list.

        The full path of each file is stored as the row's tag; only the base
        name is displayed.
        """
        # The audio filter previously listed "*.mogg" (a search/replace
        # casualty of "*.mp3"), which hid MP3 files from the dialog.
        file_types = [
            ("音频文件", "*.wav *.mp3 *.ogg *.flac") if self.mode.get() == "audio_to_ogg"
            else ("ogg文件", "*.ogg")
        ]

        files = filedialog.askopenfilenames(filetypes=file_types)
        for f in files:
            self.tree.insert("", tk.END, values=("[ ]", os.path.basename(f)), tags=(f,))

    def on_tree_click(self, event):
        """Toggle the text checkbox when the first column of a row is clicked."""
        region = self.tree.identify("region", event.x, event.y)
        if region == "cell":
            col = self.tree.identify_column(event.x)
            item = self.tree.identify_row(event.y)
            if col == "#1":  # the "selected" column was clicked
                current_val = self.tree.item(item, "values")[0]
                new_val = "[√]" if current_val == "[ ]" else "[ ]"
                self.tree.item(item, values=(new_val, self.tree.item(item, "values")[1]))

    def remove_selected(self):
        """Remove every checked row from the file list."""
        to_remove = []
        for item in self.tree.get_children():
            if self.tree.item(item, "values")[0] == "[√]":
                to_remove.append(item)
        for item in reversed(to_remove):
            self.tree.delete(item)

    def clear_files(self):
        """Remove every row from the file list."""
        for item in self.tree.get_children():
            self.tree.delete(item)

    def select_output_dir(self):
        """Let the user pick the output directory; keep the old one on cancel."""
        path = filedialog.askdirectory()
        if path:
            self.output_dir.set(path)

    def redirect_output(self):
        """Replace ``sys.stdout`` with a tee that also appends to the log pane."""
        class StdoutRedirector:
            def __init__(self, text_widget):
                self.text_widget = text_widget
                self.original_stdout = sys.stdout

            def write(self, message):
                # Append to the log pane, keep it scrolled to the end, and
                # still forward the text to the real stdout.
                self.text_widget.insert(tk.END, message)
                self.text_widget.see(tk.END)
                self.original_stdout.write(message)

            def flush(self):
                self.original_stdout.flush()

        sys.stdout = StdoutRedirector(self.log_text)

    def start_conversion(self, convert_all):
        """Collect the input files and start the conversion on a worker thread.

        Args:
            convert_all: When true, convert every listed file; otherwise only
                the checked ones.
        """
        input_files = []
        for item in self.tree.get_children():
            if convert_all or self.tree.item(item, "values")[0] == "[√]":
                # The full path was stored as the row's first tag.
                input_files.append(self.tree.item(item, "tags")[0])

        if not input_files:
            msg = "没有找到可转换的文件" if convert_all else "没有选中任何文件"
            messagebox.showwarning("警告", msg)
            return

        os.makedirs(self.output_dir.get(), exist_ok=True)

        try:
            if self.mode.get() == "audio_to_ogg":
                # None disables loudness normalisation in the worker.
                target_lufs = self.target_lufs.get() if self.enable_loudnorm.get() else None
                thread = threading.Thread(target=self.convert_audio_to_ogg, args=(target_lufs, input_files))
            else:
                thread = threading.Thread(target=self.convert_ogg_to_audio, args=(input_files,))
            thread.start()
        except Exception as e:
            print(f"转换初始化失败: {str(e)}")

    def convert_audio_to_ogg(self, target_lufs, input_files):
        """Convert each input file to a 16 kHz mono Opus ``.ogg`` in the output directory.

        Args:
            target_lufs: Integrated loudness target in LUFS, or ``None`` to
                skip loudness normalisation.
            input_files: Paths of the files to convert.

        Failures are logged per file and do not stop the batch.
        """
        for input_path in input_files:
            try:
                filename = os.path.basename(input_path)
                base_name = os.path.splitext(filename)[0]
                output_path = os.path.join(self.output_dir.get(), f"{base_name}.ogg")

                print(f"正在转换: {filename}")
                stream = ffmpeg.input(input_path)
                if target_lufs is not None:
                    # The loudness setting used to be ignored entirely.  Route
                    # the audio through ffmpeg's `loudnorm` filter, whose `i`
                    # option is the integrated loudness target.
                    stream = stream.audio.filter('loudnorm', i=target_lufs)
                (
                    stream
                    .output(output_path, acodec='libopus', audio_bitrate='16k', ac=1, ar=16000, frame_duration=60)
                    .run(overwrite_output=True)
                )
                print(f"转换成功: {filename}\n")
            except Exception as e:
                print(f"转换失败: {str(e)}\n")

    def convert_ogg_to_audio(self, input_files):
        """Decode each ``.ogg`` input to a ``.wav`` file in the output directory.

        Args:
            input_files: Paths of the OGG files to convert.

        Failures are logged per file and do not stop the batch.
        """
        for input_path in input_files:
            try:
                filename = os.path.basename(input_path)
                base_name = os.path.splitext(filename)[0]
                # This direction used to re-encode to "<name>.ogg" with Opus,
                # i.e. OGG -> OGG, and overwrote the input when the output
                # directory was the input's own directory.  Write a WAV file
                # and let ffmpeg pick its default PCM codec for the container.
                output_path = os.path.join(self.output_dir.get(), f"{base_name}.wav")

                print(f"正在转换: {filename}")
                (
                    ffmpeg
                    .input(input_path)
                    .output(output_path)
                    .run(overwrite_output=True)
                )
                print(f"转换成功: {filename}\n")
            except Exception as e:
                print(f"转换失败: {str(e)}\n")
if __name__ == "__main__":
    # Create the main window, attach the converter UI and run the Tk event loop.
    window = tk.Tk()
    app = AudioConverterApp(window)
    window.mainloop()

View File

@@ -0,0 +1,95 @@
# P3音频格式转换与播放工具
这个目录包含多个用于处理P3格式音频文件的Python脚本：
## 1. 音频转换工具 (convert_audio_to_p3.py)
将普通音频文件转换为P3格式4字节header + Opus数据包的流式结构并进行响度标准化。
### 使用方法
```bash
python convert_audio_to_p3.py <输入音频文件> <输出P3文件> [-l LUFS] [-d]
```
其中，可选选项 `-l` 用于指定响度标准化的目标响度，默认为 -16 LUFS；可选选项 `-d` 可以禁用响度标准化。
如果输入的音频文件符合下面的任一条件,建议使用 `-d` 禁用响度标准化:
- 音频过短
- 音频已经调整过响度
- 音频来自默认 TTS （小智当前使用的 TTS 的默认响度已是 -16 LUFS）
例如:
```bash
python convert_audio_to_p3.py input.mp3 output.p3
```
## 2. P3音频播放工具 (play_p3.py)
播放P3格式的音频文件。
### 特性
- 解码并播放P3格式的音频文件
- 在播放结束或用户中断时应用淡出效果,避免破音
- 支持通过命令行参数指定要播放的文件
### 使用方法
```bash
python play_p3.py <P3文件路径>
```
例如:
```bash
python play_p3.py output.p3
```
## 3. 音频转回工具 (convert_p3_to_audio.py)
将P3格式转换回普通音频文件。
### 使用方法
```bash
python convert_p3_to_audio.py <输入P3文件> <输出音频文件>
```
输出音频文件需要有扩展名。
例如:
```bash
python convert_p3_to_audio.py input.p3 output.wav
```
## 4. 音频/P3批量转换工具
一个图形化的工具，支持批量转换音频到P3、P3到音频。
![](./img/img.png)
### 使用方法:
```bash
python batch_convert_gui.py
```
## 依赖安装
在使用这些脚本前请确保安装了所需的Python库
```bash
pip install librosa opuslib numpy tqdm sounddevice pyloudnorm soundfile
```
或者使用提供的requirements.txt文件
```bash
pip install -r requirements.txt
```
## P3格式说明
P3格式是一种简单的流式音频格式结构如下
- 每个音频帧由一个4字节的头部和一个Opus编码的数据包组成
- 头部格式:[1字节类型, 1字节保留, 2字节长度]
- 采样率固定为16000Hz单声道
- 每帧时长为60ms

View File

@@ -0,0 +1,221 @@
import tkinter as tk
from tkinter import ttk, filedialog, messagebox
import os
import threading
import sys
from convert_audio_to_p3 import encode_audio_to_opus
from convert_p3_to_audio import decode_p3_to_audio
class AudioConverterApp:
    """Tkinter GUI that batch-converts between ordinary audio files and P3 files.

    The window holds a mode switch, optional loudness settings, a checkable
    file list, an output-directory picker and a log pane that mirrors stdout.
    The conversions themselves are performed by ``encode_audio_to_opus`` and
    ``decode_p3_to_audio`` on a background thread.
    """

    def __init__(self, master):
        """Build the UI inside *master* and mirror stdout into the log pane."""
        self.master = master
        master.title("音频/P3 批量转换工具")
        master.geometry("680x600")  # initial window size

        # Tk variables backing the widgets.
        self.mode = tk.StringVar(value="audio_to_p3")
        self.output_dir = tk.StringVar(value=os.path.abspath("output"))
        self.enable_loudnorm = tk.BooleanVar(value=True)
        self.target_lufs = tk.DoubleVar(value=-16.0)

        self.create_widgets()
        self.redirect_output()

    def create_widgets(self):
        """Create and lay out all widgets of the main window."""
        root = self.master

        # Row 0: conversion mode.
        mode_frame = ttk.LabelFrame(root, text="转换模式")
        mode_frame.grid(row=0, column=0, padx=10, pady=5, sticky="ew")
        mode_choices = (("音频转P3", "audio_to_p3"), ("P3转音频", "p3_to_audio"))
        for col, (caption, mode_value) in enumerate(mode_choices):
            ttk.Radiobutton(
                mode_frame, text=caption, variable=self.mode, value=mode_value,
                command=self.toggle_settings, width=12,
            ).grid(row=0, column=col, padx=5)

        # Row 1: loudness settings (hidden in P3 -> audio mode).
        self.loudnorm_frame = ttk.Frame(root)
        self.loudnorm_frame.grid(row=1, column=0, padx=10, pady=5, sticky="ew")
        loudnorm_toggle = ttk.Checkbutton(
            self.loudnorm_frame, text="启用响度调整",
            variable=self.enable_loudnorm, width=15,
        )
        loudnorm_toggle.grid(row=0, column=0, padx=2)
        lufs_entry = ttk.Entry(self.loudnorm_frame, textvariable=self.target_lufs, width=6)
        lufs_entry.grid(row=0, column=1, padx=2)
        ttk.Label(self.loudnorm_frame, text="LUFS").grid(row=0, column=2, padx=2)

        # Row 2: input files.
        file_frame = ttk.LabelFrame(root, text="输入文件")
        file_frame.grid(row=2, column=0, padx=10, pady=5, sticky="nsew")
        file_actions = (
            ("选择文件", self.select_files),
            ("移除选中", self.remove_selected),
            ("清空列表", self.clear_files),
        )
        for col, (caption, handler) in enumerate(file_actions):
            ttk.Button(file_frame, text=caption, command=handler,
                       width=12).grid(row=0, column=col, padx=5, pady=2)

        # The file list is a Treeview whose first column is a text checkbox.
        self.tree = ttk.Treeview(file_frame, columns=("selected", "filename"),
                                 show="headings", height=8)
        for column_id, title, width in (("selected", "选中", 60), ("filename", "文件名", 600)):
            self.tree.heading(column_id, text=title, anchor=tk.W)
            self.tree.column(column_id, width=width, anchor=tk.W)
        self.tree.grid(row=1, column=0, columnspan=3, sticky="nsew", padx=5, pady=2)
        self.tree.bind("<ButtonRelease-1>", self.on_tree_click)

        # Row 3: output directory.
        output_frame = ttk.LabelFrame(root, text="输出目录")
        output_frame.grid(row=3, column=0, padx=10, pady=5, sticky="ew")
        dir_entry = ttk.Entry(output_frame, textvariable=self.output_dir, width=60)
        dir_entry.grid(row=0, column=0, padx=5, sticky="ew")
        browse_button = ttk.Button(output_frame, text="浏览",
                                   command=self.select_output_dir, width=8)
        browse_button.grid(row=0, column=1, padx=5)

        # Row 4: conversion buttons.
        button_frame = ttk.Frame(root)
        button_frame.grid(row=4, column=0, padx=10, pady=10, sticky="ew")
        convert_all_button = ttk.Button(
            button_frame, text="转换全部文件",
            command=lambda: self.start_conversion(True), width=15,
        )
        convert_all_button.pack(side=tk.LEFT, padx=5)
        convert_checked_button = ttk.Button(
            button_frame, text="转换选中文件",
            command=lambda: self.start_conversion(False), width=15,
        )
        convert_checked_button.pack(side=tk.LEFT, padx=5)

        # Row 5: log pane.
        log_frame = ttk.LabelFrame(root, text="日志")
        log_frame.grid(row=5, column=0, padx=10, pady=5, sticky="nsew")
        self.log_text = tk.Text(log_frame, height=14, width=80)
        self.log_text.pack(fill=tk.BOTH, expand=True)

        # Resize behaviour: the file list and the log pane take the slack.
        root.columnconfigure(0, weight=1)
        root.rowconfigure(2, weight=1)
        root.rowconfigure(5, weight=3)
        file_frame.columnconfigure(0, weight=1)
        file_frame.rowconfigure(1, weight=1)

    def toggle_settings(self):
        """Show the loudness controls only while the audio -> P3 mode is active."""
        if self.mode.get() != "audio_to_p3":
            self.loudnorm_frame.grid_remove()
            return
        self.loudnorm_frame.grid()

    def select_files(self):
        """Prompt for input files and append them, unchecked, to the list."""
        if self.mode.get() == "audio_to_p3":
            pattern = ("音频文件", "*.wav *.mp3 *.ogg *.flac")
        else:
            pattern = ("P3文件", "*.p3")

        for path in filedialog.askopenfilenames(filetypes=[pattern]):
            # Show only the base name; keep the full path as the row's tag.
            self.tree.insert("", tk.END, values=("[ ]", os.path.basename(path)), tags=(path,))

    def on_tree_click(self, event):
        """Flip the text checkbox when the first column of a row is clicked."""
        if self.tree.identify("region", event.x, event.y) != "cell":
            return
        clicked_column = self.tree.identify_column(event.x)
        row = self.tree.identify_row(event.y)
        if clicked_column != "#1":  # only the "selected" column toggles
            return
        mark = self.tree.item(row, "values")[0]
        flipped = "[√]" if mark == "[ ]" else "[ ]"
        self.tree.item(row, values=(flipped, self.tree.item(row, "values")[1]))

    def remove_selected(self):
        """Delete every checked row from the file list."""
        checked_rows = [
            row for row in self.tree.get_children()
            if self.tree.item(row, "values")[0] == "[√]"
        ]
        for row in checked_rows[::-1]:
            self.tree.delete(row)

    def clear_files(self):
        """Delete every row from the file list."""
        for row in self.tree.get_children():
            self.tree.delete(row)

    def select_output_dir(self):
        """Let the user choose the output directory; a cancelled dialog changes nothing."""
        chosen = filedialog.askdirectory()
        if chosen:
            self.output_dir.set(chosen)

    def redirect_output(self):
        """Swap ``sys.stdout`` for a tee that also appends to the log pane."""
        class StdoutRedirector:
            def __init__(self, text_widget):
                self.text_widget = text_widget
                self.original_stdout = sys.stdout

            def write(self, message):
                widget = self.text_widget
                widget.insert(tk.END, message)
                widget.see(tk.END)  # keep the newest output visible
                self.original_stdout.write(message)

            def flush(self):
                self.original_stdout.flush()

        sys.stdout = StdoutRedirector(self.log_text)

    def start_conversion(self, convert_all):
        """Gather the files to convert and launch the worker thread.

        Args:
            convert_all: True converts every listed file, False only the
                checked ones.
        """
        selected_paths = [
            self.tree.item(row, "tags")[0]  # full path stored as first tag
            for row in self.tree.get_children()
            if convert_all or self.tree.item(row, "values")[0] == "[√]"
        ]

        if not selected_paths:
            messagebox.showwarning(
                "警告",
                "没有找到可转换的文件" if convert_all else "没有选中任何文件",
            )
            return

        os.makedirs(self.output_dir.get(), exist_ok=True)

        try:
            if self.mode.get() == "audio_to_p3":
                if self.enable_loudnorm.get():
                    target_lufs = self.target_lufs.get()
                else:
                    target_lufs = None  # loudness normalisation disabled
                worker = threading.Thread(target=self.convert_audio_to_p3,
                                          args=(target_lufs, selected_paths))
            else:
                worker = threading.Thread(target=self.convert_p3_to_audio,
                                          args=(selected_paths,))
            worker.start()
        except Exception as e:
            print(f"转换初始化失败: {str(e)}")

    def _convert_batch(self, input_files, extension, convert_one):
        """Run ``convert_one(src, dst)`` for each input, logging success or failure per file."""
        for source_path in input_files:
            try:
                filename = os.path.basename(source_path)
                stem, _ = os.path.splitext(filename)
                destination = os.path.join(self.output_dir.get(), f"{stem}{extension}")

                print(f"正在转换: {filename}")
                convert_one(source_path, destination)
                print(f"转换成功: {filename}\n")
            except Exception as e:
                print(f"转换失败: {str(e)}\n")

    def convert_audio_to_p3(self, target_lufs, input_files):
        """Encode each input file to ``<name>.p3`` in the output directory.

        Args:
            target_lufs: Loudness target passed to ``encode_audio_to_opus``;
                ``None`` when loudness adjustment is disabled.
            input_files: Paths of the audio files to convert.
        """
        self._convert_batch(
            input_files, ".p3",
            lambda src, dst: encode_audio_to_opus(src, dst, target_lufs),
        )

    def convert_p3_to_audio(self, input_files):
        """Decode each P3 input file to ``<name>.wav`` in the output directory."""
        self._convert_batch(
            input_files, ".wav",
            lambda src, dst: decode_p3_to_audio(src, dst),
        )
if __name__ == "__main__":
    # Create the main window, attach the converter UI and run the Tk event loop.
    window = tk.Tk()
    app = AudioConverterApp(window)
    window.mainloop()

View File

@@ -0,0 +1,62 @@
# convert audio files to protocol v3 stream
import librosa
import opuslib
import struct
import sys
import tqdm
import numpy as np
import argparse
import pyloudnorm as pyln
def encode_audio_to_opus(input_file, output_file, target_lufs=None):
    """Encode an audio file into a P3 stream of 60 ms Opus packets.

    The audio is loaded with librosa, down-mixed to mono, optionally
    loudness-normalised, resampled to 16 kHz and written as a sequence of
    packets, each prefixed with a 4-byte big-endian header
    ``(type=0, reserved=0, payload length)``.

    Args:
        input_file: Path of the audio file to read.
        output_file: Path of the P3 file to write.
        target_lufs: Integrated loudness target in LUFS, or ``None`` to skip
            loudness normalisation.
    """
    # Load audio file using librosa
    audio, sample_rate = librosa.load(input_file, sr=None, mono=False, dtype=np.float32)

    # Convert to mono if stereo
    if audio.ndim == 2:
        audio = librosa.to_mono(audio)

    if target_lufs is not None:
        print("Note: Automatic loudness adjustment is enabled, which may cause", file=sys.stderr)
        print(" audio distortion. If the input audio has already been ", file=sys.stderr)
        print(" loudness-adjusted or if the input audio is TTS audio, ", file=sys.stderr)
        print(" please use the `-d` parameter to disable loudness adjustment.", file=sys.stderr)
        meter = pyln.Meter(sample_rate)
        current_loudness = meter.integrated_loudness(audio)
        audio = pyln.normalize.loudness(audio, current_loudness, target_lufs)
        print(f"Adjusted loudness: {current_loudness:.1f} LUFS -> {target_lufs} LUFS")

    # Convert sample rate to 16000Hz if necessary
    target_sample_rate = 16000
    if sample_rate != target_sample_rate:
        audio = librosa.resample(audio, orig_sr=sample_rate, target_sr=target_sample_rate)
        sample_rate = target_sample_rate

    # Convert audio data back to int16 after processing.  Loudness gain and
    # resampling can push samples beyond [-1, 1]; without clipping, the int16
    # cast wraps around and turns peaks into loud clicks.
    audio = (np.clip(audio, -1.0, 1.0) * 32767).astype(np.int16)

    # Initialize Opus encoder
    encoder = opuslib.Encoder(sample_rate, 1, opuslib.APPLICATION_AUDIO)

    # Encode and save
    with open(output_file, 'wb') as f:
        duration = 60  # 60ms per frame
        frame_size = int(sample_rate * duration / 1000)
        # Walk over the whole signal.  The previous upper bound of
        # `len(audio) - frame_size` dropped the trailing frame(s), so clips
        # no longer than one frame produced an empty file.
        for i in tqdm.tqdm(range(0, len(audio), frame_size)):
            frame = audio[i:i + frame_size]
            if len(frame) < frame_size:
                # The encoder needs a full frame: pad the tail with silence.
                frame = np.pad(frame, (0, frame_size - len(frame)))
            opus_data = encoder.encode(frame.tobytes(), frame_size=frame_size)
            packet = struct.pack('>BBH', 0, 0, len(opus_data)) + opus_data
            f.write(packet)
if __name__ == "__main__":
    # Command-line entry point: convert one audio file to a P3 file.
    parser = argparse.ArgumentParser(description='Convert audio to Opus with loudness normalization')
    parser.add_argument('input_file', help='Input audio file')
    # The output is a P3 stream (header-prefixed Opus packets), not a
    # standalone .opus file, so the help text now says so.
    parser.add_argument('output_file', help='Output .p3 file')
    parser.add_argument('-l', '--lufs', type=float, default=-16.0,
                        help='Target loudness in LUFS (default: -16)')
    parser.add_argument('-d', '--disable-loudnorm', action='store_true',
                        help='Disable loudness normalization')
    args = parser.parse_args()

    # None tells the encoder to skip loudness normalisation.
    target_lufs = None if args.disable_loudnorm else args.lufs
    encode_audio_to_opus(args.input_file, args.output_file, target_lufs)

View File

@@ -0,0 +1,51 @@
import struct
import sys
import opuslib
import numpy as np
from tqdm import tqdm
import soundfile as sf
def decode_p3_to_audio(input_file, output_file):
    """Decode a P3 file into a 16-bit PCM audio file.

    The input is read as a sequence of packets, each a 4-byte big-endian
    header ``(type, reserved, payload length)`` followed by an Opus payload.
    Reading stops at the first incomplete header or payload.

    Args:
        input_file: Path of the P3 file to read.
        output_file: Path of the audio file written through ``soundfile``.

    Raises:
        ValueError: If no packet could be decoded.
    """
    sample_rate = 16000
    channels = 1
    decoder = opuslib.Decoder(sample_rate, channels)
    samples_per_frame = int(sample_rate * 60 / 1000)  # 60 ms frames
    decoded_chunks = []

    with open(input_file, "rb") as stream:
        # Seeking to the end returns the file size, used as the progress total.
        total_size = stream.seek(0, 2)
        stream.seek(0)

        with tqdm(total=total_size, unit="B", unit_scale=True) as progress:
            while True:
                header = stream.read(4)
                if len(header) < 4:
                    break

                _pkt_type, _reserved, payload_len = struct.unpack(">BBH", header)
                payload = stream.read(payload_len)
                if len(payload) != payload_len:
                    break  # truncated packet: stop decoding

                pcm_bytes = decoder.decode(payload, samples_per_frame)
                decoded_chunks.append(np.frombuffer(pcm_bytes, dtype=np.int16))

                progress.update(4 + payload_len)

    if not decoded_chunks:
        raise ValueError("No valid audio data found")

    sf.write(output_file, np.concatenate(decoded_chunks), sample_rate, subtype="PCM_16")
if __name__ == "__main__":
    # Exactly two positional arguments are expected: input path and output path.
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("Usage: python convert_p3_to_audio.py <input.p3> <output.wav>")
        sys.exit(1)

    source_path, destination_path = cli_args
    decode_p3_to_audio(source_path, destination_path)

Binary file not shown.

After

Width:  |  Height:  |  Size: 142 KiB

View File

@@ -0,0 +1,241 @@
import tkinter as tk
from tkinter import filedialog, messagebox
import threading
import time
import opuslib
import struct
import numpy as np
import sounddevice as sd
import os
def play_p3_file(input_file, stop_event=None, pause_event=None):
    """Decode a P3 file and play it on the default output device.

    Each packet is a 4-byte big-endian header (1 byte type, 1 byte reserved,
    2 bytes payload length) followed by an Opus payload.

    Args:
        input_file: Path of the P3 file to play.
        stop_event: Optional event; playback ends once it is set.
        pause_event: Optional event; while it is set, playback idles and
            polls every 0.1 s.
    """
    sample_rate = 16000  # fixed 16 kHz
    channels = 1  # mono
    decoder = opuslib.Decoder(sample_rate, channels)
    samples_per_frame = int(sample_rate * 60 / 1000)  # 60 ms frames

    stream = sd.OutputStream(samplerate=sample_rate, channels=channels, dtype='int16')
    stream.start()

    try:
        with open(input_file, 'rb') as p3_file:
            print(f"正在播放: {input_file}")

            while not (stop_event and stop_event.is_set()):
                if pause_event and pause_event.is_set():
                    time.sleep(0.1)
                    continue

                # 4-byte packet header.
                header = p3_file.read(4)
                if len(header) < 4:
                    break
                _packet_type, _reserved, payload_len = struct.unpack('>BBH', header)

                # Opus payload; an empty or truncated payload ends playback.
                payload = p3_file.read(payload_len)
                if not payload or len(payload) < payload_len:
                    break

                pcm_bytes = decoder.decode(payload, samples_per_frame)
                stream.write(np.frombuffer(pcm_bytes, dtype=np.int16))

    except KeyboardInterrupt:
        print("\n播放已停止")
    finally:
        stream.stop()
        stream.close()
        print("播放完成")
class P3PlayerApp:
    """Minimal Tkinter playlist player for P3 files.

    Playback runs on a daemon thread that calls ``play_p3_file``; the
    ``stop_event`` and ``pause_event`` events are the only channel used to
    control that thread.
    """

    def __init__(self, root):
        """Initialise the player state and build the UI inside *root*."""
        self.root = root
        self.root.title("P3 文件简易播放器")
        self.root.geometry("500x400")

        self.playlist = []
        self.current_index = 0
        self.is_playing = False
        self.is_paused = False
        self.stop_event = threading.Event()
        self.pause_event = threading.Event()
        self.loop_playback = tk.BooleanVar(value=False)  # state of the "loop" checkbox

        # Create the widgets.
        self.create_widgets()

    def create_widgets(self):
        """Create the playlist, the option row, the transport buttons and the status label."""
        # Playlist.
        self.playlist_label = tk.Label(self.root, text="播放列表:")
        self.playlist_label.pack(pady=5)

        self.playlist_frame = tk.Frame(self.root)
        self.playlist_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)

        self.playlist_listbox = tk.Listbox(self.playlist_frame, selectmode=tk.SINGLE)
        self.playlist_listbox.pack(fill=tk.BOTH, expand=True)

        # Remove button and loop checkbox.
        self.checkbox_frame = tk.Frame(self.root)
        self.checkbox_frame.pack(pady=5)

        self.remove_button = tk.Button(self.checkbox_frame, text="移除文件", command=self.remove_files)
        self.remove_button.pack(side=tk.LEFT, padx=5)

        self.loop_checkbox = tk.Checkbutton(self.checkbox_frame, text="循环播放", variable=self.loop_playback)
        self.loop_checkbox.pack(side=tk.LEFT, padx=5)

        # Transport buttons.
        self.control_frame = tk.Frame(self.root)
        self.control_frame.pack(pady=10)

        self.add_button = tk.Button(self.control_frame, text="添加文件", command=self.add_file)
        self.add_button.grid(row=0, column=0, padx=5)

        self.play_button = tk.Button(self.control_frame, text="播放", command=self.play)
        self.play_button.grid(row=0, column=1, padx=5)

        self.pause_button = tk.Button(self.control_frame, text="暂停", command=self.pause)
        self.pause_button.grid(row=0, column=2, padx=5)

        self.stop_button = tk.Button(self.control_frame, text="停止", command=self.stop)
        self.stop_button.grid(row=0, column=3, padx=5)

        # Status label.
        self.status_label = tk.Label(self.root, text="未在播放", fg="blue")
        self.status_label.pack(pady=10)

    def add_file(self):
        """Ask for P3 files and append them to the playlist."""
        files = filedialog.askopenfilenames(filetypes=[("P3 文件", "*.p3")])
        if files:
            self.playlist.extend(files)
            self.update_playlist()

    def update_playlist(self):
        """Rebuild the listbox from ``self.playlist``, showing base names only."""
        self.playlist_listbox.delete(0, tk.END)
        for file in self.playlist:
            self.playlist_listbox.insert(tk.END, os.path.basename(file))

    def update_status(self, status_text, color="blue"):
        """Set the text and colour of the status label."""
        self.status_label.config(text=status_text, fg=color)

    def play(self):
        """Resume if paused; otherwise start playing from the selected (or first) entry."""
        if not self.playlist:
            messagebox.showwarning("警告", "播放列表为空！")
            return

        if self.is_paused:
            self.is_paused = False
            self.pause_event.clear()
            self.update_status(f"正在播放：{os.path.basename(self.playlist[self.current_index])}", "green")
            return

        if self.is_playing:
            return

        self.is_playing = True
        self.stop_event.clear()
        self.pause_event.clear()
        self.current_index = self.playlist_listbox.curselection()[0] if self.playlist_listbox.curselection() else 0
        self.play_thread = threading.Thread(target=self.play_audio, daemon=True)
        self.play_thread.start()
        self.update_status(f"正在播放：{os.path.basename(self.playlist[self.current_index])}", "green")

    def play_audio(self):
        """Worker-thread body: play the current entry, and keep going while looping is on.

        Without looping, a single file is played.  With looping, playback
        advances through the playlist and wraps to the first entry.
        """
        try:
            while True:
                if self.stop_event.is_set():
                    break

                if self.pause_event.is_set():
                    time.sleep(0.1)
                    continue

                # Entries can be removed while a file is playing.  With
                # looping enabled an emptied playlist used to wrap the index
                # to 0 and raise IndexError on the lookup below.
                if not self.playlist:
                    break

                # Validate the current index.
                if self.current_index >= len(self.playlist):
                    if self.loop_playback.get():
                        self.current_index = 0  # wrap to the first entry
                    else:
                        break

                file = self.playlist[self.current_index]
                self.playlist_listbox.selection_clear(0, tk.END)
                self.playlist_listbox.selection_set(self.current_index)
                self.playlist_listbox.activate(self.current_index)
                self.update_status(f"正在播放：{os.path.basename(self.playlist[self.current_index])}", "green")
                play_p3_file(file, self.stop_event, self.pause_event)

                if self.stop_event.is_set():
                    break

                if not self.loop_playback.get():
                    break  # single-shot mode: stop after the current file

                self.current_index += 1
                if self.current_index >= len(self.playlist):
                    if self.loop_playback.get():
                        self.current_index = 0
        finally:
            # Always reset the state, even when playback raised (unreadable
            # file, no audio device, ...).  Otherwise `is_playing` stays True
            # and play() refuses to start again.
            self.is_playing = False
            self.is_paused = False
            self.update_status("播放已停止", "red")

    def pause(self):
        """Toggle between paused and playing while playback is active."""
        if self.is_playing:
            self.is_paused = not self.is_paused
            if self.is_paused:
                self.pause_event.set()
                self.update_status("播放已暂停", "orange")
            else:
                self.pause_event.clear()
                self.update_status(f"正在播放：{os.path.basename(self.playlist[self.current_index])}", "green")

    def stop(self):
        """Signal the playback thread to stop and reset the player state."""
        if self.is_playing or self.is_paused:
            self.is_playing = False
            self.is_paused = False
            self.stop_event.set()
            self.pause_event.clear()
            self.update_status("播放已停止", "red")

    def remove_files(self):
        """Remove the entries selected in the listbox from the playlist."""
        selected_indices = self.playlist_listbox.curselection()
        if not selected_indices:
            messagebox.showwarning("警告", "请先选择要移除的文件！")
            return

        # Pop from the back so earlier indices stay valid.
        for index in reversed(selected_indices):
            self.playlist.pop(index)
        self.update_playlist()
if __name__ == "__main__":
    # Create the main window, attach the player UI and run the Tk event loop.
    window = tk.Tk()
    app = P3PlayerApp(window)
    window.mainloop()

View File

@@ -0,0 +1,71 @@
# 播放p3格式的音频文件
import opuslib
import struct
import numpy as np
import sounddevice as sd
import argparse
def play_p3_file(input_file):
    """Decode a P3 file and play it on the default output device.

    Each packet is a 4-byte big-endian header (1 byte type, 1 byte reserved,
    2 bytes payload length) followed by an Opus payload.  Playback ends at
    the first incomplete header or payload, or on Ctrl-C.

    Args:
        input_file: Path of the P3 file to play.
    """
    sample_rate = 16000  # fixed 16 kHz
    channels = 1  # mono
    decoder = opuslib.Decoder(sample_rate, channels)
    samples_per_frame = int(sample_rate * 60 / 1000)  # 60 ms frames

    stream = sd.OutputStream(samplerate=sample_rate, channels=channels, dtype='int16')
    stream.start()

    try:
        with open(input_file, 'rb') as p3_file:
            print(f"正在播放: {input_file}")

            while True:
                # 4-byte packet header.
                header = p3_file.read(4)
                if len(header) < 4:
                    break
                _packet_type, _reserved, payload_len = struct.unpack('>BBH', header)

                # Opus payload; an empty or truncated payload ends playback.
                payload = p3_file.read(payload_len)
                if not payload or len(payload) < payload_len:
                    break

                pcm_bytes = decoder.decode(payload, samples_per_frame)
                stream.write(np.frombuffer(pcm_bytes, dtype=np.int16))

    except KeyboardInterrupt:
        print("\n播放已停止")
    finally:
        stream.stop()
        stream.close()
        print("播放完成")
def main():
    """Parse the command line and play the requested P3 file."""
    cli = argparse.ArgumentParser(description='播放p3格式的音频文件')
    cli.add_argument('input_file', help='输入的p3文件路径')
    options = cli.parse_args()
    play_p3_file(options.input_file)
# Run the player only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,7 @@
librosa>=0.9.2
opuslib>=3.0.1
numpy>=1.20.0
tqdm>=4.62.0
sounddevice>=0.4.4
pyloudnorm>=0.1.1
soundfile>=0.13.1

153
scripts/release.py Executable file
View File

@@ -0,0 +1,153 @@
import sys
import os
import json
import zipfile
import argparse
# Switch to the project root (the parent of this script's directory) so the
# relative paths used below resolve regardless of the caller's working directory.
os.chdir(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
def get_board_type():
    """Return the board type of the current build, or ``None`` if it cannot be found.

    Reads ``build/compile_commands.json`` and extracts the value of the
    ``-DBOARD_TYPE=\\"...\\"`` define from the compile command of the first
    ``main.cc`` entry that carries it.
    """
    marker = "-DBOARD_TYPE=\\\""
    with open("build/compile_commands.json", encoding="utf-8") as f:
        data = json.load(f)

    for item in data:
        if not item["file"].endswith("main.cc"):
            continue
        command = item["command"]
        # A main.cc compiled without the define (e.g. one belonging to
        # another component) used to raise IndexError; skip it and keep
        # looking instead.
        if marker not in command:
            continue
        # extract -DBOARD_TYPE=xxx
        return command.split(marker)[1].split("\\\"")[0].strip()
    return None
def get_project_version():
    """Return the version from the first ``set(PROJECT_VER "...")`` line of ``CMakeLists.txt``.

    Returns ``None`` when no line starts with ``set(PROJECT_VER``.
    """
    with open("CMakeLists.txt") as cmake_file:
        version_line = next(
            (text for text in cmake_file if text.startswith("set(PROJECT_VER")),
            None,
        )
    if version_line is None:
        return None
    # The version is the text between the first pair of double quotes.
    return version_line.split("\"")[1].strip()
def merge_bin():
    """Run ``idf.py merge-bin``; exit the process with status 1 if it fails."""
    status = os.system("idf.py merge-bin")
    if status == 0:
        return
    print("merge bin failed")
    sys.exit(1)
def zip_bin(board_type, project_version):
    """Pack ``build/merged-binary.bin`` into ``releases/v<version>_<board>.zip``.

    The ``releases`` directory is created when missing and an existing
    archive of the same name is replaced.

    Args:
        board_type: Board name used in the archive file name.
        project_version: Version string used in the archive file name.
    """
    archive_path = f"releases/v{project_version}_{board_type}.zip"

    if not os.path.exists("releases"):
        os.makedirs("releases")

    # Drop any stale archive before writing the new one.
    try:
        os.remove(archive_path)
    except FileNotFoundError:
        pass

    with zipfile.ZipFile(archive_path, 'w', compression=zipfile.ZIP_DEFLATED) as archive:
        archive.write("build/merged-binary.bin", arcname="merged-binary.bin")
    print(f"zip bin to {archive_path} done")
def release_current():
    """Merge and zip the firmware of the build currently present in ``build/``."""
    merge_bin()

    detected_board = get_board_type()
    print("board type:", detected_board)

    version = get_project_version()
    print("project version:", version)

    zip_bin(detected_board, version)
def get_all_board_types():
    """Map every ``CONFIG_BOARD_TYPE_*`` option to its board type.

    Scans ``main/CMakeLists.txt`` for lines containing
    ``if(CONFIG_BOARD_TYPE_...)`` (``elseif(`` lines match as well) and, when
    the line that follows is ``set(BOARD_TYPE "...")``, records the pair.

    Returns:
        dict: config option name -> board type string.
    """
    board_configs = {}
    with open("main/CMakeLists.txt", encoding='utf-8') as f:
        lines = f.readlines()

    # Pair each line with its successor.  zip() stops one short of the end,
    # so a condition on the very last line no longer raises IndexError.
    for line, following in zip(lines, lines[1:]):
        # Look for an if(CONFIG_BOARD_TYPE_*) line.
        if "if(CONFIG_BOARD_TYPE_" not in line:
            continue
        config_name = line.strip().split("if(")[1].split(")")[0]
        # The board type is set on the next line: set(BOARD_TYPE "xxx").
        next_line = following.strip()
        if next_line.startswith("set(BOARD_TYPE"):
            board_configs[config_name] = next_line.split('"')[1]
    return board_configs
def release(board_type, board_config, config_filename="config.json"):
    """Build and package every firmware variant declared for one board.

    Reads ``main/boards/<board_type>/<config_filename>`` and, for each entry
    of its ``builds`` list, sets the target, appends the sdkconfig options,
    builds, merges the binaries and zips the result.  Variants whose archive
    already exists under ``releases/`` are skipped.  Any failing ``idf.py``
    step terminates the process with status 1.

    Args:
        board_type: Board directory name under ``main/boards``.
        board_config: Kconfig option enabling the board; written to
            sdkconfig as ``<board_config>=y``.
        config_filename: Name of the JSON build description inside the
            board directory.

    Raises:
        ValueError: If a build name does not start with *board_type*.
    """
    def run_step(command, failure_message):
        # Run one shell command; abort the whole release when it fails.
        if os.system(command) != 0:
            print(failure_message)
            sys.exit(1)

    config_path = f"main/boards/{board_type}/{config_filename}"
    if not os.path.exists(config_path):
        print(f"跳过 {board_type} 因为 {config_filename} 不存在")
        return

    # Print Project Version
    project_version = get_project_version()
    print(f"Project Version: {project_version}", config_path)

    with open(config_path, "r") as config_file:
        config = json.load(config_file)
    target, builds = config["target"], config["builds"]

    for build in builds:
        name = build["name"]
        if not name.startswith(board_type):
            raise ValueError(f"name {name} 必须以 {board_type} 开头")

        output_path = f"releases/v{project_version}_{name}.zip"
        if os.path.exists(output_path):
            print(f"跳过 {board_type} 因为 {output_path} 已存在")
            continue

        # The board option always comes first, then the per-build extras.
        sdkconfig_lines = [f"{board_config}=y", *build.get("sdkconfig_append", [])]

        print(f"name: {name}")
        print(f"target: {target}")
        for entry in sdkconfig_lines:
            print(f"sdkconfig_append: {entry}")

        # Unset IDF_TARGET before calling set-target.
        os.environ.pop("IDF_TARGET", None)
        run_step(f"idf.py set-target {target}", "set-target failed")

        # Append the options to sdkconfig.
        with open("sdkconfig", "a") as sdkconfig:
            sdkconfig.write("\n")
            for entry in sdkconfig_lines:
                sdkconfig.write(f"{entry}\n")

        # Build with macro BOARD_NAME defined to name, then merge and zip.
        run_step(f"idf.py -DBOARD_NAME={name} build", "build failed")
        run_step("idf.py merge-bin", "merge-bin failed")
        zip_bin(name, project_version)
        print("-" * 80)
if __name__ == "__main__":
    # Command-line entry point.  Without a board argument the current build
    # is packaged; with one, the matching board(s) are built and packaged.
    parser = argparse.ArgumentParser()
    parser.add_argument("board", nargs="?", default=None, help="板子类型或 all")
    parser.add_argument("-c", "--config", default="config.json", help="指定 config 文件名，默认 config.json")
    parser.add_argument("--list-boards", action="store_true", help="列出所有支持的 board 列表")
    parser.add_argument("--json", action="store_true", help="配合 --list-boardsJSON 格式输出")
    args = parser.parse_args()

    if args.list_boards:
        # Listing mode: print the known board types and exit.
        boards = list(get_all_board_types().values())
        if args.json:
            print(json.dumps(boards))
        else:
            for board in boards:
                print(board)
        sys.exit(0)

    if not args.board:
        release_current()
    else:
        board_configs = get_all_board_types()
        released_any = False
        for config_name, known_board in board_configs.items():
            if args.board != 'all' and known_board != args.board:
                continue
            release(known_board, config_name, config_filename=args.config)
            released_any = True

        if not released_any:
            print(f"未找到板子类型: {args.board}")
            print("可用的板子类型:")
            for known_board in board_configs.values():
                print(f" {known_board}")

View File

@@ -0,0 +1,208 @@
<!DOCTYPE html>
<html lang="zh">
<head>
<meta charset="UTF-8" />
<title>小智声波配网</title>
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<style>
body {
font-family: "Segoe UI", "PingFang SC", sans-serif;
background: #f0f2f5;
margin: 0;
padding: 2rem 1rem;
display: flex;
justify-content: center;
}
.card {
background: #fff;
padding: 2rem 1.5rem;
border-radius: 16px;
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.08);
max-width: 400px;
width: 100%;
}
h2 {
text-align: center;
margin-bottom: 2rem;
}
label {
font-weight: bold;
display: block;
margin: 1rem 0 0.3rem;
}
input[type="text"],
input[type="password"] {
width: 100%;
padding: 0.75rem;
font-size: 1rem;
border-radius: 8px;
border: 1px solid #ccc;
box-sizing: border-box;
}
input[type="checkbox"] {
margin-right: 0.5rem;
}
.checkbox-container {
margin-top: 1rem;
font-size: 0.95rem;
}
button {
width: 100%;
margin-top: 1rem;
padding: 0.8rem;
font-size: 1rem;
border: none;
border-radius: 8px;
background-color: #4a90e2;
color: #fff;
cursor: pointer;
transition: background-color 0.2s;
}
button:hover {
background-color: #357ab8;
}
button:active {
background-color: #2f6ea2;
}
audio {
margin-top: 1.5rem;
width: 100%;
outline: none;
}
</style>
</head>
<body>
<div class="card">
<h2>📶 小智声波配网</h2>
<label for="ssid">WiFi 名称</label>
<input id="ssid" type="text" value="" placeholder="请输入 WiFi 名称" />
<label for="pwd">WiFi 密码</label>
<input id="pwd" type="password" value="" placeholder="请输入 WiFi 密码" />
<div class="checkbox-container">
<label><input type="checkbox" id="loopCheck" checked /> 自动循环播放声波</label>
</div>
<button onclick="generate()">🎵 生成并播放声波</button>
<button onclick="stopPlay()">⏹️ 停止播放</button>
<audio id="player" controls></audio>
</div>
<script>
// Tone frequency (Hz) emitted for a 1 bit.
const MARK = 1800;
// Tone frequency (Hz) emitted for a 0 bit.
const SPACE = 1500;
// Audio sample rate (samples per second) used for synthesis and in the WAV header.
const SAMPLE_RATE = 44100;
// Bits per second; SAMPLE_RATE / BIT_RATE samples are generated for every bit.
const BIT_RATE = 100;
// Marker bytes placed before the payload.
const START_BYTES = [0x01, 0x02];
// Marker bytes placed after the checksum.
const END_BYTES = [0x03, 0x04];
// NOTE(review): not referenced by any function in this script.
let loopTimer = null;
/**
 * Sum all byte values of `data`, keeping only the low 8 bits.
 * Returns 0 for an empty array.
 */
function checksum(data) {
  let total = 0;
  data.forEach((byte) => {
    total = (total + byte) & 0xff;
  });
  return total;
}
/**
 * Expand one byte into an array of its 8 bits (0 or 1),
 * most significant bit first.
 */
function toBits(byte) {
  return Array.from({ length: 8 }, (_, pos) => (byte >> (7 - pos)) & 1);
}
/**
 * Render a bit sequence as a sine wave whose frequency is MARK for 1 bits
 * and SPACE for 0 bits.
 *
 * Every bit occupies SAMPLE_RATE / BIT_RATE samples. The sine argument is
 * computed from the absolute sample position, not restarted per bit.
 * Returns a Float32Array of samples in the range [-1, 1].
 */
function afskModulate(bits) {
  const samplesPerBit = SAMPLE_RATE / BIT_RATE;
  const samples = new Float32Array(Math.floor(bits.length * samplesPerBit));
  bits.forEach((bit, bitIndex) => {
    const omega = 2 * Math.PI * (bit ? MARK : SPACE);
    const base = bitIndex * samplesPerBit;
    for (let k = 0; k < samplesPerBit; k++) {
      const seconds = (base + k) / SAMPLE_RATE;
      samples[base + k] = Math.sin(omega * seconds);
    }
  });
  return samples;
}
/**
 * Convert float samples to signed 16-bit little-endian PCM bytes.
 *
 * Each sample is clamped to [-1, 1]; negative values are scaled by 0x8000
 * and non-negative values by 0x7fff, then truncated to an integer.
 * Returns a Uint8Array with two bytes per input sample.
 */
function floatTo16BitPCM(floatSamples) {
  const bytes = new Uint8Array(floatSamples.length * 2);
  const view = new DataView(bytes.buffer);
  floatSamples.forEach((sample, index) => {
    const clamped = Math.max(-1, Math.min(1, sample));
    const scale = clamped < 0 ? 0x8000 : 0x7fff;
    // setInt16 truncates toward zero, like the bitwise operators would.
    view.setInt16(index * 2, clamped * scale, true);
  });
  return bytes;
}
/**
 * Wrap raw PCM bytes in a 44-byte RIFF/WAVE header and return the result
 * as an 'audio/wav' Blob.
 *
 * The header declares format code 1, one channel, SAMPLE_RATE samples per
 * second and 16 bits per sample. All multi-byte fields are little-endian.
 */
function buildWav(pcm) {
  const header = new Uint8Array(44);
  const view = new DataView(header.buffer);
  const dataLen = pcm.length;
  const putTag = (at, tag) => {
    Array.from(tag).forEach((ch, k) => view.setUint8(at + k, ch.charCodeAt(0)));
  };

  putTag(0, 'RIFF');
  view.setUint32(4, 36 + dataLen, true); // size of everything after this field
  putTag(8, 'WAVE');

  putTag(12, 'fmt ');
  view.setUint32(16, 16, true); // fmt chunk length
  view.setUint16(20, 1, true); // format code
  view.setUint16(22, 1, true); // channel count
  view.setUint32(24, SAMPLE_RATE, true); // samples per second
  view.setUint32(28, SAMPLE_RATE * 2, true); // bytes per second
  view.setUint16(32, 2, true); // bytes per sample frame
  view.setUint16(34, 16, true); // bits per sample

  putTag(36, 'data');
  view.setUint32(40, dataLen, true);

  return new Blob([header, pcm], { type: 'audio/wav' });
}
/**
 * Read the SSID and password fields, encode them as an AFSK WAV and start
 * playing it in the #player element.
 *
 * Frame layout: START_BYTES, the UTF-8 bytes of "ssid\npassword", one
 * checksum byte computed over those text bytes, END_BYTES.
 *
 * When the #loopCheck box is ticked at call time, an 'ended' handler is
 * installed that restarts playback from the beginning.
 */
function generate() {
  // Stop any playback in progress and drop a previously installed loop handler.
  stopPlay();

  const ssid = document.getElementById('ssid').value.trim();
  const pwd = document.getElementById('pwd').value.trim();
  const dataStr = ssid + '\n' + pwd;
  const textBytes = Array.from(new TextEncoder().encode(dataStr));
  const fullBytes = [...START_BYTES, ...textBytes, checksum(textBytes), ...END_BYTES];

  // Flatten in one pass instead of re-copying the whole bit array per byte.
  const bits = fullBytes.flatMap((b) => toBits(b));

  const floatBuf = afskModulate(bits);
  const pcmBuf = floatTo16BitPCM(floatBuf);
  const wavBlob = buildWav(pcmBuf);

  const audio = document.getElementById('player');
  const previousUrl = audio.src;
  audio.src = URL.createObjectURL(wavBlob);
  // Release the blob URL of the previous run; otherwise every click keeps
  // another WAV alive until the page is closed.
  if (previousUrl.startsWith('blob:')) {
    URL.revokeObjectURL(previousUrl);
  }
  audio.load();
  audio.play();

  // Looping is implemented with the 'ended' event.
  if (document.getElementById('loopCheck').checked) {
    audio.onended = function () {
      audio.currentTime = 0; // rewind to the start
      audio.play(); // play again
    };
  }
}
/**
 * Pause the #player element and remove its 'ended' handler so that a
 * looping playback does not restart.
 */
function stopPlay() {
  const player = document.getElementById('player');
  player.pause();
  // Detach the loop handler installed by generate().
  player.onended = null;
}
</script>
</body>
</html>

247
scripts/versions.py Normal file
View File

@@ -0,0 +1,247 @@
#! /usr/bin/env python3
from dotenv import load_dotenv
load_dotenv()
import os
import struct
import zipfile
import oss2
import json
import requests
from requests.exceptions import RequestException
# Switch the working directory to the project root, i.e. the parent of the
# directory containing this script, so the relative paths below resolve there.
os.chdir(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
def get_chip_id_string(chip_id):
    """Return the lowercase chip name for a numeric chip identifier.

    Raises:
        KeyError: if ``chip_id`` is not one of the known identifiers.
    """
    chip_names = {
        0x0000: "esp32",
        0x0002: "esp32s2",
        0x0005: "esp32c3",
        0x0009: "esp32s3",
        0x000C: "esp32c2",
        0x000D: "esp32c6",
        0x0010: "esp32h2",
        # Two identifiers resolve to the same name.
        0x0011: "esp32c5",
        0x0012: "esp32p4",
        0x0017: "esp32c5",
    }
    return chip_names[chip_id]
def get_flash_size(flash_size):
    """Convert a flash size code (0..7) into a size in bytes.

    Code 0 is 1 MiB and each following code doubles the size, up to
    128 MiB for code 7.

    Raises:
        KeyError: if ``flash_size`` is outside the known codes.
    """
    MB = 1024 * 1024
    megabytes_by_code = dict(enumerate((1, 2, 4, 8, 16, 32, 64, 128)))
    return megabytes_by_code[flash_size] * MB
def get_app_desc(data):
    """Parse an application description record from the start of ``data``.

    The record begins with the little-endian magic word 0xabcd5432, followed
    by fixed-width, NUL-padded UTF-8 text fields and a 32-byte ELF SHA-256.

    Returns:
        dict with the keys ``name``, ``version``, ``compile_time``
        (``date + "T" + time``), ``idf_version`` and ``elf_sha256`` (hex).

    Raises:
        Exception: if the magic word does not match.
    """
    (magic,) = struct.unpack_from("<I", data, 0x00)
    if magic != 0xabcd5432:
        raise Exception("Invalid app desc magic")

    def text_field(start, stop):
        # Fixed-width text field with NUL padding stripped from both ends.
        return data[start:stop].decode("utf-8").strip('\0')

    version = text_field(0x10, 0x30)
    project_name = text_field(0x30, 0x50)
    build_time = text_field(0x50, 0x60)
    build_date = text_field(0x60, 0x70)
    idf_ver = text_field(0x70, 0x90)

    return {
        "name": project_name,
        "version": version,
        "compile_time": build_date + "T" + build_time,
        "idf_version": idf_ver,
        "elf_sha256": data[0x90:0xb0].hex(),
    }
def get_board_name(folder):
    """Derive the board name from the last path component of ``folder``.

    * ``v0.2*`` is always "bread-simple".
    * ``v0.3*`` to ``v0.6*`` are recognised by a marker substring
      (ML307, then WiFi, then KevinBox1).
    * ``v0.7*``, ``v0.8*``, ``v0.9*``, ``v1.*`` and ``v2.*`` use the text
      between the first and second underscore.

    Raises:
        Exception: if none of the rules yields a board name.
    """
    tag = os.path.basename(folder)

    if tag.startswith("v0.2"):
        return "bread-simple"

    if tag.startswith(("v0.3", "v0.4", "v0.5", "v0.6")):
        # Order matters: the first marker found wins.
        legacy_boards = (
            ("ML307", "bread-compact-ml307"),
            ("WiFi", "bread-compact-wifi"),
            ("KevinBox1", "kevin-box-1"),
        )
        for marker, board in legacy_boards:
            if marker in tag:
                return board

    if tag.startswith(("v0.7", "v0.8", "v0.9", "v1.", "v2.")):
        return tag.split("_")[1]

    raise Exception(f"Unknown board name: {tag}")
def find_app_partition(data):
    """Locate the first app-type entry in the partition table inside ``data``.

    The table is scanned from offset 0x8000 to 0xC000 in 0x20-byte steps. An
    entry qualifies when it starts with the bytes ``aa 50`` and its type byte
    is 0x00. The match is also reported on stdout.

    Returns:
        dict with ``offset``, ``size`` and ``label`` of the entry, or ``None``
        when no entry qualifies.
    """
    table_start = 0x8000
    table_stop = table_start + 0x4000
    entry_stride = 0x20

    for pos in range(table_start, table_stop, entry_stride):
        # Every entry starts with the magic bytes aa 50.
        if data[pos] != 0xaa or data[pos + 1] != 0x50:
            continue
        # Type 0x00 marks an app partition.
        if data[pos + 2] != 0x00:
            continue
        # Two little-endian 32-bit words: offset, then size.
        offset, size = struct.unpack("<II", data[pos + 4:pos + 12])
        # The next 16 bytes hold the NUL-padded label.
        label = data[pos + 12:pos + 28].decode("utf-8").strip('\0')
        print(f"found app partition at 0x{pos:08x}, offset: 0x{offset:08x}, size: 0x{size:08x}, label: {label}")
        return {
            "offset": offset,
            "size": size,
            "label": label,
        }

    return None
def read_binary(dir_path):
    """Inspect ``merged-binary.bin`` in ``dir_path`` and describe its firmware.

    The app partition is located with ``find_app_partition``. The image inside
    it is measured by walking its segment headers, and the rest of the
    partition is checked to be 0xFF padding. The image is then written to
    ``xiaozhi.bin`` in the same directory unless that file already exists.

    Args:
        dir_path: directory containing ``merged-binary.bin``; its base name is
            also passed to ``get_board_name``.

    Returns:
        dict with ``chip_id``, ``flash_size``, ``board``, ``application`` and
        ``firmware_size``. ``None`` (after printing the reason) when no app
        partition is found, the image magic is wrong, or non-0xFF data
        follows the image.

    Raises:
        ValueError: if the segment table runs to or past the end of the
            partition data.
    """
    merged_bin_path = os.path.join(dir_path, "merged-binary.bin")
    with open(merged_bin_path, "rb") as f:
        merged_bin_data = f.read()

    # find app partition
    app_partition = find_app_partition(merged_bin_data)
    if app_partition is None:
        print("no app partition found")
        return None
    start = app_partition["offset"]
    app_data = merged_bin_data[start:start + app_partition["size"]]

    # check magic (an empty slice means the file ends before the partition)
    if not app_data or app_data[0] != 0xE9:
        print("not a valid image")
        return None

    # flash size code is the high nibble of header byte 3
    flash_size = get_flash_size(app_data[0x3] >> 4)
    chip_id = get_chip_id_string(app_data[0xC])

    # Walk the segments: each has an 8-byte header whose second 32-bit word
    # is the payload length, followed by the payload itself.
    segment_count = app_data[0x1]
    segments = []
    offset = 0x18
    image_size = 0x18
    for _ in range(segment_count):
        segment_size = struct.unpack("<I", app_data[offset + 4:offset + 8])[0]
        image_size += 8 + segment_size
        offset += 8
        segments.append(app_data[offset:offset + segment_size])
        offset += segment_size
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if offset >= len(app_data):
        raise ValueError("offset is out of bounds")

    # add checksum size, then round up to a multiple of 16
    image_size += 1
    image_size = (image_size + 15) & ~15
    # hash appended
    if app_data[0x17] == 1:
        image_size += 32
    print(f"image size: {image_size}")

    # verify the remaining data are all 0xFF
    padding = app_data[image_size:]
    non_ff_tail = padding.lstrip(b"\xff")
    if non_ff_tail:
        first_bad = image_size + len(padding) - len(non_ff_tail)
        print(f"Failed to verify image, data at 0x{first_bad:08x} is not 0xFF")
        return None
    image_data = app_data[:image_size]

    # extract bin file
    bin_path = os.path.join(dir_path, "xiaozhi.bin")
    if not os.path.exists(bin_path):
        print("extract bin file to", bin_path)
        with open(bin_path, "wb") as f:
            f.write(image_data)

    # The app desc is in the first segment
    desc = get_app_desc(segments[0])
    return {
        "chip_id": chip_id,
        "flash_size": flash_size,
        "board": get_board_name(dir_path),
        "application": desc,
        "firmware_size": image_size,
    }
def extract_zip(zip_path, extract_path):
    """Unpack every member of the archive ``zip_path`` into ``extract_path``.

    The destination directory is created first when it does not exist, and a
    progress line is printed before extraction starts.
    """
    destination_missing = not os.path.exists(extract_path)
    if destination_missing:
        os.makedirs(extract_path)
    print(f"Extracting {zip_path} to {extract_path}")
    archive = zipfile.ZipFile(zip_path, 'r')
    with archive:
        archive.extractall(extract_path)
def upload_dir_to_oss(source_dir, target_dir):
    """Upload every entry of ``source_dir`` to the OSS bucket under ``target_dir``.

    Credentials, endpoint and bucket name are read from the environment
    variables ``OSS_ACCESS_KEY_ID``, ``OSS_ACCESS_KEY_SECRET``,
    ``OSS_ENDPOINT`` and ``OSS_BUCKET_NAME``.

    Args:
        source_dir: local directory whose direct entries are uploaded.
        target_dir: key prefix inside the bucket.

    Raises:
        KeyError: if one of the environment variables is missing.
    """
    auth = oss2.Auth(os.environ['OSS_ACCESS_KEY_ID'], os.environ['OSS_ACCESS_KEY_SECRET'])
    bucket = oss2.Bucket(auth, os.environ['OSS_ENDPOINT'], os.environ['OSS_BUCKET_NAME'])
    for filename in os.listdir(source_dir):
        # Object keys use "/" as separator; normalising keeps the key the same
        # on platforms where os.path.join produces backslashes.
        oss_key = os.path.join(target_dir, filename).replace(os.sep, "/")
        print('uploading', oss_key)
        # Close each file as soon as its upload finishes.
        with open(os.path.join(source_dir, filename), 'rb') as f:
            bucket.put_object(oss_key, f)
def post_info_to_server(info, timeout=30):
    """Send firmware information to the versions server.

    The server URL and bearer token are read from the environment variables
    ``VERSIONS_SERVER_URL`` and ``VERSIONS_TOKEN``. The payload is a JSON
    object whose ``jsonData`` member is ``info`` serialised as a JSON string.

    Args:
        info: dictionary with the firmware information; must contain ``tag``.
        timeout: seconds to wait for the server before giving up.

    Raises:
        requests.exceptions.RequestException: on network failure, timeout or
            an HTTP error status.
        Exception: if the environment variables are missing, or any other
            error occurs. Every failure is printed, then re-raised.
    """
    try:
        # Read the server URL and token from the environment
        server_url = os.environ.get('VERSIONS_SERVER_URL')
        server_token = os.environ.get('VERSIONS_TOKEN')
        if not server_url or not server_token:
            raise Exception("Missing VERSIONS_SERVER_URL or VERSIONS_TOKEN in environment variables")

        # Prepare the request headers
        headers = {
            'Authorization': f'Bearer {server_token}',
            'Content-Type': 'application/json'
        }

        # Send the POST request; without a timeout a stalled server would
        # block the release script indefinitely.
        response = requests.post(
            server_url,
            headers=headers,
            json={'jsonData': json.dumps(info)},
            timeout=timeout,
        )

        # Check the response status
        response.raise_for_status()
        print(f"Successfully uploaded version info for tag: {info['tag']}")
    except RequestException as e:
        error_msg = str(e)
        error_response = getattr(e, 'response', None)
        if error_response is not None:
            # Prefer the server's own "error" field, but never let a non-JSON
            # or non-object body replace the original failure.
            try:
                body = error_response.json()
            except ValueError:
                body = None
            if isinstance(body, dict):
                error_msg = body.get('error', error_msg)
        print(f"Failed to upload version info: {error_msg}")
        raise
    except Exception as e:
        print(f"Error uploading version info: {str(e)}")
        raise
# Process the release archives found in the "releases" directory.
#
# For an archive "<tag>.zip" the working folder is "releases/<tag>" and its
# metadata file is "releases/<tag>/info.json". The steps that appear below are:
# extract the archive, analyse the firmware with read_binary(), write
# info.json, upload the folder to OSS and post the info to the versions server.
#
# NOTE(review): indentation is not preserved in this copy of the file, so it
# cannot be seen here which of those steps are nested under the
# "info.json does not exist" check - confirm against the original file before
# changing the control flow.
def main():
release_dir = "releases"
# look for zip files whose name starts with "v"
for name in os.listdir(release_dir):
if name.startswith("v") and name.endswith(".zip"):
# strip the ".zip" suffix to get the release tag
tag = name[:-4]
folder = os.path.join(release_dir, tag)
info_path = os.path.join(folder, "info.json")
if not os.path.exists(info_path):
if not os.path.exists(folder):
os.makedirs(folder)
extract_zip(os.path.join(release_dir, name), folder)
# NOTE(review): read_binary() returns None when the image cannot be parsed;
# the item assignments below would then raise TypeError.
info = read_binary(folder)
target_dir = os.path.join("firmwares", tag)
info["tag"] = tag
# NOTE(review): the URL is assembled with os.path.join, so the separator
# follows the host platform - presumably this is only run on POSIX; confirm.
info["url"] = os.path.join(os.environ['OSS_BUCKET_URL'], target_dir, "xiaozhi.bin")
open(info_path, "w").write(json.dumps(info, indent=4))
# upload every file in the folder to OSS
upload_dir_to_oss(folder, target_dir)
# read info.json back from disk
info = json.load(open(info_path))
# post the contents of info.json to the versions server
post_info_to_server(info)
# Run main() only when this file is executed as a script.
if __name__ == "__main__":
main()