add some code

This commit is contained in:
2025-09-05 13:25:11 +08:00
parent 9ff0a99e7a
commit 3cf1229a85
8911 changed files with 2535396 additions and 0 deletions

View File

@@ -0,0 +1,95 @@
# P3音频格式转换与播放工具
这个目录包含以下用于处理P3格式音频文件的Python脚本:
## 1. 音频转换工具 (convert_audio_to_p3.py)
将普通音频文件转换为P3格式(4字节header + Opus数据包的流式结构),并进行响度标准化。
### 使用方法
```bash
python convert_audio_to_p3.py <输入音频文件> <输出P3文件> [-l LUFS] [-d]
```
其中,可选选项 `-l` 用于指定响度标准化的目标响度,默认为 -16 LUFS;可选选项 `-d` 可以禁用响度标准化。
如果输入的音频文件符合下面的任一条件,建议使用 `-d` 禁用响度标准化:
- 音频过短
- 音频已经调整过响度
- 音频来自默认 TTS (小智当前使用的 TTS 的默认响度已是 -16 LUFS)
例如:
```bash
python convert_audio_to_p3.py input.mp3 output.p3
```
## 2. P3音频播放工具 (play_p3.py)
播放P3格式的音频文件。
### 特性
- 解码并播放P3格式的音频文件
- 在播放结束或用户中断时应用淡出效果,避免破音
- 支持通过命令行参数指定要播放的文件
### 使用方法
```bash
python play_p3.py <P3文件路径>
```
例如:
```bash
python play_p3.py output.p3
```
## 3. 音频转回工具 (convert_p3_to_audio.py)
将P3格式转换回普通音频文件。
### 使用方法
```bash
python convert_p3_to_audio.py <输入P3文件> <输出音频文件>
```
输出音频文件需要有扩展名。
例如:
```bash
python convert_p3_to_audio.py input.p3 output.wav
```
## 4. 音频/P3批量转换工具 (batch_convert_gui.py)
一个图形化的工具,支持批量转换:音频到P3,以及P3到音频。
![](./img/img.png)
### 使用方法:
```bash
python batch_convert_gui.py
```
## 依赖安装
在使用这些脚本前,请确保安装了所需的Python库:
```bash
pip install librosa opuslib numpy tqdm sounddevice pyloudnorm soundfile
```
或者使用提供的requirements.txt文件
```bash
pip install -r requirements.txt
```
## P3格式说明
P3格式是一种简单的流式音频格式,结构如下:
- 每个音频帧由一个4字节的头部和一个Opus编码的数据包组成
- 头部格式:[1字节类型, 1字节保留, 2字节长度(大端序,表示其后Opus数据包的字节数)]
- 采样率固定为16000Hz,单声道
- 每帧时长为60ms

View File

@@ -0,0 +1,221 @@
import tkinter as tk
from tkinter import ttk, filedialog, messagebox
import os
import threading
import sys
from convert_audio_to_p3 import encode_audio_to_opus
from convert_p3_to_audio import decode_p3_to_audio
class AudioConverterApp:
    """Tkinter window for batch conversion between ordinary audio and P3.

    The window offers a mode switch (audio -> P3 or P3 -> audio), optional
    loudness-normalisation settings, a checkable file list, an output
    directory picker and a log pane that mirrors ``sys.stdout``.

    Conversions run on a background thread so the UI stays responsive.
    The worker is kept away from Tk objects: the output directory is read
    on the UI thread before the worker starts, and printed text is handed
    over through a lock-protected buffer that the UI thread drains
    periodically.
    """

    # Interval (ms) at which the UI thread copies buffered stdout text
    # into the log pane.
    _LOG_POLL_MS = 100

    def __init__(self, master):
        """Build the window on ``master`` (the Tk root or a toplevel)."""
        self.master = master
        master.title("音频/P3 批量转换工具")
        master.geometry("680x600")  # initial window size

        # UI state variables
        self.mode = tk.StringVar(value="audio_to_p3")
        self.output_dir = tk.StringVar()
        self.output_dir.set(os.path.abspath("output"))
        self.enable_loudnorm = tk.BooleanVar(value=True)
        self.target_lufs = tk.DoubleVar(value=-16.0)

        # Widgets first: redirect_output() needs self.log_text to exist.
        self.create_widgets()
        self.redirect_output()

    def create_widgets(self):
        """Create and lay out every widget of the window."""
        # Conversion mode selector
        mode_frame = ttk.LabelFrame(self.master, text="转换模式")
        mode_frame.grid(row=0, column=0, padx=10, pady=5, sticky="ew")
        ttk.Radiobutton(mode_frame, text="音频转P3", variable=self.mode,
                        value="audio_to_p3", command=self.toggle_settings,
                        width=12).grid(row=0, column=0, padx=5)
        ttk.Radiobutton(mode_frame, text="P3转音频", variable=self.mode,
                        value="p3_to_audio", command=self.toggle_settings,
                        width=12).grid(row=0, column=1, padx=5)

        # Loudness settings (only shown in audio -> P3 mode)
        self.loudnorm_frame = ttk.Frame(self.master)
        self.loudnorm_frame.grid(row=1, column=0, padx=10, pady=5, sticky="ew")
        ttk.Checkbutton(self.loudnorm_frame, text="启用响度调整",
                        variable=self.enable_loudnorm, width=15
                        ).grid(row=0, column=0, padx=2)
        ttk.Entry(self.loudnorm_frame, textvariable=self.target_lufs,
                  width=6).grid(row=0, column=1, padx=2)
        ttk.Label(self.loudnorm_frame, text="LUFS").grid(row=0, column=2, padx=2)

        # Input file area
        file_frame = ttk.LabelFrame(self.master, text="输入文件")
        file_frame.grid(row=2, column=0, padx=10, pady=5, sticky="nsew")

        # File list buttons
        ttk.Button(file_frame, text="选择文件", command=self.select_files,
                   width=12).grid(row=0, column=0, padx=5, pady=2)
        ttk.Button(file_frame, text="移除选中", command=self.remove_selected,
                   width=12).grid(row=0, column=1, padx=5, pady=2)
        ttk.Button(file_frame, text="清空列表", command=self.clear_files,
                   width=12).grid(row=0, column=2, padx=5, pady=2)

        # File list: a Treeview whose first column acts as a text checkbox.
        self.tree = ttk.Treeview(file_frame, columns=("selected", "filename"),
                                 show="headings", height=8)
        self.tree.heading("selected", text="选中", anchor=tk.W)
        self.tree.heading("filename", text="文件名", anchor=tk.W)
        self.tree.column("selected", width=60, anchor=tk.W)
        self.tree.column("filename", width=600, anchor=tk.W)
        self.tree.grid(row=1, column=0, columnspan=3, sticky="nsew", padx=5, pady=2)
        self.tree.bind("<ButtonRelease-1>", self.on_tree_click)

        # Output directory
        output_frame = ttk.LabelFrame(self.master, text="输出目录")
        output_frame.grid(row=3, column=0, padx=10, pady=5, sticky="ew")
        ttk.Entry(output_frame, textvariable=self.output_dir, width=60
                  ).grid(row=0, column=0, padx=5, sticky="ew")
        ttk.Button(output_frame, text="浏览", command=self.select_output_dir,
                   width=8).grid(row=0, column=1, padx=5)

        # Convert buttons
        button_frame = ttk.Frame(self.master)
        button_frame.grid(row=4, column=0, padx=10, pady=10, sticky="ew")
        ttk.Button(button_frame, text="转换全部文件",
                   command=lambda: self.start_conversion(True),
                   width=15).pack(side=tk.LEFT, padx=5)
        ttk.Button(button_frame, text="转换选中文件",
                   command=lambda: self.start_conversion(False),
                   width=15).pack(side=tk.LEFT, padx=5)

        # Log pane
        log_frame = ttk.LabelFrame(self.master, text="日志")
        log_frame.grid(row=5, column=0, padx=10, pady=5, sticky="nsew")
        self.log_text = tk.Text(log_frame, height=14, width=80)
        self.log_text.pack(fill=tk.BOTH, expand=True)

        # Resize weights: the file list and the log pane absorb extra space.
        self.master.columnconfigure(0, weight=1)
        self.master.rowconfigure(2, weight=1)
        self.master.rowconfigure(5, weight=3)
        file_frame.columnconfigure(0, weight=1)
        file_frame.rowconfigure(1, weight=1)

    def toggle_settings(self):
        """Show the loudness settings only in audio -> P3 mode."""
        if self.mode.get() == "audio_to_p3":
            self.loudnorm_frame.grid()
        else:
            self.loudnorm_frame.grid_remove()

    def select_files(self):
        """Ask for input files matching the current mode and list them.

        The full path is stored as the row's tag; only the base name is
        displayed.
        """
        if self.mode.get() == "audio_to_p3":
            file_types = [("音频文件", "*.wav *.mp3 *.ogg *.flac")]
        else:
            file_types = [("P3文件", "*.p3")]
        for path in filedialog.askopenfilenames(filetypes=file_types):
            self.tree.insert("", tk.END,
                             values=("[ ]", os.path.basename(path)),
                             tags=(path,))

    def on_tree_click(self, event):
        """Toggle a row's checkbox when its first column is clicked."""
        if self.tree.identify("region", event.x, event.y) != "cell":
            return
        if self.tree.identify_column(event.x) != "#1":
            return  # only the "selected" column toggles
        item = self.tree.identify_row(event.y)
        values = self.tree.item(item, "values")
        new_val = "[√]" if values[0] == "[ ]" else "[ ]"
        self.tree.item(item, values=(new_val, values[1]))

    def remove_selected(self):
        """Remove every checked row from the file list."""
        checked = [item for item in self.tree.get_children()
                   if self.tree.item(item, "values")[0] == "[√]"]
        for item in reversed(checked):
            self.tree.delete(item)

    def clear_files(self):
        """Remove every row from the file list."""
        for item in self.tree.get_children():
            self.tree.delete(item)

    def select_output_dir(self):
        """Let the user pick the output directory; keep the old one on cancel."""
        path = filedialog.askdirectory()
        if path:
            self.output_dir.set(path)

    def redirect_output(self):
        """Replace ``sys.stdout`` with a writer that also feeds the log pane.

        ``write`` may be called from conversion threads, so it only appends
        to a lock-protected list (and forwards to the previous stdout when
        there is one). The Text widget itself is updated by a callback that
        reschedules itself with ``after`` and therefore runs on the UI
        thread.
        """
        pending = []
        pending_lock = threading.Lock()
        log_text = self.log_text
        master = self.master
        poll_ms = self._LOG_POLL_MS

        class StdoutRedirector:
            def __init__(self, text_widget):
                self.text_widget = text_widget
                # May be None, e.g. when started without a console.
                self.original_stdout = sys.stdout

            def write(self, message):
                with pending_lock:
                    pending.append(message)
                if self.original_stdout is not None:
                    self.original_stdout.write(message)

            def flush(self):
                if self.original_stdout is not None:
                    self.original_stdout.flush()

        def drain_pending():
            # Swap the buffer out under the lock, touch the widget outside it.
            with pending_lock:
                messages = pending[:]
                del pending[:]
            if messages:
                log_text.insert(tk.END, "".join(messages))
                log_text.see(tk.END)
            master.after(poll_ms, drain_pending)

        sys.stdout = StdoutRedirector(log_text)
        drain_pending()

    def start_conversion(self, convert_all):
        """Start converting the listed files on a background thread.

        Args:
            convert_all: True to convert every listed file, False to
                convert only the checked ones.
        """
        input_files = [
            self.tree.item(item, "tags")[0]
            for item in self.tree.get_children()
            if convert_all or self.tree.item(item, "values")[0] == "[√]"
        ]
        if not input_files:
            msg = "没有找到可转换的文件" if convert_all else "没有选中任何文件"
            messagebox.showwarning("警告", msg)
            return

        try:
            # Read all Tk state here, on the UI thread, and hand plain
            # Python values to the worker. makedirs is inside the try so an
            # unusable output path is reported in the log as well.
            output_dir = self.output_dir.get()
            os.makedirs(output_dir, exist_ok=True)
            if self.mode.get() == "audio_to_p3":
                target_lufs = self.target_lufs.get() if self.enable_loudnorm.get() else None
                thread = threading.Thread(
                    target=self.convert_audio_to_p3,
                    args=(target_lufs, input_files, output_dir))
            else:
                thread = threading.Thread(
                    target=self.convert_p3_to_audio,
                    args=(input_files, output_dir))
            thread.start()
        except Exception as e:
            print(f"转换初始化失败: {str(e)}")

    def _convert_batch(self, input_files, output_dir, extension, convert_one):
        """Convert each input file, logging progress and per-file failures.

        Args:
            input_files: Paths of the files to convert.
            output_dir: Destination directory; None falls back to the
                current value of the output-directory field.
            extension: Suffix (with dot) given to each output file.
            convert_one: Callable ``(input_path, output_path)`` doing the
                actual conversion.
        """
        if output_dir is None:
            output_dir = self.output_dir.get()
        for input_path in input_files:
            try:
                filename = os.path.basename(input_path)
                base_name = os.path.splitext(filename)[0]
                output_path = os.path.join(output_dir, f"{base_name}{extension}")
                print(f"正在转换: {filename}")
                convert_one(input_path, output_path)
                print(f"转换成功: {filename}\n")
            except Exception as e:
                # Report and move on so one bad file does not stop the batch.
                print(f"转换失败: {str(e)}\n")

    def convert_audio_to_p3(self, target_lufs, input_files, output_dir=None):
        """Convert audio files to ``<name>.p3`` in the output directory.

        Args:
            target_lufs: Target loudness passed to the encoder, or None to
                disable loudness normalisation.
            input_files: Paths of the audio files to convert.
            output_dir: Destination directory; defaults to the directory
                currently shown in the window.
        """
        self._convert_batch(
            input_files, output_dir, ".p3",
            lambda src, dst: encode_audio_to_opus(src, dst, target_lufs))

    def convert_p3_to_audio(self, input_files, output_dir=None):
        """Convert P3 files to ``<name>.wav`` in the output directory.

        Args:
            input_files: Paths of the P3 files to convert.
            output_dir: Destination directory; defaults to the directory
                currently shown in the window.
        """
        self._convert_batch(input_files, output_dir, ".wav", decode_p3_to_audio)
if __name__ == "__main__":
    # Script entry point: build the converter on a fresh Tk root and run the
    # event loop until the window is closed.
    app = AudioConverterApp(tk.Tk())
    app.master.mainloop()

View File

@@ -0,0 +1,62 @@
# convert audio files to protocol v3 stream
import librosa
import opuslib
import struct
import sys
import tqdm
import numpy as np
import argparse
import pyloudnorm as pyln
def encode_audio_to_opus(input_file, output_file, target_lufs=None):
    """Encode an audio file as a P3 stream of 60 ms mono Opus packets.

    The input is loaded with librosa, downmixed to mono, optionally
    loudness-normalised, resampled to 16 kHz and written as a sequence of
    ``[type=0, reserved=0, big-endian uint16 length] + opus packet`` records.

    Args:
        input_file: Path of the audio file to read.
        output_file: Path of the P3 file to write.
        target_lufs: Target integrated loudness in LUFS, or None to skip
            loudness normalisation.
    """
    # Load at the file's native rate; channels are kept so we can downmix.
    audio, sample_rate = librosa.load(input_file, sr=None, mono=False, dtype=np.float32)

    # Convert to mono if the file has more than one channel
    if audio.ndim == 2:
        audio = librosa.to_mono(audio)

    if target_lufs is not None:
        print("Note: Automatic loudness adjustment is enabled, which may cause", file=sys.stderr)
        print(" audio distortion. If the input audio has already been ", file=sys.stderr)
        print(" loudness-adjusted or if the input audio is TTS audio, ", file=sys.stderr)
        print(" please use the `-d` parameter to disable loudness adjustment.", file=sys.stderr)
        meter = pyln.Meter(sample_rate)
        current_loudness = meter.integrated_loudness(audio)
        if np.isfinite(current_loudness):
            audio = pyln.normalize.loudness(audio, current_loudness, target_lufs)
            print(f"Adjusted loudness: {current_loudness:.1f} LUFS -> {target_lufs} LUFS")
        else:
            # A non-finite measurement would make the gain inf/NaN and
            # destroy the signal, so leave the audio untouched instead.
            print("Warning: could not measure loudness; skipping loudness adjustment.",
                  file=sys.stderr)

    # Convert sample rate to 16000Hz if necessary
    target_sample_rate = 16000
    if sample_rate != target_sample_rate:
        audio = librosa.resample(audio, orig_sr=sample_rate, target_sr=target_sample_rate)
        sample_rate = target_sample_rate

    # Gain and resampling can push samples outside [-1, 1]; without the clip
    # the int16 cast below would wrap around instead of saturating.
    audio = np.clip(audio, -1.0, 1.0)
    audio = (audio * 32767).astype(np.int16)

    # Initialize Opus encoder
    encoder = opuslib.Encoder(sample_rate, 1, opuslib.APPLICATION_AUDIO)

    duration = 60  # 60ms per frame
    frame_size = int(sample_rate * duration / 1000)

    # Zero-pad to a whole number of frames so the tail of the audio (and
    # clips shorter than one frame) is encoded rather than dropped.
    remainder = len(audio) % frame_size
    if remainder:
        padding = np.zeros(frame_size - remainder, dtype=audio.dtype)
        audio = np.concatenate([audio, padding])

    # Encode and save
    with open(output_file, 'wb') as f:
        for i in tqdm.tqdm(range(0, len(audio), frame_size)):
            frame = audio[i:i + frame_size]
            opus_data = encoder.encode(frame.tobytes(), frame_size=frame_size)
            packet = struct.pack('>BBH', 0, 0, len(opus_data)) + opus_data
            f.write(packet)
if __name__ == "__main__":
    # Command-line entry point: parse the options and convert one file.
    parser = argparse.ArgumentParser(description='Convert audio to Opus with loudness normalization')
    parser.add_argument('input_file', help='Input audio file')
    # The output is the P3 packet stream written by encode_audio_to_opus
    # (4-byte header + Opus packet per frame), so the help text names .p3.
    parser.add_argument('output_file', help='Output .p3 file')
    parser.add_argument('-l', '--lufs', type=float, default=-16.0,
                        help='Target loudness in LUFS (default: -16)')
    parser.add_argument('-d', '--disable-loudnorm', action='store_true',
                        help='Disable loudness normalization')
    args = parser.parse_args()

    # -d wins over -l: passing None turns normalisation off in the encoder.
    target_lufs = None if args.disable_loudnorm else args.lufs
    encode_audio_to_opus(args.input_file, args.output_file, target_lufs)

View File

@@ -0,0 +1,51 @@
import struct
import sys
import opuslib
import numpy as np
from tqdm import tqdm
import soundfile as sf
def decode_p3_to_audio(input_file, output_file):
    """Decode a P3 file and write the PCM audio to ``output_file``.

    The input is read as consecutive records of a 4-byte header
    (``>BBH``: type, reserved, payload length) followed by one Opus packet.
    Each packet is decoded as 16 kHz mono and the result is written with
    soundfile as 16-bit PCM. Reading stops at end of file or at the first
    incomplete header/payload.

    Args:
        input_file: Path of the P3 file to read.
        output_file: Path of the audio file to write.

    Raises:
        ValueError: If no packet could be decoded.
    """
    sample_rate = 16000
    channels = 1
    decoder = opuslib.Decoder(sample_rate, channels)
    decoded_chunks = []
    samples_per_frame = int(sample_rate * 60 / 1000)  # 60 ms frames

    with open(input_file, "rb") as source:
        # Total size is only needed to scale the progress bar.
        source.seek(0, 2)
        total_bytes = source.tell()
        source.seek(0)

        with tqdm(total=total_bytes, unit="B", unit_scale=True) as progress:
            # iter() with a sentinel ends the loop on a clean EOF.
            for header in iter(lambda: source.read(4), b""):
                if len(header) < 4:
                    break  # truncated header
                _type, _reserved, payload_len = struct.unpack(">BBH", header)
                payload = source.read(payload_len)
                if len(payload) != payload_len:
                    break  # truncated payload
                pcm = decoder.decode(payload, samples_per_frame)
                decoded_chunks.append(np.frombuffer(pcm, dtype=np.int16))
                progress.update(4 + payload_len)

    if not decoded_chunks:
        raise ValueError("No valid audio data found")

    samples = np.concatenate(decoded_chunks)
    sf.write(output_file, samples, sample_rate, subtype="PCM_16")
if __name__ == "__main__":
    # Exactly two arguments are required: the source P3 file and the
    # destination audio file. Any other count fails the unpacking below.
    try:
        _script, p3_path, audio_path = sys.argv
    except ValueError:
        print("Usage: python convert_p3_to_audio.py <input.p3> <output.wav>")
        sys.exit(1)
    decode_p3_to_audio(p3_path, audio_path)

Binary file not shown.

After

Width:  |  Height:  |  Size: 142 KiB

View File

@@ -0,0 +1,241 @@
import tkinter as tk
from tkinter import filedialog, messagebox
import threading
import time
import opuslib
import struct
import numpy as np
import sounddevice as sd
import os
def play_p3_file(input_file, stop_event=None, pause_event=None):
    """Play a P3 audio file through the default output device.

    P3 record layout: [1 byte type, 1 byte reserved, 2 byte length,
    Opus data].

    Args:
        input_file: Path of the P3 file to play.
        stop_event: Optional event; playback ends once it is set.
        pause_event: Optional event; while it is set no packets are read
            and the loop polls every 0.1 s.
    """
    # Fixed stream parameters: 16 kHz, mono, 60 ms frames
    sample_rate = 16000
    channels = 1
    decoder = opuslib.Decoder(sample_rate, channels)
    frame_size = int(sample_rate * 60 / 1000)

    # Open and start the output stream
    stream = sd.OutputStream(
        samplerate=sample_rate,
        channels=channels,
        dtype='int16'
    )
    stream.start()

    try:
        with open(input_file, 'rb') as f:
            print(f"正在播放: {input_file}")
            # The stop flag is re-checked before every packet.
            while not (stop_event and stop_event.is_set()):
                if pause_event and pause_event.is_set():
                    time.sleep(0.1)
                    continue

                # 4-byte header; a short read means end of file
                header = f.read(4)
                if len(header) < 4:
                    break
                _packet_type, _reserved, data_len = struct.unpack('>BBH', header)

                # Opus payload; stop on an empty or truncated packet
                opus_data = f.read(data_len)
                if len(opus_data) < data_len or not opus_data:
                    break

                # Decode to PCM bytes, view them as int16 samples, and play
                pcm_data = decoder.decode(opus_data, frame_size)
                stream.write(np.frombuffer(pcm_data, dtype=np.int16))
    except KeyboardInterrupt:
        print("\n播放已停止")
    finally:
        stream.stop()
        stream.close()
        print("播放完成")
class P3PlayerApp:
    """Minimal Tkinter playlist player for P3 files.

    Playback runs on a daemon thread (``play_audio``) that calls
    ``play_p3_file`` for one file at a time. Two events control it: a stop
    event, created fresh for every playback run, and a shared pause event.

    NOTE(review): ``play_audio`` runs on the playback thread yet calls Tk
    widget methods (listbox selection, status label, ``loop_playback``).
    Confirm against the tkinter threading documentation that this is
    acceptable for the Tcl/Tk build in use.
    """

    def __init__(self, root):
        """Build the player window on ``root``."""
        self.root = root
        self.root.title("P3 文件简易播放器")
        self.root.geometry("500x400")

        self.playlist = []         # full paths, in display order
        self.current_index = 0     # playlist position of the current track
        self._current_file = None  # path handed to play_p3_file most recently
        self.is_playing = False
        self.is_paused = False
        self.stop_event = threading.Event()
        self.pause_event = threading.Event()
        self.loop_playback = tk.BooleanVar(value=False)  # loop checkbox state

        # Create the widgets
        self.create_widgets()

    def create_widgets(self):
        """Create and pack the playlist, option row, controls and status."""
        # Playlist
        self.playlist_label = tk.Label(self.root, text="播放列表:")
        self.playlist_label.pack(pady=5)
        self.playlist_frame = tk.Frame(self.root)
        self.playlist_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)
        self.playlist_listbox = tk.Listbox(self.playlist_frame, selectmode=tk.SINGLE)
        self.playlist_listbox.pack(fill=tk.BOTH, expand=True)

        # Remove button and loop checkbox
        self.checkbox_frame = tk.Frame(self.root)
        self.checkbox_frame.pack(pady=5)
        self.remove_button = tk.Button(self.checkbox_frame, text="移除文件",
                                       command=self.remove_files)
        self.remove_button.pack(side=tk.LEFT, padx=5)
        self.loop_checkbox = tk.Checkbutton(self.checkbox_frame, text="循环播放",
                                            variable=self.loop_playback)
        self.loop_checkbox.pack(side=tk.LEFT, padx=5)

        # Transport controls
        self.control_frame = tk.Frame(self.root)
        self.control_frame.pack(pady=10)
        self.add_button = tk.Button(self.control_frame, text="添加文件", command=self.add_file)
        self.add_button.grid(row=0, column=0, padx=5)
        self.play_button = tk.Button(self.control_frame, text="播放", command=self.play)
        self.play_button.grid(row=0, column=1, padx=5)
        self.pause_button = tk.Button(self.control_frame, text="暂停", command=self.pause)
        self.pause_button.grid(row=0, column=2, padx=5)
        self.stop_button = tk.Button(self.control_frame, text="停止", command=self.stop)
        self.stop_button.grid(row=0, column=3, padx=5)

        # Status label
        self.status_label = tk.Label(self.root, text="未在播放", fg="blue")
        self.status_label.pack(pady=10)

    def add_file(self):
        """Ask for P3 files and append them to the playlist."""
        files = filedialog.askopenfilenames(filetypes=[("P3 文件", "*.p3")])
        if files:
            self.playlist.extend(files)
            self.update_playlist()

    def update_playlist(self):
        """Rebuild the listbox from ``self.playlist`` (base names only)."""
        self.playlist_listbox.delete(0, tk.END)
        for file in self.playlist:
            self.playlist_listbox.insert(tk.END, os.path.basename(file))

    def update_status(self, status_text, color="blue"):
        """Set the text and colour of the status label."""
        self.status_label.config(text=status_text, fg=color)

    def _show_now_playing(self):
        """Show the "now playing" status for the file being played.

        Uses the path last handed to the player rather than
        ``playlist[current_index]``, which may be stale or out of range
        after files were removed during playback.
        """
        path = self._current_file
        if path is None and 0 <= self.current_index < len(self.playlist):
            path = self.playlist[self.current_index]
        if path is not None:
            self.update_status(f"正在播放:{os.path.basename(path)}", "green")

    def play(self):
        """Start playback, or resume it when currently paused.

        Playback starts at the selected listbox entry, or at the first
        entry when nothing is selected. Does nothing if already playing.
        """
        if not self.playlist:
            messagebox.showwarning("警告", "播放列表为空!")
            return

        if self.is_paused:
            self.is_paused = False
            self.pause_event.clear()
            self._show_now_playing()
            return

        if self.is_playing:
            return

        self.is_playing = True
        # A fresh stop event per run: a worker from a previous run that has
        # not yet noticed its stop request keeps seeing its own (set) event
        # instead of being revived by a clear() on a shared one.
        self.stop_event = threading.Event()
        self.pause_event.clear()

        selection = self.playlist_listbox.curselection()
        self.current_index = selection[0] if selection else 0
        self._current_file = self.playlist[self.current_index]

        self.play_thread = threading.Thread(
            target=self.play_audio, args=(self.stop_event,), daemon=True)
        self.play_thread.start()
        self._show_now_playing()

    def play_audio(self, stop_event=None):
        """Worker loop: play the current file, then advance when looping.

        Without loop mode a single file is played. With loop mode the
        playlist is played in order and wraps around to the first entry.

        Args:
            stop_event: Event that ends this run when set; defaults to the
                player's current stop event.
        """
        if stop_event is None:
            stop_event = self.stop_event
        try:
            while not stop_event.is_set():
                if self.pause_event.is_set():
                    time.sleep(0.1)
                    continue

                # Work on a snapshot: the UI thread may edit the playlist.
                playlist = list(self.playlist)
                if self.current_index >= len(playlist):
                    if playlist and self.loop_playback.get():
                        self.current_index = 0  # wrap to the first entry
                    else:
                        break  # nothing (left) to play

                file = playlist[self.current_index]
                self._current_file = file
                self.playlist_listbox.selection_clear(0, tk.END)
                self.playlist_listbox.selection_set(self.current_index)
                self.playlist_listbox.activate(self.current_index)
                self.update_status(f"正在播放:{os.path.basename(file)}", "green")

                play_p3_file(file, stop_event, self.pause_event)

                if stop_event.is_set():
                    break
                if not self.loop_playback.get():
                    break  # single-file mode: stop after the current file
                # Wrap-around is handled at the top of the loop.
                self.current_index += 1
        finally:
            # Always leave the "playing" state, even if playback raised, so
            # the Play button keeps working. Skip this when a newer run has
            # already replaced the stop event: that run owns the state now.
            if self.stop_event is stop_event:
                self.is_playing = False
                self.is_paused = False
                self.update_status("播放已停止", "red")

    def pause(self):
        """Toggle pause while playing; ignored when nothing is playing."""
        if not self.is_playing:
            return
        self.is_paused = not self.is_paused
        if self.is_paused:
            self.pause_event.set()
            self.update_status("播放已暂停", "orange")
        else:
            self.pause_event.clear()
            self._show_now_playing()

    def stop(self):
        """Request the worker to stop and reset the player state."""
        if self.is_playing or self.is_paused:
            self.is_playing = False
            self.is_paused = False
            self.stop_event.set()
            # Clear pause so a paused worker wakes up and sees the stop.
            self.pause_event.clear()
            self.update_status("播放已停止", "red")

    def remove_files(self):
        """Remove the selected entry from the playlist."""
        selected_indices = self.playlist_listbox.curselection()
        if not selected_indices:
            messagebox.showwarning("警告", "请先选择要移除的文件!")
            return

        # Pop from the end so earlier indices stay valid.
        for index in reversed(selected_indices):
            self.playlist.pop(index)
        self.update_playlist()
if __name__ == "__main__":
    # Script entry point: build the player on a fresh Tk root and run the
    # event loop until the window is closed.
    app = P3PlayerApp(tk.Tk())
    app.root.mainloop()

View File

@@ -0,0 +1,71 @@
# 播放p3格式的音频文件
import opuslib
import struct
import numpy as np
import sounddevice as sd
import argparse
def play_p3_file(input_file):
    """Play a P3 audio file through the default output device.

    P3 record layout: [1 byte type, 1 byte reserved, 2 byte length,
    Opus data].

    Args:
        input_file: Path of the P3 file to play.
    """
    # Fixed stream parameters: 16 kHz, mono, 60 ms frames
    sample_rate = 16000
    channels = 1
    decoder = opuslib.Decoder(sample_rate, channels)
    frame_size = int(sample_rate * 60 / 1000)

    def opus_packets(source):
        """Yield Opus payloads until EOF or an empty/truncated record."""
        while True:
            header = source.read(4)
            if len(header) < 4:
                return
            _packet_type, _reserved, data_len = struct.unpack('>BBH', header)
            payload = source.read(data_len)
            if not payload or len(payload) < data_len:
                return
            yield payload

    # Open and start the output stream
    stream = sd.OutputStream(
        samplerate=sample_rate,
        channels=channels,
        dtype='int16'
    )
    stream.start()

    try:
        with open(input_file, 'rb') as f:
            print(f"正在播放: {input_file}")
            for opus_data in opus_packets(f):
                # Decode to PCM bytes, view them as int16 samples, and play
                pcm_data = decoder.decode(opus_data, frame_size)
                stream.write(np.frombuffer(pcm_data, dtype=np.int16))
    except KeyboardInterrupt:
        print("\n播放已停止")
    finally:
        stream.stop()
        stream.close()
        print("播放完成")
def main():
    """Parse the command line and play the requested P3 file."""
    cli = argparse.ArgumentParser(description='播放p3格式的音频文件')
    cli.add_argument('input_file', help='输入的p3文件路径')
    play_p3_file(cli.parse_args().input_file)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,7 @@
librosa>=0.9.2
opuslib>=3.0.1
numpy>=1.20.0
tqdm>=4.62.0
sounddevice>=0.4.4
pyloudnorm>=0.1.1
soundfile>=0.13.1