基于 ESP32-S3 + VB6824 的四博 AI 双目智能终端方案设计:触控、姿态、震动与双目表情联动实现
1. 项目背景
传统 AI 音箱更多是“语音输入 + 语音输出”的单通道交互,用户说一句,设备回一句。但对于 AI 桌宠、儿童陪伴、IP 潮玩、智能学习终端这类产品,仅靠语音交互是不够的。
四博 AI 双目方案的核心思路是:
用 ESP32-S3 作为主控,结合 VB6824 语音前端、0.71 / 1.28 寸双目光屏、四路触控、三轴姿态传感器、震动马达、Wi-Fi / BLE、小程序、声音克隆、知识库和 MCP 工具调用,构建一套完整的多模态 AI 交互系统。
整体能力可以概括为:
ESP32-S3 主控
+ VB6824 语音前端
+ 0.71 / 1.28 寸双目屏
+ 四路触控感应
+ 三轴姿态传感器
+ 震动马达
+ Wi-Fi / BLE 配网
+ 四博小助手小程序
+ 声音克隆 / 知识库 / MCP / OTA
这套方案不只是“AI 音箱加两块屏”,而是把语音、视觉、触觉、姿态感知和云端 AI 做成一个统一的事件系统。
2. 系统总体架构
推荐采用 ESP-IDF + FreeRTOS + 事件队列 + 状态机的架构。
app_main
├── wifi_task // Wi-Fi 连接、断线重连
├── blufi_task // BLE / 小程序配网
├── vb6824_task // 唤醒、打断、录音事件
├── ai_client_task // WebSocket / MQTT AI 通信
├── eye_render_task // 双目动画刷新
├── touch_scan_task // 四路触控扫描
├── imu_detect_task // 姿态检测
├── haptic_task // 震动马达控制
├── mcp_dispatch_task // MCP 工具调用
├── ota_task // 固件和素材升级
└── app_dispatch_task // 全局事件分发
硬件关系可以简化为:
┌────────────────────────────────────┐
│ 四博小助手小程序 │
│ 配网 / 声音克隆 / 知识库 / MCP / OTA │
└───────────────┬────────────────────┘
│ BLE / Wi-Fi / WebSocket
┌───────────────▼────────────────────┐
│ ESP32-S3 主控 │
│ Wi-Fi / BLE / 状态机 / OTA / MCP │
│ 双目动画 / 触控 / IMU / 震动马达 │
└───────┬────────────┬────────────┬───┘
│ │ │
┌───────▼──────┐ ┌───▼──────┐ ┌──▼──────────┐
│ VB6824 │ │ 双目光屏 │ │ 触控/IMU/马达 │
│ 唤醒/AEC/打断 │ │ 0.71/1.28 │ │ 四触控/三轴/震动 │
└───────┬──────┘ └──────────┘ └─────────────┘
│
┌───────▼──────────────┐
│ 麦克风 / 功放 / 喇叭 │
└──────────────────────┘
3. 全局事件系统设计
AI 双目设备输入源很多:语音唤醒、触摸、摇晃、翻转、网络变化、AI 回复、OTA 等。如果每个模块各自处理,很容易逻辑混乱。
建议把所有模块的输入都转换成统一事件,投递到同一个事件队列,由分发任务集中处理。
// Identifiers for every event carried on the global application event queue.
// Producers pass one of these to app_post_event(); app_dispatch_task() switches on it.
typedef enum {
APP_EVT_NONE = 0,
// Wi-Fi link state.
APP_EVT_WIFI_CONNECTED,
APP_EVT_WIFI_DISCONNECTED,
// BluFi provisioning (producer not visible in this file section).
APP_EVT_BLUFI_START,
APP_EVT_BLUFI_DONE,
// Voice front-end events; posted by vb6824_parse_frame() from UART frames.
APP_EVT_WAKE_WORD,
APP_EVT_VOICE_INTERRUPT,
APP_EVT_RECORD_START,
APP_EVT_RECORD_STOP,
// One event per touch key; posted by touch_task().
APP_EVT_TOUCH_TOP,
APP_EVT_TOUCH_LEFT,
APP_EVT_TOUCH_RIGHT,
APP_EVT_TOUCH_BACK,
// Motion events; imu_task() posts SHAKE, FLIP, TILT_LEFT and TILT_RIGHT.
// NOTE(review): no visible code posts APP_EVT_IMU_PICKUP — confirm it is produced elsewhere.
APP_EVT_IMU_PICKUP,
APP_EVT_IMU_SHAKE,
APP_EVT_IMU_FLIP,
APP_EVT_IMU_TILT_LEFT,
APP_EVT_IMU_TILT_RIGHT,
// AI session progress (presumably posted by the AI client task — not visible here).
APP_EVT_AI_THINKING,
APP_EVT_AI_SPEAKING,
APP_EVT_AI_FINISHED,
APP_EVT_AI_ERROR,
APP_EVT_MCP_TOOL_CALL,
// Power and firmware-update notifications.
APP_EVT_LOW_BATTERY,
APP_EVT_OTA_START,
APP_EVT_OTA_DONE,
} app_event_id_t;
// One queued event: an identifier, two integer parameters whose meaning depends
// on the event, and an optional text payload. The whole struct is copied by
// value into the queue (the queue item size is sizeof(app_event_t)).
typedef struct {
app_event_id_t id;
int param1;
int param2;
char payload[256];
} app_event_t;
// Handle of the global event queue: created in app_main(), written by
// app_post_event(), drained by app_dispatch_task().
static QueueHandle_t g_app_event_queue;
事件投递函数:
// Post an event to the global application queue without blocking.
//
// id       Event identifier.
// p1, p2   Event-specific integer parameters.
// payload  Optional NUL-terminated text; may be NULL. Text longer than the
//          payload field is truncated.
//
// The call is a no-op until the queue has been created. If the queue is full
// the event is dropped and a warning is logged, so a stalled consumer becomes
// visible instead of silently losing input.
// Uses xQueueSend(), so it must be called from task context, not from an ISR.
void app_post_event(app_event_id_t id, int p1, int p2, const char *payload)
{
    if (g_app_event_queue == NULL) {
        return;
    }

    // The designated initializer zero-fills payload, so the bounded copy below
    // is always NUL-terminated even when strncpy() truncates.
    app_event_t evt = {
        .id = id,
        .param1 = p1,
        .param2 = p2,
    };
    if (payload != NULL) {
        strncpy(evt.payload, payload, sizeof(evt.payload) - 1);
    }

    if (xQueueSend(g_app_event_queue, &evt, 0) != pdTRUE) {
        ESP_LOGW("APP", "event queue full, dropped event id=%d", (int)id);
    }
}
主程序初始化:
void app_main(void)
{
ESP_ERROR_CHECK(nvs_flash_init());
g_app_event_queue = xQueueCreate(32, sizeof(app_event_t));
if (!g_app_event_queue) {
ESP_LOGE("APP", "事件队列创建失败");
return;
}
eye_app_start();
haptic_app_start();
touch_app_start();
imu_app_start();
wifi_app_start();
blufi_app_start();
vb6824_app_start();
ai_client_start();
mcp_app_start();
ota_app_start();
app_dispatch_start();
ESP_LOGI("APP", "四博 AI 双目系统启动完成");
}
4. AI 状态机设计
双目动画、提示音、震动反馈都应该围绕 AI 状态机工作。
// Top-level device states. ai_set_state() maps each state to an eye animation
// and, for some states, a haptic pattern and/or an audio action.
typedef enum {
AI_STATE_BOOT = 0,
AI_STATE_IDLE,
AI_STATE_WAKEUP,
AI_STATE_LISTENING,
AI_STATE_THINKING,
AI_STATE_SPEAKING,
AI_STATE_TOUCH_FEEDBACK,
AI_STATE_SHAKE_FEEDBACK,
AI_STATE_SLEEP,
AI_STATE_NETWORK_ERROR,
AI_STATE_LOW_BATTERY,
AI_STATE_OTA,
} ai_state_t;
// Most recently requested state; written only by ai_set_state() in the visible code.
static ai_state_t g_ai_state = AI_STATE_BOOT;
状态切换实现:
// Record the new device state and trigger its feedback: an eye animation and,
// depending on the state, a haptic pattern and/or an audio action.
//
// state  Target state. Values outside the enum fall back to the idle animation.
//
// The haptic_* helpers delay the calling task while the motor runs, so this
// function blocks for the duration of the pattern.
void ai_set_state(ai_state_t state)
{
    g_ai_state = state;

    switch (state) {
    case AI_STATE_BOOT:
        eye_play_anim("boot");
        audio_play_prompt("boot.wav");
        break;
    case AI_STATE_IDLE:
        eye_play_anim("idle_blink");
        break;
    case AI_STATE_WAKEUP:
        eye_play_anim("wake");
        haptic_double();
        audio_play_prompt("ding.wav");
        break;
    case AI_STATE_LISTENING:
        eye_play_anim("listening");
        audio_start_record();
        break;
    case AI_STATE_THINKING:
        eye_play_anim("thinking");
        break;
    case AI_STATE_SPEAKING:
        eye_play_anim("speaking");
        break;
    case AI_STATE_TOUCH_FEEDBACK:
        eye_play_anim("happy");
        haptic_short();
        break;
    case AI_STATE_SHAKE_FEEDBACK:
        eye_play_anim("surprised");
        haptic_double();
        break;
    case AI_STATE_SLEEP:
        eye_play_anim("sleep");
        audio_stop_tts();
        break;
    case AI_STATE_NETWORK_ERROR:
        eye_play_anim("net_error");
        haptic_long();
        // File name matches the prompt shipped in the asset partition
        // (prompt/net_error.wav); "network_error.wav" does not exist there.
        audio_play_prompt("net_error.wav");
        break;
    case AI_STATE_LOW_BATTERY:
        eye_play_anim("low_battery");
        haptic_long();
        audio_play_prompt("low_battery.wav");
        break;
    case AI_STATE_OTA:
        eye_play_anim("ota");
        haptic_rhythm_ota();
        break;
    default:
        eye_play_anim("idle_blink");
        break;
    }
}
这样用户可以通过眼睛状态直接判断设备当前行为:
唤醒成功:双目睁开 + 双震 + 提示音
正在聆听:眼神聚焦 + 开始录音
正在思考:眼球转动 / Loading
正在回复:说话表情 + TTS
触摸反馈:开心表情 + 短震
摇晃设备:惊讶表情 + 角色切换
翻转设备:睡眠表情 + 停止播放
5. 四路触控扫描实现
四路触控建议定义为:
| 触控位置 | 功能 |
|---|---|
| 顶部触控 | 唤醒 / 暂停 / 继续 |
| 左侧触控 | 上一个角色 / 上一首 |
| 右侧触控 | 下一个角色 / 下一首 |
| 背部触控 | 配网 / 静音 / 恢复出厂 |
如果使用 ESP32-S3 Touch Pad,可以这样实现:
#include "driver/touch_sensor.h"
// Percentage of the calibrated baseline that touch_task() uses to derive its
// touch-detection threshold.
#define TOUCH_THRESHOLD_PERCENT 70
// Per-key configuration and calibration state.
typedef struct {
touch_pad_t pad;
// Average untouched raw reading; filled in by touch_calibrate().
uint32_t baseline;
// Event posted by touch_task() when this key is detected as touched.
app_event_id_t evt;
// Short label used in log output.
const char *name;
} touch_key_t;
// The four touch keys, in scan order. Baselines start at 0 until calibration runs.
static touch_key_t s_touch_keys[] = {
{TOUCH_PAD_NUM1, 0, APP_EVT_TOUCH_TOP, "TOP"},
{TOUCH_PAD_NUM2, 0, APP_EVT_TOUCH_LEFT, "LEFT"},
{TOUCH_PAD_NUM3, 0, APP_EVT_TOUCH_RIGHT, "RIGHT"},
{TOUCH_PAD_NUM4, 0, APP_EVT_TOUCH_BACK, "BACK"},
};
触控基线校准:
static void touch_calibrate(void)
{
for (int i = 0; i < 4; i++) {
uint32_t sum = 0;
for (int j = 0; j < 20; j++) {
uint32_t raw = 0;
touch_pad_read_raw_data(s_touch_keys[i].pad, &raw);
sum += raw;
vTaskDelay(pdMS_TO_TICKS(10));
}
s_touch_keys[i].baseline = sum / 20;
ESP_LOGI("TOUCH", "%s baseline=%lu",
s_touch_keys[i].name,
s_touch_keys[i].baseline);
}
}
触控任务:
static void touch_task(void *arg)
{
while (1) {
for (int i = 0; i < 4; i++) {
uint32_t raw = 0;
touch_pad_read_raw_data(s_touch_keys[i].pad, &raw);
uint32_t threshold =
s_touch_keys[i].baseline * TOUCH_THRESHOLD_PERCENT / 100;
if (raw < threshold) {
ESP_LOGI("TOUCH", "%s touched raw=%lu",
s_touch_keys[i].name, raw);
app_post_event(s_touch_keys[i].evt, raw, 0, NULL);
vTaskDelay(pdMS_TO_TICKS(250));
}
}
vTaskDelay(pdMS_TO_TICKS(30));
}
}
void touch_app_start(void)
{
ESP_ERROR_CHECK(touch_pad_init());
for (int i = 0; i < 4; i++) {
ESP_ERROR_CHECK(touch_pad_config(s_touch_keys[i].pad));
}
vTaskDelay(pdMS_TO_TICKS(300));
touch_calibrate();
xTaskCreate(touch_task, "touch_task", 4096, NULL, 5, NULL);
}
6. 震动马达驱动
震动马达用于触觉反馈,建议通过 LEDC PWM 控制。
#include "driver/ledc.h"
// GPIO that drives the vibration motor via LEDC PWM.
#define MOTOR_GPIO 15
// LEDC speed mode, timer and channel used for the motor.
#define MOTOR_LEDC_MODE LEDC_LOW_SPEED_MODE
#define MOTOR_TIMER LEDC_TIMER_0
#define MOTOR_CHANNEL LEDC_CHANNEL_0
// PWM frequency in Hz.
#define MOTOR_FREQ_HZ 2000
// Full-scale duty: 2^13 - 1, matching the 13-bit resolution set in haptic_app_start().
#define MOTOR_DUTY_MAX 8191
// Apply a PWM duty value to the motor channel.
//
// duty  Raw LEDC duty; 0 stops the motor, MOTOR_DUTY_MAX is full scale.
//
// The new duty is latched first and then committed to hardware; the return
// codes of both LEDC calls are not checked.
static void motor_set(uint32_t duty)
{
    ledc_set_duty(MOTOR_LEDC_MODE,
                  MOTOR_CHANNEL,
                  duty);
    ledc_update_duty(MOTOR_LEDC_MODE,
                     MOTOR_CHANNEL);
}
// Single short pulse: 60% duty for 60 ms. Blocks the caller for the pulse duration.
void haptic_short(void)
{
    const uint32_t pulse_duty = MOTOR_DUTY_MAX * 60 / 100;

    motor_set(pulse_duty);
    vTaskDelay(pdMS_TO_TICKS(60));
    motor_set(0);
}
// Two pulses at 70% duty: 50 ms on, 80 ms off, repeated twice.
// Blocks the caller for about 260 ms.
void haptic_double(void)
{
    const uint32_t pulse_duty = MOTOR_DUTY_MAX * 70 / 100;
    int pulses_left = 2;

    while (pulses_left > 0) {
        motor_set(pulse_duty);
        vTaskDelay(pdMS_TO_TICKS(50));
        motor_set(0);
        vTaskDelay(pdMS_TO_TICKS(80));
        pulses_left--;
    }
}
// Single long pulse: 80% duty for 300 ms. Blocks the caller for the pulse duration.
void haptic_long(void)
{
    const uint32_t pulse_duty = MOTOR_DUTY_MAX * 80 / 100;

    motor_set(pulse_duty);
    vTaskDelay(pdMS_TO_TICKS(300));
    motor_set(0);
}
// OTA rhythm: three pulses at 50% duty, 80 ms on and 120 ms off each.
// Blocks the caller for about 600 ms.
void haptic_rhythm_ota(void)
{
    const uint32_t pulse_duty = MOTOR_DUTY_MAX * 50 / 100;
    int pulses_left = 3;

    while (pulses_left > 0) {
        motor_set(pulse_duty);
        vTaskDelay(pdMS_TO_TICKS(80));
        motor_set(0);
        vTaskDelay(pdMS_TO_TICKS(120));
        pulses_left--;
    }
}
初始化:
// Configure the LEDC timer and channel that drive the vibration motor.
// The channel starts with duty 0, so the motor is off after initialization.
// Any configuration error aborts through ESP_ERROR_CHECK.
void haptic_app_start(void)
{
    // Timer: 13-bit resolution at MOTOR_FREQ_HZ, clock source chosen automatically.
    ledc_timer_config_t timer_cfg = {
        .speed_mode = MOTOR_LEDC_MODE,
        .timer_num = MOTOR_TIMER,
        .duty_resolution = LEDC_TIMER_13_BIT,
        .freq_hz = MOTOR_FREQ_HZ,
        .clk_cfg = LEDC_AUTO_CLK,
    };
    ESP_ERROR_CHECK(ledc_timer_config(&timer_cfg));

    // Channel: bind the motor GPIO to the timer configured above.
    ledc_channel_config_t channel_cfg = {
        .gpio_num = MOTOR_GPIO,
        .speed_mode = MOTOR_LEDC_MODE,
        .channel = MOTOR_CHANNEL,
        .timer_sel = MOTOR_TIMER,
        .duty = 0,
        .hpoint = 0,
    };
    ESP_ERROR_CHECK(ledc_channel_config(&channel_cfg));
}
7. 三轴姿态检测
三轴传感器可以实现摇晃、翻转、倾斜、拿起等动作识别。
// One raw accelerometer sample: signed 16-bit reading per axis, as assembled
// by imu_read_accel().
typedef struct {
int16_t x;
int16_t y;
int16_t z;
} accel_data_t;
I2C 读取示例:
#include "driver/i2c.h"
#include <math.h>
// I2C controller, pins and bus speed for the motion sensor.
#define I2C_PORT I2C_NUM_0
#define I2C_SDA_GPIO 8
#define I2C_SCL_GPIO 9
#define I2C_FREQ_HZ 400000
// 7-bit I2C address of the sensor.
// NOTE(review): 0x68 / 0x3B look like an MPU6050-compatible register map — confirm against the actual part.
#define IMU_ADDR 0x68
// First of the six consecutive data bytes read by imu_read_accel().
#define IMU_REG_ACCEL_X 0x3B
// Read one accelerometer sample over I2C.
//
// acc  Output sample; must not be NULL. Left untouched on failure.
//
// Returns ESP_OK on success, ESP_ERR_INVALID_ARG when acc is NULL, otherwise
// the error returned by the I2C transaction (50 ms timeout).
//
// Writes the start register address, then reads six bytes that are combined
// as three big-endian signed 16-bit values in X, Y, Z order.
static esp_err_t imu_read_accel(accel_data_t *acc)
{
    if (acc == NULL) {
        return ESP_ERR_INVALID_ARG;
    }

    uint8_t reg = IMU_REG_ACCEL_X;
    uint8_t buf[6] = {0};

    esp_err_t ret = i2c_master_write_read_device(
        I2C_PORT,
        IMU_ADDR,
        &reg, // address of the register byte to send
        1,
        buf,
        sizeof(buf),
        pdMS_TO_TICKS(50)
    );
    if (ret != ESP_OK) {
        return ret;
    }

    // High byte first; build the value unsigned, then reinterpret as signed.
    acc->x = (int16_t)(uint16_t)((buf[0] << 8) | buf[1]);
    acc->y = (int16_t)(uint16_t)((buf[2] << 8) | buf[3]);
    acc->z = (int16_t)(uint16_t)((buf[4] << 8) | buf[5]);
    return ESP_OK;
}
动作判断:
// Report a shake when the summed absolute per-axis change between two
// consecutive samples exceeds 18000 raw counts.
//
// now   Latest sample.
// last  Previous sample.
static bool imu_detect_shake(accel_data_t now, accel_data_t last)
{
    const int delta_x = abs(now.x - last.x);
    const int delta_y = abs(now.y - last.y);
    const int delta_z = abs(now.z - last.z);
    const int total_change = delta_x + delta_y + delta_z;

    if (total_change > 18000) {
        return true;
    }
    return false;
}
// Report a flip when the Z reading is below -12000 raw counts.
static bool imu_detect_flip(accel_data_t now)
{
    if (now.z < -12000) {
        return true;
    }
    return false;
}
// Report a left tilt when the X reading is below -10000 raw counts.
static bool imu_detect_tilt_left(accel_data_t now)
{
    if (now.x < -10000) {
        return true;
    }
    return false;
}
// Report a right tilt when the X reading is above 10000 raw counts.
static bool imu_detect_tilt_right(accel_data_t now)
{
    if (now.x > 10000) {
        return true;
    }
    return false;
}
IMU 任务:
// FreeRTOS task: samples the accelerometer every 80 ms and posts motion events.
//
// - Orientation events (flip, tilt left, tilt right) are posted once, on the
//   sample where the condition becomes true, not on every sample while the
//   pose is held. Re-posting every 80 ms would quickly fill the event queue
//   and replay the same feedback over and over.
// - Shake detection needs a previous sample, so it is skipped for the first
//   successful read instead of comparing against an all-zero sample.
// - After a shake is posted, further shakes are suppressed for 12 samples
//   (about 1 s) so one gesture produces one event.
// Failed reads are ignored; state is kept and the next sample is tried.
static void imu_task(void *arg)
{
    (void)arg;

    const int shake_cooldown_samples = 12;
    accel_data_t last = {0};
    bool have_last = false;
    bool was_flipped = false;
    bool was_tilt_left = false;
    bool was_tilt_right = false;
    int shake_cooldown = 0;

    while (1) {
        accel_data_t now;
        if (imu_read_accel(&now) == ESP_OK) {
            if (shake_cooldown > 0) {
                shake_cooldown--;
            } else if (have_last && imu_detect_shake(now, last)) {
                app_post_event(APP_EVT_IMU_SHAKE, now.x, now.y, NULL);
                shake_cooldown = shake_cooldown_samples;
            }

            const bool flipped = imu_detect_flip(now);
            if (flipped && !was_flipped) {
                app_post_event(APP_EVT_IMU_FLIP, now.x, now.z, NULL);
            }
            was_flipped = flipped;

            const bool tilt_left = imu_detect_tilt_left(now);
            if (tilt_left && !was_tilt_left) {
                app_post_event(APP_EVT_IMU_TILT_LEFT, now.x, 0, NULL);
            }
            was_tilt_left = tilt_left;

            const bool tilt_right = imu_detect_tilt_right(now);
            if (tilt_right && !was_tilt_right) {
                app_post_event(APP_EVT_IMU_TILT_RIGHT, now.x, 0, NULL);
            }
            was_tilt_right = tilt_right;

            last = now;
            have_last = true;
        }
        vTaskDelay(pdMS_TO_TICKS(80));
    }
}
姿态事件处理:
// React to a motion event taken from the queue.
//
// evt  Event to handle; only the id field is read. Ids other than the four
//      handled below are ignored.
static void handle_imu_event(app_event_t *evt)
{
    const app_event_id_t motion = evt->id;

    if (motion == APP_EVT_IMU_SHAKE) {
        // Shake: surprised feedback, then pick a random role.
        ai_set_state(AI_STATE_SHAKE_FEEDBACK);
        role_switch_random();
        return;
    }
    if (motion == APP_EVT_IMU_FLIP) {
        // Flip: go to sleep and stop speech playback.
        ai_set_state(AI_STATE_SLEEP);
        audio_stop_tts();
        return;
    }
    if (motion == APP_EVT_IMU_TILT_LEFT) {
        eye_set_gaze(-30, 0);
        return;
    }
    if (motion == APP_EVT_IMU_TILT_RIGHT) {
        eye_set_gaze(30, 0);
        return;
    }
}
8. VB6824 语音事件解析
VB6824 负责唤醒、AEC、实时打断、录音事件等,ESP32-S3 通过 UART 接收事件。
协议示例:
0xA5 0x01 0x00 0x5A 唤醒成功
0xA5 0x02 0x00 0x5A 用户打断
0xA5 0x03 0x00 0x5A 开始录音
0xA5 0x04 0x00 0x5A 停止录音
解析代码:
// UART port, pins and baud rate of the link to the VB6824.
#define VB_UART_NUM UART_NUM_1
#define VB_UART_TX 17
#define VB_UART_RX 18
#define VB_UART_BAUD 115200
// Frame delimiters: every frame is 4 bytes, HEAD, command, one more byte, TAIL.
#define VB_HEAD 0xA5
#define VB_TAIL 0x5A
// Command byte values (second byte of a frame).
#define VB_CMD_WAKEUP 0x01
#define VB_CMD_INTERRUPT 0x02
#define VB_CMD_REC_START 0x03
#define VB_CMD_REC_STOP 0x04
// Parse every complete VB6824 frame contained in a received chunk and post the
// matching application event for each.
//
// buf  Received bytes; may be NULL (treated as empty).
// len  Number of valid bytes in buf.
//
// A frame is 4 bytes: VB_HEAD, command, one byte that is not inspected, VB_TAIL.
// The buffer is scanned from the start; when the bytes at the current position
// do not form a frame the scan advances by one byte to resynchronize, so
// back-to-back frames and frames preceded by stray bytes are both handled.
// Frames with an unknown command are skipped. A frame split across two calls
// is not reassembled.
static void vb6824_parse_frame(uint8_t *buf, int len)
{
    enum { VB_FRAME_LEN = 4 };

    if (buf == NULL) {
        return;
    }

    int pos = 0;
    while (pos + VB_FRAME_LEN <= len) {
        if (buf[pos] != VB_HEAD || buf[pos + 3] != VB_TAIL) {
            pos++; // not a frame boundary; slide forward one byte
            continue;
        }

        switch (buf[pos + 1]) {
        case VB_CMD_WAKEUP:
            app_post_event(APP_EVT_WAKE_WORD, 0, 0, NULL);
            break;
        case VB_CMD_INTERRUPT:
            app_post_event(APP_EVT_VOICE_INTERRUPT, 0, 0, NULL);
            break;
        case VB_CMD_REC_START:
            app_post_event(APP_EVT_RECORD_START, 0, 0, NULL);
            break;
        case VB_CMD_REC_STOP:
            app_post_event(APP_EVT_RECORD_STOP, 0, 0, NULL);
            break;
        default:
            break;
        }
        pos += VB_FRAME_LEN;
    }
}
UART 接收任务:
// FreeRTOS task: reads bytes from the VB6824 UART in chunks of up to 64 bytes
// (waiting at most 100 ms per read) and hands every non-empty chunk to
// vb6824_parse_frame().
static void vb6824_uart_task(void *arg)
{
    uint8_t chunk[64];

    for (;;) {
        const int received = uart_read_bytes(VB_UART_NUM,
                                             chunk,
                                             sizeof(chunk),
                                             pdMS_TO_TICKS(100));
        if (received <= 0) {
            continue; // timeout or error: nothing to parse
        }
        vb6824_parse_frame(chunk, received);
    }
}
// Bring up the UART link to the VB6824 and start the receive task.
// 8 data bits, no parity, 1 stop bit, no hardware flow control; a 2048-byte
// RX buffer and no TX buffer or event queue. Any driver error aborts through
// ESP_ERROR_CHECK.
void vb6824_app_start(void)
{
    uart_config_t uart_cfg = {
        .baud_rate = VB_UART_BAUD,
        .data_bits = UART_DATA_8_BITS,
        .parity = UART_PARITY_DISABLE,
        .stop_bits = UART_STOP_BITS_1,
        .flow_ctrl = UART_HW_FLOWCTRL_DISABLE,
    };

    ESP_ERROR_CHECK(uart_driver_install(VB_UART_NUM, 2048, 0, 0, NULL, 0));
    ESP_ERROR_CHECK(uart_param_config(VB_UART_NUM, &uart_cfg));

    // Only TX and RX are routed; RTS and CTS keep their current assignment.
    ESP_ERROR_CHECK(uart_set_pin(VB_UART_NUM,
                                 VB_UART_TX,
                                 VB_UART_RX,
                                 UART_PIN_NO_CHANGE,
                                 UART_PIN_NO_CHANGE));

    xTaskCreate(vb6824_uart_task, "vb6824_uart", 4096, NULL, 6, NULL);
}
9. 统一事件分发
static void app_dispatch_task(void *arg)
{
app_event_t evt;
while (1) {
if (xQueueReceive(g_app_event_queue, &evt, portMAX_DELAY)) {
switch (evt.id) {
case APP_EVT_WIFI_CONNECTED:
ai_set_state(AI_STATE_IDLE);
break;
case APP_EVT_WIFI_DISCONNECTED:
ai_set_state(AI_STATE_NETWORK_ERROR);
break;
case APP_EVT_WAKE_WORD:
ai_set_state(AI_STATE_WAKEUP);
ai_set_state(AI_STATE_LISTENING);
break;
case APP_EVT_VOICE_INTERRUPT:
audio_stop_tts();
ai_set_state(AI_STATE_LISTENING);
break;
case APP_EVT_RECORD_START:
ai_set_state(AI_STATE_LISTENING);
break;
case APP_EVT_RECORD_STOP:
audio_stop_record();
ai_set_state(AI_STATE_THINKING);
break;
case APP_EVT_TOUCH_TOP:
ai_set_state(AI_STATE_TOUCH_FEEDBACK);
app_post_event(APP_EVT_WAKE_WORD, 0, 0, NULL);
break;
case APP_EVT_TOUCH_LEFT:
role_switch_prev();
eye_play_anim("prev_role");
haptic_short();
break;
case APP_EVT_TOUCH_RIGHT:
role_switch_next();
eye_play_anim("next_role");
haptic_short();
break;
case APP_EVT_TOUCH_BACK:
eye_play_anim("config");
haptic_double();
blufi_start_config();
break;
case APP_EVT_IMU_SHAKE:
case APP_EVT_IMU_FLIP:
case APP_EVT_IMU_TILT_LEFT:
case APP_EVT_IMU_TILT_RIGHT:
handle_imu_event(&evt);
break;
case APP_EVT_AI_THINKING:
ai_set_state(AI_STATE_THINKING);
break;
case APP_EVT_AI_SPEAKING:
ai_set_state(AI_STATE_SPEAKING);
break;
case APP_EVT_AI_ERROR:
ai_set_state(AI_STATE_NETWORK_ERROR);
break;
case APP_EVT_OTA_START:
ai_set_state(AI_STATE_OTA);
break;
default:
break;
}
}
}
}
10. MCP 工具调用
MCP 的作用是让 AI 不只是回答问题,还能控制设备。
例如用户说:
把眼睛切换成爱心主题。
进入配网模式。
切换成睡觉表情。
停止播放。
定义表情工具:
{
"name": "self.eye.set_expression",
"description": "设置四博AI双目的表情状态",
"parameters": {
"type": "object",
"properties": {
"expression": {
"type": "string",
"enum": ["开心", "思考", "惊讶", "困惑", "睡觉", "待机"]
}
},
"required": ["expression"]
}
}
设备侧实现:
// Show the eye animation that matches an expression name received from an
// MCP tool call.
//
// expression  NUL-terminated expression name; must not be NULL. Names that
//             match none of the cases below (including "待机") show the
//             idle blink animation.
void mcp_set_eye_expression(const char *expression)
{
    if (!strcmp(expression, "开心")) {
        eye_play_anim("happy");
        haptic_short();
        return;
    }
    if (!strcmp(expression, "思考")) {
        eye_play_anim("thinking");
        return;
    }
    if (!strcmp(expression, "惊讶")) {
        eye_play_anim("surprised");
        haptic_double();
        return;
    }
    if (!strcmp(expression, "困惑")) {
        eye_play_anim("confused");
        return;
    }
    if (!strcmp(expression, "睡觉")) {
        eye_play_anim("sleep");
        return;
    }

    eye_play_anim("idle_blink");
}
MCP 分发:
// Execute an MCP tool call on the device.
//
// tool_name  Tool identifier from the request; NULL is rejected with a warning.
// args       Parsed JSON arguments; may be NULL for tools without arguments.
//
// Supported tools:
//   self.eye.set_expression   - needs a string "expression" argument; the call
//                               is ignored if it is missing or not a string.
//   self.device.enter_config  - starts provisioning with visual, haptic and
//                               spoken feedback.
//   self.audio.stop           - stops speech playback and returns to idle.
// Unknown tool names are logged and ignored.
void mcp_dispatch_tool(const char *tool_name, cJSON *args)
{
    // The name comes from a remote request; never hand NULL to strcmp().
    if (tool_name == NULL) {
        ESP_LOGW("MCP", "tool call without a name ignored");
        return;
    }

    if (strcmp(tool_name, "self.eye.set_expression") == 0) {
        cJSON *exp = cJSON_GetObjectItem(args, "expression");
        if (cJSON_IsString(exp) && exp->valuestring != NULL) {
            mcp_set_eye_expression(exp->valuestring);
        }
        return;
    }

    if (strcmp(tool_name, "self.device.enter_config") == 0) {
        blufi_start_config();
        eye_play_anim("config");
        haptic_double();
        audio_tts_play("设备已进入配网模式。");
        return;
    }

    if (strcmp(tool_name, "self.audio.stop") == 0) {
        audio_stop_tts();
        ai_set_state(AI_STATE_IDLE);
        return;
    }

    ESP_LOGW("MCP", "unknown tool: %s", tool_name);
}
11. 素材 OTA 与分区设计
眼睛素材建议不要全部写死在固件中,而是放到素材分区。
/spiffs/
├── eye_128/
│ ├── idle.bin
│ ├── wake.bin
│ ├── thinking.bin
│ ├── speaking.bin
│ ├── happy.bin
│ └── sleep.bin
├── eye_071/
│ ├── idle.bin
│ ├── wake.bin
│ └── happy.bin
└── prompt/
├── boot.wav
├── ding.wav
├── net_error.wav
└── low_battery.wav
推荐分区(各分区合计约 14.1MB,末尾地址为 0xE20000,需搭配 16MB Flash):
# Name, Type, SubType, Offset, Size
nvs, data, nvs, 0x9000, 0x6000
otadata, data, ota, 0xf000, 0x2000
phy_init, data, phy, 0x11000, 0x1000
factory, app, factory, 0x20000, 2M
ota_0, app, ota_0, 0x220000, 2M
ota_1, app, ota_1, 0x420000, 2M
assets, data, spiffs, 0x620000, 4M
storage, data, fat, 0xA20000, 4M
12. 结论
四博 AI 双目方案的本质,是一个多模态 AI 硬件底座:
ESP32-S3:联网、状态机、UI、MCP、OTA
VB6824:唤醒、AEC、打断、语音前端
双目屏:表情和状态可视化
四路触控:本地输入
三轴传感:动作感知
震动马达:触觉反馈
四博小助手:声音克隆、知识库、MCP、OTA
它适合 AI 音箱、AI 桌宠、儿童陪伴机、智能学习终端、IP 潮玩、品牌客服终端等产品形态。
真正有竞争力的 AI 硬件,不是单纯“会回答问题”,而是能够通过声音、眼神、触摸、姿态和触觉反馈与用户形成自然交互。