Просмотр исходного кода

feat(speaking): make WS-to-HTTP fallback retry-only and tidy chat timing

- Add VITE_SPEAKING_TRANSPORT config (default 'websocket'); HTTP path
  still reachable for debugging or as opt-in
- Remove WS auto-fallback to /speak: WS errors now surface to user via
  student message error state. Clicking the existing 'retry' button calls
  retryMessage → sendStudentMessage → /speak (HTTP on retry only)
- SSE 'error' event support: parseSSEStream yields it,
  sendStudentMessage marks studentMsg as error and removes ai placeholder
- Defer aiMsg push until transcript arrives so AI typing-bubble doesn't
  precede student row; add student-side typing-bubble for STT loading
- Drop the orphan AI-typing floater + hasLoadingPlaceholder computed
- Style finish-btn:disabled gray so it visibly disables during 'starting'
- Update test-dialogue-stream-fallback to assert no auto-fallback +
  manual retry hits /speak with original turnId

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
jimmylee 1 неделя назад
Родитель
Commit
66b48a0896

+ 4 - 1
.env

@@ -1,3 +1,6 @@
 VITE_AZURE_SPEECH_KEY=5b401f52c9064cec9247f4190cf99ea4
 VITE_AZURE_SPEECH_REGION=eastasia
-# VITE_SPEAKING_API_HOST=http://localhost:8000
+VITE_SPEAKING_API_HOST=http://localhost:8000
+
+# 学生录音上送方式:websocket(默认,流式) | http(单次 POST /speak)
+VITE_SPEAKING_TRANSPORT=http

+ 5 - 0
.env.example

@@ -1,3 +1,8 @@
 VITE_AZURE_SPEECH_KEY=
 VITE_AZURE_SPEECH_REGION=
 VITE_SPEAKING_API_HOST=https://ppt-english-speaking-api.cocorobo.cn
+
+# 学生录音上送方式:websocket(默认,流式) | http(单次 POST /speak)
+# 留空或 'websocket'/'ws' → WebSocket;'http' → /speak。WS 失败时不会自动降级,
+# 由用户点"重试"按钮触发 /speak 重发。
+VITE_SPEAKING_TRANSPORT=websocket

+ 1 - 0
package.json

@@ -8,6 +8,7 @@
     "build": "run-p type-check \"build-only {@}\" --",
     "preview": "vite preview",
     "test:speaking-api-config": "node scripts/test-speaking-api-config.mjs",
+    "test:dialogue-stream-fallback": "node scripts/test-dialogue-stream-fallback.mjs",
     "build-only": "vite build",
     "type-check": "vue-tsc --build --force",
     "lint": "eslint . --ext .vue,.js,.jsx,.cjs,.mjs,.ts,.tsx,.cts,.mts --fix --ignore-path .gitignore",

+ 126 - 0
scripts/test-dialogue-stream-fallback.mjs

@@ -0,0 +1,126 @@
+import assert from 'node:assert/strict'
+import { readFile } from 'node:fs/promises'
+import ts from 'typescript'
+
+const sourceUrl = new URL('../src/views/Editor/EnglishSpeaking/composables/useDialogueEngine.ts', import.meta.url)
+let source = await readFile(sourceUrl, 'utf8')
+
+source = source
+  .replace(
+    "import { ref, reactive, computed, onUnmounted } from 'vue'",
+    `
+const ref = value => ({ value })
+const reactive = value => value
+const computed = getter => ({ get value() { return getter() } })
+const onUnmounted = () => {}
+`,
+  )
+  .replace(
+    "import { createDialogueApi, DialogueApiError } from '../services/llmService'",
+    `
+const createDialogueApi = () => globalThis.__dialogueApi
+class DialogueApiError extends Error {
+  constructor(message, status) {
+    super(message)
+    this.status = status
+  }
+}
+`,
+  )
+  .replace(
+    "import { buildSpeakingWsUrl } from '../services/speakingApiConfig'",
+    "const buildSpeakingWsUrl = () => 'wss://example.test/api/speaking/dialogue/speak-stream'",
+  )
+
+const compiled = ts.transpileModule(source, {
+  compilerOptions: {
+    module: ts.ModuleKind.ESNext,
+    target: ts.ScriptTarget.ES2022,
+  },
+}).outputText
+
+class FakeWebSocket {
+  static CONNECTING = 0
+  static OPEN = 1
+  static CLOSING = 2
+  static CLOSED = 3
+  static instances = []
+
+  readyState = FakeWebSocket.CONNECTING
+  binaryType = ''
+  sent = []
+  onopen = null
+  onmessage = null
+  onerror = null
+  onclose = null
+
+  constructor(url) {
+    this.url = url
+    FakeWebSocket.instances.push(this)
+  }
+
+  send(payload) {
+    this.sent.push(payload)
+  }
+
+  close() {
+    this.readyState = FakeWebSocket.CLOSED
+    this.onclose?.({})
+  }
+}
+
+globalThis.WebSocket = FakeWebSocket
+
+const speakCalls = []
+globalThis.__dialogueApi = {
+  async *speak(sessionId, audioBlob, signal, turnId) {
+    speakCalls.push({ sessionId, audioBlob, signal, turnId })
+    yield { type: 'transcript', text: 'hello' }
+    yield { type: 'token', text: 'Hi.' }
+    yield { type: 'done', isComplete: false }
+  },
+  generateGreeting() {
+    throw new Error('not used')
+  },
+  getReport() {
+    throw new Error('not used')
+  },
+  completeSession() {
+    throw new Error('not used')
+  },
+}
+
+const mod = await import(`data:text/javascript,${encodeURIComponent(compiled)}`)
+const engine = mod.useDialogueEngine()
+engine.attachSession({ sessionId: 'session-1', totalRounds: 3 })
+
+const ctl = engine.beginStudentStream({ sampleRate: 16000, bits: 16, channels: 1 })
+assert.ok(ctl)
+
+const ws = FakeWebSocket.instances[0]
+ws.readyState = FakeWebSocket.CLOSED
+ws.onerror?.({})
+ws.onclose?.({})
+
+const audioBlob = new Blob([new Uint8Array([1, 2, 3, 4])], { type: 'audio/webm' })
+ctl.commit(audioBlob)
+await new Promise(resolve => setTimeout(resolve, 0))
+
+// WS 失败时不再自动降级 HTTP——api.speak 不应被自动调用。
+assert.equal(speakCalls.length, 0, 'auto fallback to /speak should not happen')
+
+// 应当只 push 一条 student 消息(已 splice 掉 ai 占位),处于 error 状态等待用户点重试。
+assert.equal(engine.messages.value.length, 1)
+const studentMsg = engine.messages.value[0]
+assert.equal(studentMsg.role, 'student')
+assert.equal(studentMsg.status, 'error')
+assert.equal(studentMsg.recovery, 'retry')
+assert.equal(studentMsg.audioBlob, audioBlob, 'audioBlob preserved for retry')
+assert.equal(studentMsg.turnId, ctl.turnId)
+
+// 用户点重试 → engine.retryMessage 走 sendStudentMessage → /speak(HTTP)。
+await engine.retryMessage(studentMsg.id)
+assert.equal(speakCalls.length, 1, 'retry should invoke /speak once')
+assert.equal(speakCalls[0].sessionId, 'session-1')
+assert.equal(speakCalls[0].turnId, ctl.turnId, 'retry reuses original turnId for idempotency')
+assert.equal(speakCalls[0].audioBlob, audioBlob)

+ 1 - 0
src/types/englishSpeaking.ts

@@ -232,6 +232,7 @@ export type SSEEvent =
   | { type: 'transcript'; text: string }
   | { type: 'token'; text: string }
   | { type: 'done'; isComplete: boolean }
+  | { type: 'error'; message: string }
 
 // 对话会话配置
 export interface SessionConfig {

+ 112 - 44
src/views/Editor/EnglishSpeaking/composables/useDialogueEngine.ts

@@ -113,7 +113,8 @@ export function useDialogueEngine() {
   async function sendStudentMessage(audioBlob: Blob, turnId: string) {
     if (!sessionId.value || isProcessing.value) return
 
-    // Add student message (loading)
+    // 只 push student 占位。aiMsg 留到 transcript 事件到达再 push——避免 AI typing-bubble
+    // 抢在 student 转录文本前出现。学生 loading 状态下由 student 自己的 typing-bubble 占位。
     const studentMsg = reactive<PreviewChatMessage>({
       id: crypto.randomUUID(),
       role: 'student',
@@ -125,7 +126,6 @@ export function useDialogueEngine() {
     })
     messages.value.push(studentMsg)
 
-    // Add AI message placeholder
     const aiMsg = reactive<PreviewChatMessage>({
       id: crypto.randomUUID(),
       role: 'ai',
@@ -144,8 +144,9 @@ export function useDialogueEngine() {
         if (event.type === 'transcript') {
           studentMsg.content = event.text
           studentMsg.status = 'done'
-          // Now push AI message placeholder
-          messages.value.push(aiMsg)
+          if (!isFinalRound.value) {
+            messages.value.push(aiMsg)
+          }
         } else if (event.type === 'token') {
           aiMsg.content += event.text
         } else if (event.type === 'done') {
@@ -155,6 +156,13 @@ export function useDialogueEngine() {
           if (!event.isComplete) {
             currentRound.value++
           }
+        } else if (event.type === 'error') {
+          studentMsg.status = 'error'
+          studentMsg.error = friendlyErrorMessage(event.message)
+          studentMsg.recovery = classifyError(event.message, undefined, 'student')
+          const aiIdx = messages.value.indexOf(aiMsg)
+          if (aiIdx !== -1) messages.value.splice(aiIdx, 1)
+          return
         }
       }
 
@@ -330,15 +338,55 @@ export function useDialogueEngine() {
 
     let aborted = false
     let committed = false
+    let pendingCommitBlob: Blob | null = null
+    let openWaitTimer: ReturnType<typeof setTimeout> | null = null
     let chunkQueue: ArrayBuffer[] = []
     let open = false
 
+    const clearOpenWaitTimer = () => {
+      if (openWaitTimer) {
+        clearTimeout(openWaitTimer)
+        openWaitTimer = null
+      }
+    }
+
+    const pushPlaceholders = (blob: Blob) => {
+      studentMsg = reactive<PreviewChatMessage>({
+        id: crypto.randomUUID(),
+        role: 'student',
+        content: '',
+        timestamp: new Date(),
+        status: 'loading',
+        audioBlob: blob,
+        turnId,
+      })
+      messages.value.push(studentMsg)
+
+      // aiMsg 创建但暂不 push——等 transcript 事件到达后再插入聊天列表。
+      // 这样 student loading 阶段不会有 AI typing-bubble 抢先显示。
+      if (!isFinalRound.value) {
+        aiMsg = reactive<PreviewChatMessage>({
+          id: crypto.randomUUID(),
+          role: 'ai',
+          content: '',
+          timestamp: new Date(),
+          status: 'loading',
+          turnId,
+        })
+      }
+    }
+
     const finalizeError = (raw: string) => {
       const text = friendlyErrorMessage(raw)
       if (studentMsg && studentMsg.status === 'loading') {
         studentMsg.status = 'error'
         studentMsg.error = text
         studentMsg.recovery = classifyError(raw, undefined, 'student')
+        // 学生侧出错时把 ai 占位移除,避免孤立 typing-bubble。
+        if (aiMsg && aiMsg.status === 'loading') {
+          const idx = messages.value.indexOf(aiMsg)
+          if (idx !== -1) messages.value.splice(idx, 1)
+        }
       } else if (aiMsg && aiMsg.status === 'loading') {
         aiMsg.status = 'error'
         aiMsg.error = text
@@ -347,7 +395,12 @@ export function useDialogueEngine() {
     }
 
     ws.onopen = () => {
+      if (aborted) {
+        try { ws.close() } catch { /* ignore */ }
+        return
+      }
       open = true
+      clearOpenWaitTimer()
       ws.send(JSON.stringify({
         type: 'start',
         sessionId: sessionId.value,
@@ -358,6 +411,12 @@ export function useDialogueEngine() {
       }))
       for (const c of chunkQueue) ws.send(c)
       chunkQueue = []
+      if (pendingCommitBlob) {
+        const blob = pendingCommitBlob
+        pendingCommitBlob = null
+        pushPlaceholders(blob)
+        ws.send(JSON.stringify({ type: 'stop' }))
+      }
     }
 
     ws.onmessage = (e: MessageEvent) => {
@@ -367,6 +426,9 @@ export function useDialogueEngine() {
         if (data.type === 'transcript' && studentMsg) {
           studentMsg.content = data.text
           studentMsg.status = 'done'
+          if (aiMsg && !messages.value.includes(aiMsg)) {
+            messages.value.push(aiMsg)
+          }
         }
         else if (data.type === 'token' && aiMsg) {
           aiMsg.content += data.content
@@ -386,10 +448,31 @@ export function useDialogueEngine() {
     }
 
     ws.onerror = () => {
-      if (!aborted && committed) finalizeError('WebSocket error')
+      if (aborted) return
+      if (pendingCommitBlob) {
+        // commit 已发但 ws 仍 CONNECTING → 直接出错给用户,不再自动降级 HTTP。
+        const blob = pendingCommitBlob
+        pendingCommitBlob = null
+        clearOpenWaitTimer()
+        pushPlaceholders(blob)
+        finalizeError('WebSocket error')
+        try { ws.close() } catch { /* ignore */ }
+        return
+      }
+      if (committed) finalizeError('WebSocket error')
+      // commit 之前断开:什么都不做,等用户按完成时由 commit 处理 readyState。
     }
     ws.onclose = () => {
-      if (!committed || aborted) return
+      if (aborted) return
+      if (pendingCommitBlob) {
+        const blob = pendingCommitBlob
+        pendingCommitBlob = null
+        clearOpenWaitTimer()
+        pushPlaceholders(blob)
+        finalizeError('Connection closed')
+        return
+      }
+      if (!committed) return
       if (studentMsg?.status === 'loading') finalizeError('Connection closed')
       else if (aiMsg?.status === 'loading') finalizeError('Connection closed')
     }
@@ -401,41 +484,38 @@ export function useDialogueEngine() {
     }
 
     const commit = (blob: Blob) => {
-      if (committed || aborted) return
+      if (committed) return
       committed = true
 
-      studentMsg = reactive<PreviewChatMessage>({
-        id: crypto.randomUUID(),
-        role: 'student',
-        content: '',
-        timestamp: new Date(),
-        status: 'loading',
-        audioBlob: blob,
-        turnId,
-      })
-      messages.value.push(studentMsg)
-
-      if (!isFinalRound.value) {
-        aiMsg = reactive<PreviewChatMessage>({
-          id: crypto.randomUUID(),
-          role: 'ai',
-          content: '',
-          timestamp: new Date(),
-          status: 'loading',
-          turnId,
-        })
-        messages.value.push(aiMsg)
-      }
-
       if (open && ws.readyState === WebSocket.OPEN) {
+        pushPlaceholders(blob)
         ws.send(JSON.stringify({ type: 'stop' }))
-      } else {
-        ws.close()
+        return
+      }
+
+      if (ws.readyState === WebSocket.CONNECTING) {
+        // 连接中按完成:等 onopen 至多 2.5s。超时还没来 = 直接报错给用户重试。
+        pendingCommitBlob = blob
+        openWaitTimer = setTimeout(() => {
+          if (!pendingCommitBlob) return
+          const b = pendingCommitBlob
+          pendingCommitBlob = null
+          try { ws.close() } catch { /* ignore */ }
+          pushPlaceholders(b)
+          finalizeError('Connection closed')
+        }, 2500)
+        return
       }
+
+      // CLOSING / CLOSED:commit 时 ws 已断开 → 直接报错。
+      pushPlaceholders(blob)
+      finalizeError('Connection closed')
     }
 
     const abort = () => {
       aborted = true
+      pendingCommitBlob = null
+      clearOpenWaitTimer()
       try { ws.close() } catch { /* ignore */ }
       // No messages were pushed (commit not called) → nothing to clean up.
     }
@@ -443,17 +523,6 @@ export function useDialogueEngine() {
     return { turnId, pushChunk, commit, abort }
   }
 
-  /**
-   * 流式失败时的 HTTP fallback:用完整 audioBlob 走旧 /speak 路径。
-   * 会把 beginStudentStream 已 push 的占位消息回收(避免重复)。
-   */
-  async function streamFallback(audioBlob: Blob, studentMsgId: string, aiMsgId: string, turnId: string) {
-    // 移除占位消息
-    messages.value = messages.value.filter(m => m.id !== studentMsgId && m.id !== aiMsgId)
-    // 走旧流程
-    await sendStudentMessage(audioBlob, turnId)
-  }
-
   /**
    * 丢弃当前轮次的所有消息(student + ai),用于"重录"按钮。
    * 通过 turnId 精确定位同一轮的两条消息并一并移除。
@@ -496,7 +565,6 @@ export function useDialogueEngine() {
     retryGreeting,
     sendStudentMessage,
     beginStudentStream,
-    streamFallback,
     retryMessage,
     regenerateAiMessage,
     discardCurrentTurn,

+ 35 - 29
src/views/Editor/EnglishSpeaking/preview/DialogueChatView.vue

@@ -183,6 +183,13 @@
             </button>
           </div>
 
+          <!-- 学生 STT 加载中(与 AI typing-bubble 对称,避免按完成后 student 侧空白) -->
+          <div v-if="message.status === 'loading' && !message.content" class="typing-bubble typing-bubble-student">
+            <span class="typing-dot" style="animation-delay: 0ms" />
+            <span class="typing-dot" style="animation-delay: 150ms" />
+            <span class="typing-dot" style="animation-delay: 300ms" />
+          </div>
+
           <!-- 英文识别文本(带高亮) -->
           <div v-if="showEnglishText && message.content" class="bubble bubble-student">
             <template v-if="message.evaluation?.wordAnalysis">
@@ -262,18 +269,6 @@
         </div>
       </template>
 
-      <!-- AI 思考(STT 或 ai_thinking 且无占位消息时展示) -->
-      <div
-        v-if="(state === 'stt' || state === 'ai_thinking') && !hasLoadingPlaceholder"
-        class="msg-row msg-ai fade-in"
-      >
-        <div class="avatar-sm">{{ aiAvatar }}</div>
-        <div class="typing-bubble">
-          <span class="typing-dot" style="animation-delay: 0ms" />
-          <span class="typing-dot" style="animation-delay: 150ms" />
-          <span class="typing-dot" style="animation-delay: 300ms" />
-        </div>
-      </div>
     </div>
 
     <!-- 沉默提示浮层 -->
@@ -549,6 +544,7 @@ import { useAudioRecorder } from '../composables/useAudioRecorder'
 import { useAudioPlayer } from '../composables/useAudioPlayer'
 import TaskHintModal from './TaskHintModal.vue'
 import { createDialogueApi } from '../services/llmService'
+import { getSpeakTransport } from '../services/speakingApiConfig'
 
 // ─────────────────────────────────────────────
 // Props / Emits
@@ -585,7 +581,11 @@ const emit = defineEmits<{
 // Config
 // ─────────────────────────────────────────────
 
-const MAX_RECORDING_SECONDS = 60
+const MAX_RECORDING_SECONDS = 10
+
+// 学生录音上送方式:'websocket'(默认,流式)或 'http'(单次 POST /speak)。
+// 由环境变量 VITE_SPEAKING_TRANSPORT 控制;详见 speakingApiConfig.getSpeakTransport。
+const speakTransport = getSpeakTransport()
 
 const SILENCE_HINTS = [
   'You could say: "I really like pandas because they are so cute!"',
@@ -667,13 +667,6 @@ const state = computed<
   return 'idle'
 })
 
-// stt/ai_thinking 时,若最后一条已是 loading 的 AI 占位消息,列表里已有 typing-bubble,就不再在末尾叠一个
-const hasLoadingPlaceholder = computed(() => {
-  const msgs = engine.messages.value
-  const last = msgs[msgs.length - 1]
-  return last?.status === 'loading' && last.role === 'ai' && !last.content
-})
-
 const progressPct = computed(() => Math.min((recorder.recordingDuration.value / MAX_RECORDING_SECONDS) * 100, 100))
 const isNearLimit = computed(() => recorder.recordingDuration.value >= MAX_RECORDING_SECONDS * 0.8)
 
@@ -738,14 +731,18 @@ async function handleStartRecording() {
 
   try {
     await recorder.startRecording(startAbortController.signal)
-    // Mic acquired — open the WS now (no placeholders pushed yet).
-    streamCtl = engine.beginStudentStream({
-      sampleRate: recorder.sampleRate.value,
-      bits: 16,
-      channels: 1,
-    })
-    if (streamCtl) {
-      recorder.onChunk.value = streamCtl.pushChunk
+    // 学生录音上送方式由 speakingApiConfig.getSpeakTransport() 决定:
+    // - websocket(默认):开 WS 流式推 PCM;失败由 useDialogueEngine 暴露 error,用户点"重试"才走 HTTP。
+    // - http:跳过 WS,handleFinishRecording 直接走 sendStudentMessage → /speak。
+    if (speakTransport === 'websocket') {
+      streamCtl = engine.beginStudentStream({
+        sampleRate: recorder.sampleRate.value,
+        bits: 16,
+        channels: 1,
+      })
+      if (streamCtl) {
+        recorder.onChunk.value = streamCtl.pushChunk
+      }
     }
   } catch (err: any) {
     if (err.name === 'AbortError') {
@@ -1326,6 +1323,10 @@ onUnmounted(() => {
   border-top-left-radius: 4px;
   box-shadow: 0 1px 2px rgba(0,0,0,0.05);
 }
+.typing-bubble-student {
+  border-top-left-radius: 16px;
+  border-top-right-radius: 4px;
+}
 .typing-dot {
   width: 6px; height: 6px;
   background: rgba(249,115,22,0.7);
@@ -1627,7 +1628,12 @@ onUnmounted(() => {
   background: #f97316;
   border: none;
   color: #fff;
-  &:hover { background: #ea580c; }
+  &:hover:not(:disabled) { background: #ea580c; }
+  &:disabled {
+    background: #d1d5db;
+    color: #9ca3af;
+    cursor: not-allowed;
+  }
 }
 .record-meter {
   flex: 1;

+ 2 - 0
src/views/Editor/EnglishSpeaking/services/llmService.ts

@@ -54,6 +54,8 @@ async function* parseSSEStream(reader: ReadableStreamDefaultReader<Uint8Array>):
               yield { type: 'token', text: parsed.content ?? parsed.text }
             } else if (eventType === 'done') {
               yield { type: 'done', isComplete: parsed.isComplete }
+            } else if (eventType === 'error') {
+              yield { type: 'error', message: parsed.message }
             }
           } catch {
             // skip malformed JSON

+ 14 - 0
src/views/Editor/EnglishSpeaking/services/speakingApiConfig.ts

@@ -1,9 +1,16 @@
 export const DEFAULT_SPEAKING_API_HOST = 'https://ppt-english-speaking-api.cocorobo.cn'
 export const SPEAKING_API_DIALOGUE_PATH = '/api/speaking/dialogue'
 
+export type SpeakTransport = 'websocket' | 'http'
+
+/** 学生录音上送方式:默认 websocket(流式 ASR + LLM)。
+ *  WS 失败时不再自动降级 HTTP——错误暴露给用户,由用户主动点"重试"按钮走 HTTP 重发。 */
+export const DEFAULT_SPEAK_TRANSPORT: SpeakTransport = 'websocket'
+
 type SpeakingApiEnv = {
   readonly [key: string]: string | boolean | undefined
   VITE_SPEAKING_API_HOST?: string
+  VITE_SPEAKING_TRANSPORT?: string
 }
 
 function normalizeHost(host: string): string {
@@ -23,3 +30,10 @@ export function buildSpeakingWsUrl(path: string, env: SpeakingApiEnv | undefined
   const normalizedPath = path.startsWith('/') ? path : `/${path}`
   return getSpeakingApiBaseUrl(env).replace(/^http/, 'ws') + normalizedPath
 }
+
+export function getSpeakTransport(env: SpeakingApiEnv | undefined = import.meta.env): SpeakTransport {
+  const v = env?.VITE_SPEAKING_TRANSPORT?.trim()?.toLowerCase()
+  if (v === 'http') return 'http'
+  if (v === 'websocket' || v === 'ws') return 'websocket'
+  return DEFAULT_SPEAK_TRANSPORT
+}