소스 검색

fix:
- add i18n for descriptions
- stop loading when scrape fails
- clear input after successful scrape

jimmylee 3 달 전
부모
커밋
6c13e5ee9b
5개의 변경된 파일, 95개의 추가 및 69개의 삭제
  1. 34 20
      src/components/pages/knowledge/WebCrawlDialog.vue
  2. 10 45
      src/components/pages/knowledge/folderFileBox.vue
  3. 17 1
      src/lang/cn.json
  4. 17 1
      src/lang/en.json
  5. 17 2
      src/lang/hk.json

+ 34 - 20
src/components/pages/knowledge/WebCrawlDialog.vue

@@ -1,54 +1,55 @@
 <template>
   <el-dialog
     :visible.sync="localVisible"
-    title="从网站获取内容"
+    :title="lang.webCrawlDialog.title"
     width="800px"
     :close-on-click-modal="true"
     class="web-crawl-dialog"
   >
     <el-tabs v-model="tab" class="web-crawl-tabs">
-      <el-tab-pane label="爬取单一网页" name="single" />
-      <el-tab-pane label="爬取整站" name="site" disabled />
+      <el-tab-pane :label="lang.webCrawlDialog.tabSingle" name="single" />
+      <el-tab-pane :label="lang.webCrawlDialog.tabSite" name="site" disabled />
     </el-tabs>
     <div class="web-crawl-form">
       <el-input
-        v-model="url"
-        placeholder="请输入网址"
+        :value="inputUrl"
+        @input="$emit('update:inputUrl', $event)"
+        :placeholder="lang.webCrawlDialog.inputPlaceholder"
         class="web-crawl-input"
       />
       <div class="web-crawl-tip">
-        处于合规原因,当前仅支持公开可获取的静态网页(微信公众号、知乎、小红书等特殊内容不可爬取)
+        {{lang.webCrawlDialog.tip}}
       </div>
       <el-collapse v-model="advancedOpen" class="web-crawl-advanced">
-        <el-collapse-item title="高级配置" name="advanced">
+        <el-collapse-item :title="lang.webCrawlDialog.advancedTitle" name="advanced">
           <div class="web-crawl-advanced-row web-crawl-advanced-row-flex">
             <div class="web-crawl-advanced-block">
               <div class="web-crawl-advanced-item">
-                <span>定时获取</span>
-                <el-select v-model="schedule" placeholder="Never" style="width: 120px; margin-left: 8px;" disabled>
+                <span>{{lang.webCrawlDialog.schedule}}</span>
+                <el-select v-model="schedule" :placeholder="lang.webCrawlDialog.scheduleNever" style="width: 120px; margin-left: 8px;" disabled>
                   <!-- 暂未实现,禁用 -->
-                  <el-option label="Never" value="never" />
-                  <el-option label="Every day" value="day" />
-                  <el-option label="Every week" value="week" />
+                  <el-option :label="lang.webCrawlDialog.scheduleNever" value="never" />
+                  <el-option :label="lang.webCrawlDialog.scheduleDay" value="day" />
+                  <el-option :label="lang.webCrawlDialog.scheduleWeek" value="week" />
                 </el-select>
               </div>
-              <div class="web-crawl-advanced-tip">如开启,将每隔指定时长重新获取该网页内容</div>
+              <div class="web-crawl-advanced-tip">{{lang.webCrawlDialog.scheduleTip}}</div>
             </div>
             <div class="web-crawl-advanced-block">
               <div class="web-crawl-advanced-item">
-                <span>获取网页中的附件</span>
+                <span>{{lang.webCrawlDialog.attachment}}</span>
                 <el-switch v-model="getAttachment" style="margin-left: 8px;" disabled />
                 <!-- 暂未实现,禁用 -->
               </div>
-              <div class="web-crawl-advanced-tip">如开启,将自动下载该页面中 pdf、docx、doc、pptx 格式的附件</div>
+              <div class="web-crawl-advanced-tip">{{lang.webCrawlDialog.attachmentTip}}</div>
             </div>
           </div>
         </el-collapse-item>
       </el-collapse>
     </div>
     <div slot="footer" class="dialog-footer">
-      <el-button @click="$emit('update:visible', false)">取消</el-button>
-      <el-button type="primary" @click="$emit('confirm', { url, tab, schedule, getAttachment })">运行</el-button>
+      <el-button @click="$emit('update:visible', false)">{{lang.cancel}}</el-button>
+      <el-button type="primary" @click="handleConfirm">{{lang.webCrawlDialog.run}}</el-button>
     </div>
   </el-dialog>
 </template>
@@ -60,16 +61,19 @@ export default {
     visible: {
       type: Boolean,
       default: false
+    },
+    inputUrl: {
+      type: String,
+      default: ''
     }
   },
   data() {
     return {
       localVisible: this.visible,
       tab: 'single',
-      url: '',
-      advancedOpen: ['advanced'],
+      advancedOpen: [],
       schedule: 'never',
-      getAttachment: true
+      getAttachment: false
     }
   },
   watch: {
@@ -85,6 +89,16 @@ export default {
   },
   updated() {
     // console.log('[WebCrawlDialog updated] props.visible:', this.visible);
+  },
+  methods: {
+    handleConfirm() {
+      this.$emit('confirm', {
+        url: this.inputUrl,
+        tab: this.tab,
+        schedule: this.schedule,
+        getAttachment: this.getAttachment
+      });
+    }
   }
 }
 </script>

+ 10 - 45
src/components/pages/knowledge/folderFileBox.vue

@@ -394,7 +394,7 @@
     ></wOffice>
     <relateFiles ref="relateFiles"></relateFiles>
     <checkDialog ref="checkDialog"></checkDialog>
-    <WebCrawlDialog :visible.sync="webUrlDialogVisible" @confirm="handleWebUrlConfirm"/>
+    <WebCrawlDialog :visible.sync="webUrlDialogVisible" :input-url.sync="webInputUrl" @confirm="handleWebUrlConfirm"/>
   </div>
 </template>
 
@@ -512,6 +512,7 @@ export default {
       ptotal: 0,
       fileType: 1,
       webUrlDialogVisible: false,
+      webInputUrl: '',
 
     };
   },
@@ -701,27 +702,10 @@ export default {
     handleWebUrlConfirm(dataFromForm) {
       if (!dataFromForm) return;
       this.proVisible = true;
-      // this.isLoading = true;
-      // 直接 decode,不用判断前缀
-      // let url = decodeURIComponent(dataFromForm.url);
-
-      // const fileExtension = file.name.split(".").pop().toLowerCase();
-
-      // if (!allowedExtensions.includes(fileExtension)) {
-      //   this.$message.error(`${this.lang.unsupFileformats}: ${file.name}`);
-      //   await new Promise((resolve) => setTimeout(resolve, 1000)); // 延迟1秒再跳过
-      //   continue; // 跳过不支持的文件
-      // }
       let uuid = uuidv4();
       let formData = new FormData();
-      // const timestamp = Date.now();
-      // const baseName = file.name.slice(0, -(fileExtension.length + 1));
       let string = [this.folderid, this.moFolderid].filter((id) => id);
       formData.append("url", dataFromForm.url)
-      // formData.append(
-      //   "file",
-      //   new File([file], `${baseName}${timestamp}.${fileExtension}`)
-      // );
       formData.append("collection_ids", JSON.stringify(string));
       formData.append("id", uuid);
       formData.append(
@@ -730,41 +714,20 @@ export default {
       );
       formData.append("ingestion_mode", "fast");
       formData.append("run_with_orchestration", "false");
-
-      // 获取html2md服务地址,优先从store取,没有则默认
+      debugger
       const html2mdApi = process.env.VUE_APP_HTML2MD_API || 'http://localhost:7004';
       const url = html2mdApi + '/file/knowledge-base/html-to-markdown';
       const headers = {'Content-Type': 'multipart/form-data'};
       this.ajax.post(url, formData, {headers})
         .then(async (response) => {
-          // if (this.loadingInstance) {
-          //   this.$nextTick(() => {
-          //     this.loadingInstance.close();
-          //   });
-          // }
-          console.log(response);
           if (response && response.data && response.data.results) {
             this.$message.success('文件上传成功');
-            // 获取文件名和id
-            // const fileName = response.data.results && response.data.results.filename ? response.data.results.filename : url;
             const fileName = response.data.title;
             const documentId = response.data.results && response.data.results.document_id;
-
-            const status =
-              response && response.status === 200
-                ? "success"
-                : "failed";
-            const msg =
-              response && response.status === 200
-                ? "切片成功"
-                : "切片失败";
-            this.$message({
-              message: msg,
-              type: status == "success" ? status : "error",
-            });
-            // setTimeout(() => {
+            const status = response && response.status === 200 ? "success" : "failed";
+            const msg = response && response.status === 200 ? "切片成功" : "切片失败";
+            this.$message({ message: msg, type: status == "success" ? status : "error" });
             this.getData();
-            // }, 1000)
             setTimeout(() => {
               this.fileArray.forEach((e) => {
                 if (e.documentid === uuid) {
@@ -772,8 +735,6 @@ export default {
                 }
               });
             }, 1000);
-
-            // 调用 bindFileAndFolder
             if (documentId) {
               await this.bindFileAndFolder(
                 fileName,
@@ -783,9 +744,12 @@ export default {
                 this.moFolderid
               );
             }
+            // 受控组件写法,直接清空 input
+            this.webInputUrl = '';
           } else {
             this.$message.error(response && response.data && response.data.msg ? response.data.msg : '文件上传失败');
           }
+          this.proVisible = false;
         })
         .catch((error) => {
           this.$message.error('服务器繁忙');
@@ -797,6 +761,7 @@ export default {
               this.loadingInstance.close();
             });
           }
+          this.proVisible = false;
         });
     },
     async bindFileAndFolder(name, docid, uid, folder_id, mofid = '') {

+ 17 - 1
src/lang/cn.json

@@ -154,5 +154,21 @@
   "allSelect":"全选",
   "pleaseSelect":"请选择",
   "a_total_of_x_items":"共 ${x} 条",
-  "x_items_selected":"已选文件${x}个"
+  "x_items_selected":"已选文件${x}个",
+  "webCrawlDialog": {
+    "title": "从网站获取内容",
+    "tabSingle": "爬取单一网页",
+    "tabSite": "爬取整站",
+    "inputPlaceholder": "请输入网址",
+    "tip": "出于合规原因,当前仅支持公开可获取的静态网页(微信公众号、知乎、小红书等特殊内容不可爬取)",
+    "advancedTitle": "高级配置",
+    "schedule": "定时获取",
+    "scheduleNever": "从不",
+    "scheduleDay": "每天",
+    "scheduleWeek": "每周",
+    "scheduleTip": "如开启,将每隔指定时长重新获取该网页内容",
+    "attachment": "获取网页中的附件",
+    "attachmentTip": "如开启,将自动下载该页面中 pdf、docx、doc、pptx 格式的附件",
+    "run": "运行"
+  }
 }

+ 17 - 1
src/lang/en.json

@@ -154,5 +154,21 @@
   "allSelect":"All Select",
   "pleaseSelect":"Please select",
   "a_total_of_x_items":"A total of ${x} items.",
-  "x_items_selected":"${x} items selected"
+  "x_items_selected":"${x} items selected",
+  "webCrawlDialog": {
+    "title": "Get Content from Website",
+    "tabSingle": "Crawl Single Page",
+    "tabSite": "Crawl Whole Site",
+    "inputPlaceholder": "Please enter URL",
+    "tip": "For compliance reasons, only publicly accessible static web pages are supported (WeChat Official Accounts, Zhihu, Xiaohongshu, etc. are not supported)",
+    "advancedTitle": "Advanced Settings",
+    "schedule": "Scheduled Fetch",
+    "scheduleNever": "Never",
+    "scheduleDay": "Every day",
+    "scheduleWeek": "Every week",
+    "scheduleTip": "If enabled, the page will be fetched again at the specified interval",
+    "attachment": "Fetch Attachments in Page",
+    "attachmentTip": "If enabled, will automatically download attachments in pdf, docx, doc, pptx format from the page",
+    "run": "Run"
+  }
 }

+ 17 - 2
src/lang/hk.json

@@ -154,6 +154,21 @@
   "allSelect":"全選",
   "pleaseSelect":"請選擇",
   "a_total_of_x_items":"共 ${x} 條",
-  "x_items_selected":"已選文件${x}個"
-
+  "x_items_selected":"已選文件${x}個",
+  "webCrawlDialog": {
+    "title": "從網站獲取內容",
+    "tabSingle": "爬取單一網頁",
+    "tabSite": "爬取整站",
+    "inputPlaceholder": "請輸入網址",
+    "tip": "出於合規原因,目前僅支持公開可獲取的靜態網頁(微信公眾號、知乎、小紅書等特殊內容不可爬取)",
+    "advancedTitle": "高級配置",
+    "schedule": "定時獲取",
+    "scheduleNever": "從不",
+    "scheduleDay": "每天",
+    "scheduleWeek": "每週",
+    "scheduleTip": "如開啟,將每隔指定時長重新獲取該網頁內容",
+    "attachment": "獲取網頁中的附件",
+    "attachmentTip": "如開啟,將自動下載該頁面中 pdf、docx、doc、pptx 格式的附件",
+    "run": "運行"
+  }
 }