Explorar el Código

first version of html2md import;

jimmylee hace 3 meses
padre
commit
c32243d323

+ 2 - 1
config/dev.env.js

@@ -3,5 +3,6 @@ const merge = require('webpack-merge')
 const prodEnv = require('./prod.env')
 
 module.exports = merge(prodEnv, {
-  NODE_ENV: '"development"'
+  NODE_ENV: '"development"',
+  // Base URL of the html2md service for development builds. It is read as
+  // process.env.VUE_APP_HTML2MD_API in folderFileBox.vue, which appends
+  // '/file/knowledge-base/html-to-markdown' to it. The value is wrapped in
+  // an extra pair of double quotes, the same way NODE_ENV is.
+  VUE_APP_HTML2MD_API: '"http://localhost:7004"'
 })

+ 2 - 1
config/prod.env.js

@@ -1,4 +1,5 @@
 'use strict'
 module.exports = {
-  NODE_ENV: '"production"'
+  NODE_ENV: '"production"',
+  VUE_APP_HTML2MD_API: '"r2rapi.cocorobo.cn"'
 }

+ 148 - 0
src/components/pages/knowledge/WebCrawlDialog.vue

@@ -0,0 +1,148 @@
+<template>
+  <el-dialog
+    :visible.sync="localVisible"
+    title="从网站获取内容"
+    width="800px"
+    :close-on-click-modal="true"
+    class="web-crawl-dialog"
+  >
+    <el-tabs v-model="tab" class="web-crawl-tabs">
+      <el-tab-pane label="爬取单一网页" name="single" />
+      <el-tab-pane label="爬取整站" name="site" disabled />
+    </el-tabs>
+    <div class="web-crawl-form">
+      <el-input
+        v-model="url"
+        placeholder="请输入网址"
+        class="web-crawl-input"
+      />
+      <div class="web-crawl-tip">
+        出于合规原因,当前仅支持公开可获取的静态网页(微信公众号、知乎、小红书等特殊内容不可爬取)
+      </div>
+      <el-collapse v-model="advancedOpen" class="web-crawl-advanced">
+        <el-collapse-item title="高级配置" name="advanced">
+          <div class="web-crawl-advanced-row web-crawl-advanced-row-flex">
+            <div class="web-crawl-advanced-block">
+              <div class="web-crawl-advanced-item">
+                <span>定时获取</span>
+                <el-select v-model="schedule" placeholder="Never" style="width: 120px; margin-left: 8px;" disabled>
+                  <!-- 暂未实现,禁用 -->
+                  <el-option label="Never" value="never" />
+                  <el-option label="Every day" value="day" />
+                  <el-option label="Every week" value="week" />
+                </el-select>
+              </div>
+              <div class="web-crawl-advanced-tip">如开启,将每隔指定时长重新获取该网页内容</div>
+            </div>
+            <div class="web-crawl-advanced-block">
+              <div class="web-crawl-advanced-item">
+                <span>获取网页中的附件</span>
+                <el-switch v-model="getAttachment" style="margin-left: 8px;" disabled />
+                <!-- 暂未实现,禁用 -->
+              </div>
+              <div class="web-crawl-advanced-tip">如开启,将自动下载该页面中 pdf、docx、doc、pptx 格式的附件</div>
+            </div>
+          </div>
+        </el-collapse-item>
+      </el-collapse>
+    </div>
+    <div slot="footer" class="dialog-footer">
+      <el-button @click="$emit('update:visible', false)">取消</el-button>
+      <el-button type="primary" @click="$emit('confirm', { url, tab, schedule, getAttachment })">运行</el-button>
+    </div>
+  </el-dialog>
+</template>
+
+<script>
+export default {
+  name: 'WebCrawlDialog',
+  props: {
+    visible: {
+      type: Boolean,
+      default: false
+    }
+  },
+  data() {
+    return {
+      localVisible: this.visible,
+      tab: 'single',
+      url: '',
+      advancedOpen: ['advanced'],
+      schedule: 'never',
+      getAttachment: true
+    }
+  },
+  watch: {
+    visible(val) {
+      this.localVisible = val;
+    },
+    localVisible(val) {
+      this.$emit('update:visible', val);
+    }
+  },
+  created() {
+    // console.log('[WebCrawlDialog created] props.visible:', this.visible);
+  },
+  updated() {
+    // console.log('[WebCrawlDialog updated] props.visible:', this.visible);
+  }
+}
+</script>
+
+<style scoped>
+.web-crawl-dialog >>> .el-dialog__body {
+  padding-top: 0;
+}
+.web-crawl-tabs {
+  margin-bottom: 10px;
+}
+.web-crawl-form {
+  margin: 0 10px 0 10px;
+}
+.web-crawl-input {
+  width: 100%;
+  margin-bottom: 8px;
+}
+.web-crawl-tip {
+  color: #888;
+  font-size: 13px;
+  margin-bottom: 10px;
+}
+.web-crawl-advanced {
+  margin-top: 10px;
+}
+.web-crawl-advanced-row {
+  display: flex;
+  align-items: center;
+  margin-bottom: 10px;
+}
+.web-crawl-advanced-row-flex {
+  flex-direction: row;
+  justify-content: flex-start;
+  gap: 40px;
+}
+.web-crawl-advanced-block {
+  display: flex;
+  flex-direction: column;
+  min-width: 260px;
+}
+.web-crawl-advanced-item {
+  display: flex;
+  align-items: center;
+  margin-bottom: 0;
+}
+.web-crawl-advanced-row span {
+  min-width: 110px;
+  color: #333;
+}
+.web-crawl-advanced-tip {
+  color: #888;
+  font-size: 13px;
+  padding-left: 0;
+  margin-bottom: 8px;
+  margin-top: 4px;
+}
+.dialog-footer {
+  text-align: right;
+}
+</style> 

+ 155 - 33
src/components/pages/knowledge/folderFileBox.vue

@@ -40,39 +40,27 @@
               transform: scale(1.1);
               padding-bottom: 2px;
               box-sizing: border-box;
-            "
-            src="../../../assets/shuax.svg"
-            alt=""
-          />
-          {{ lang.flushed }}</el-button
-        >
-        <el-button
-          v-if="userid == data.userid"
-          type="primary"
-          icon="el-icon-plus"
-          size="small"
-          @click="addImg($event)"
-          >{{ lang.uploadFile }}
-          <input
-            type="file"
-            accept="*"
-            style="display: none"
-            multiple="multiple"
-            @change="beforeUpload($event)"
-        /></el-button>
-        <el-button
-          type="primary"
-          size="small"
-          @click="openG"
-          v-if="userid == data.userid && data.isMo == '2'"
-        >
-          <img
-            style="vertical-align: middle"
-            src="../../../assets/ETL.svg"
-            alt=""
-          />
-          {{ lang.associatedFiles }}</el-button
-        >
+            " src="../../../assets/shuax.svg" alt=""/>
+          {{ lang.flushed }}
+        </el-button>
+        <el-button v-if="userid == data.userid" type="primary" icon="el-icon-plus" size="small"
+                   @click="addImg($event)">{{ lang.uploadFile }}
+          <input type="file" accept="*" style="display: none" multiple="multiple"
+                 @change="beforeUpload($event)"/></el-button>
+        <el-button v-if="userid == data.userid" type="primary" icon="el-icon-plus" size="small"
+                   @click="importFromWebpage($event)">{{ lang.importFromWebpage }}
+          <!--          <input-->
+          <!--            type="file"-->
+          <!--            accept="*"-->
+          <!--            style="display: none"-->
+          <!--            multiple="multiple"-->
+          <!--            @change="importFromWebpage($event)"-->
+          <!--            />-->
+        </el-button>
+        <el-button type="primary" size="small" @click="openG" v-if="userid == data.userid && data.isMo == '2'">
+          <img style="vertical-align: middle" src="../../../assets/ETL.svg" alt=""/>
+          {{ lang.associatedFiles }}
+        </el-button>
 
         <el-button
           type="primary"
@@ -406,6 +394,7 @@
     ></wOffice>
     <relateFiles ref="relateFiles"></relateFiles>
     <checkDialog ref="checkDialog"></checkDialog>
+    <WebCrawlDialog :visible.sync="webUrlDialogVisible" @confirm="handleWebUrlConfirm"/>
   </div>
 </template>
 
@@ -434,6 +423,8 @@ import { v4 as uuidv4 } from "uuid";
 import relateFiles from "./components/relateFiles";
 import checkDialog from "./components/checkDialog";
 import testBox from "./components/testBox";
+import WebCrawlDialog from "./WebCrawlDialog.vue";
+
 export default {
   components: {
     wVideo,
@@ -442,6 +433,7 @@ export default {
     relateFiles,
     checkDialog,
     testBox,
+    WebCrawlDialog,
   },
   props: {
     userid: {
@@ -519,6 +511,8 @@ export default {
       pcount: 0,
       ptotal: 0,
       fileType: 1,
+      webUrlDialogVisible: false,
+
     };
   },
   computed: {
@@ -700,6 +694,133 @@ export default {
 
       await uploadFiles(event.target.files);
     },
+    async importFromWebpage(event) {
+      console.log('importFromWebpage');
+      this.webUrlDialogVisible = true;
+    },
+    handleWebUrlConfirm(dataFromForm) {
+      if (!dataFromForm) return;
+      // 直接 decode,不用判断前缀
+      // let url = decodeURIComponent(dataFromForm.url);
+
+      // const fileExtension = file.name.split(".").pop().toLowerCase();
+
+      // if (!allowedExtensions.includes(fileExtension)) {
+      //   this.$message.error(`${this.lang.unsupFileformats}: ${file.name}`);
+      //   await new Promise((resolve) => setTimeout(resolve, 1000)); // 延迟1秒再跳过
+      //   continue; // 跳过不支持的文件
+      // }
+      let uuid = uuidv4();
+      let formData = new FormData();
+      // const timestamp = Date.now();
+      // const baseName = file.name.slice(0, -(fileExtension.length + 1));
+      let string = [this.folderid, this.moFolderid].filter((id) => id);
+      formData.append("url", dataFromForm.url)
+      // formData.append(
+      //   "file",
+      //   new File([file], `${baseName}${timestamp}.${fileExtension}`)
+      // );
+      formData.append("collection_ids", JSON.stringify(string));
+      formData.append("id", uuid);
+      formData.append(
+        "metadata",
+        JSON.stringify({url: dataFromForm.url, collection_ids: string})
+      );
+      formData.append("ingestion_mode", "fast");
+      formData.append("run_with_orchestration", "false");
+
+      // 获取html2md服务地址,优先从store取,没有则默认
+      const html2mdApi = process.env.VUE_APP_HTML2MD_API || 'http://localhost:7004';
+      const url = html2mdApi + '/file/knowledge-base/html-to-markdown';
+      const headers = {'Content-Type': 'multipart/form-data'};
+      this.ajax.post(url, formData, {headers})
+        .then(async (response) => {
+          // if (this.loadingInstance) {
+          //   this.$nextTick(() => {
+          //     this.loadingInstance.close();
+          //   });
+          // }
+          console.log(response);
+          if (response && response.data && response.data.results) {
+            this.$message.success('文件上传成功');
+            // 获取文件名和id
+            // const fileName = response.data.results && response.data.results.filename ? response.data.results.filename : url;
+            const fileName = response.data.title;
+            const documentId = response.data.results && response.data.results.documentId;
+
+            const status =
+              response && response.status === 200
+                ? "success"
+                : "failed";
+            const msg =
+              response && response.status === 200
+                ? "切片成功"
+                : "切片失败";
+            this.$message({
+              message: msg,
+              type: status == "success" ? status : "error",
+            });
+            // setTimeout(() => {
+            this.getData();
+            // }, 1000)
+            setTimeout(() => {
+              this.fileArray.forEach((e) => {
+                if (e.documentid === uuid) {
+                  e.ingestionStatus = status;
+                }
+              });
+            }, 1000);
+
+            // 调用 bindFileAndFolder
+            if (documentId) {
+              await this.bindFileAndFolder(
+                fileName,
+                documentId,
+                this.userid,
+                this.folderid,
+                this.moFolderid
+              );
+            }
+          } else {
+            this.$message.error(response && response.data && response.data.msg ? response.data.msg : '文件上传失败');
+          }
+        })
+        .catch((error) => {
+          this.$message.error('服务器繁忙');
+          if (this.fileNames) this.fileNames.pop && this.fileNames.pop();
+          if (this.fileUrls) this.fileUrls.pop && this.fileUrls.pop();
+          console.log(error);
+          if (this.loadingInstance) {
+            this.$nextTick(() => {
+              this.loadingInstance.close();
+            });
+          }
+        });
+    },
+    async bindFileAndFolder(name, docid, uid, folder_id, mofid = '') {
+      try {
+        let params = {
+          n: name,
+          did: docid,
+          uid: this.userid,
+          fid: this.folderid,
+          mofid: this.moFolderid != this.folderid ? this.moFolderid : "",
+        };
+        const res2 = await this.ajax.post(this.$store.state.api + "addFile", [
+          params,
+        ]);
+        // 如果成功,关闭对话框
+        if (res2 && res2.status === 200) {
+          // this.handleClose && this.handleClose();
+          this.webUrlDialogVisible = false;
+        }
+
+        console.log(res2);
+      } catch (err) {
+        console.error(err);
+        this.$message.error(this.lang.uploadFail);
+      }
+    },
     async uploadFile(formData, name, uuid) {
       try {
         this.ajax
@@ -1064,6 +1185,7 @@ export default {
   padding: 0 24px;
   min-height: fit-content;
 }
+
 .f_box_top_right > .input {
   position: relative;
   /* height: 40px; */

+ 2 - 1
src/lang/cn.json

@@ -52,6 +52,7 @@
   "selectFileName":"请输入你需要搜索的文件名字",
   "flushed":"刷新",
   "uploadFile":"上传文件",
+  "importFromWebpage": "从网页导入",
   "selectedFiles":"已选文件",
   "nouploadFiles":"暂无上传文件",
   "fileName":"文件名",
@@ -154,4 +155,4 @@
   "pleaseSelect":"请选择",
   "a_total_of_x_items":"共 ${x} 条",
   "x_items_selected":"已选文件${x}个"
-}
+}

+ 2 - 1
src/lang/en.json

@@ -52,6 +52,7 @@
   "selectFileName":"Enter file name to search",
   "flushed":"Refresh",
   "uploadFile":"Upload File",
+  "importFromWebpage": "Import From Webpage",
   "selectedFiles":"Selected Files",
   "nouploadFiles":"No Uploaded Files",
   "fileName":"File Name",
@@ -154,4 +155,4 @@
   "pleaseSelect":"Please select",
   "a_total_of_x_items":"A total of ${x} items.",
   "x_items_selected":"${x} items selected"
-}
+}

+ 2 - 1
src/lang/hk.json

@@ -52,6 +52,7 @@
   "selectFileName": "請輸入你需要搜索的文件名字",
   "flushed": "刷新",
   "uploadFile": "上傳文件",
+  "importFromWebpage": "從網頁匯入資料",
   "selectedFiles": "已選文件",
   "nouploadFiles": "暫無上傳文件",
   "fileName": "文件名",
@@ -155,4 +156,4 @@
   "a_total_of_x_items":"共 ${x} 條",
   "x_items_selected":"已選文件${x}個"
 
-}
+}