Merge pull request 'dev' (#14) from dev into master

Reviewed-on: https://git.conleylee.com/conley/Tengine/pulls/14
OAID · Feb 25, 2024 · cb39e8f · cb39e8f
2 parents 7af0113 + d489e04
commit cb39e8f
Show file tree

Hide file tree

Showing 144 changed files with 1,434 additions and 1,018 deletions.
diff --git a/.drone.yml b/.drone.yml
@@ -19,11 +19,14 @@ steps:
       - ../tests/test_rv64_ops.sh
   - name: test models 
     image: ubuntu20.04:qemu
+    environment:
+      DATA_SERVER_URL:
+        from_secret: DATA_SERVER_URL
     commands:
       - cd build
-      - wget -nv http://192.168.3.19:9999/tengine_model_zoo/ci_data/models.tar.gz
-      - wget -nv http://192.168.3.19:9999/tengine_model_zoo/ci_data/images.tar.gz
-      - wget -nv http://192.168.3.19:9999/tengine_model_zoo/ci_data/data_x86.tar.gz
+      - wget -nv $${DATA_SERVER_URL}/tengine_model_zoo/ci_data/models.tar.gz
+      - wget -nv $${DATA_SERVER_URL}/tengine_model_zoo/ci_data/images.tar.gz
+      - wget -nv $${DATA_SERVER_URL}/tengine_model_zoo/ci_data/data_x86.tar.gz
       - mkdir models images data
       - tar zxvf models.tar.gz -C models
       - tar zxvf images.tar.gz -C images
@@ -37,7 +40,7 @@ steps:
     image: ubuntu20.04:qemu
     commands:
       - cd build
-      - apt install lcov -y
+      - apt update && apt install lcov -y
       - lcov --gcov-tool /home/riscv/bin/riscv64-unknown-linux-gnu-gcov --capture --directory . --output-file $${DRONE_REPO_NAME}.info
       - genhtml --branch-coverage -o ../codecov $${DRONE_REPO_NAME}.info 
   - name: scp files

diff --git a/source/device/cpu/cpu_node.h b/source/device/cpu/cpu_node.h
@@ -80,9 +80,6 @@ struct node_ops
 
     /* score */
     int (*score)(struct node_ops*, struct exec_graph*, struct node*);
-
-    /* is ref op */
-    bool is_ref_op;
 };
 
 int init_exec_node(struct exec_graph* exec_graph, struct exec_node* exec_node, struct node* ir_node, struct node_ops* node_ops);

diff --git a/source/device/cpu/op/absval/absval_ref.c b/source/device/cpu/op/absval/absval_ref.c
@@ -86,14 +86,15 @@ static int score(struct node_ops* node_ops, struct exec_graph* exec_graph, struc
     return OPS_SCORE_CANDO;
 }
 
-static struct node_ops hcl_node_ops = {.prerun = prerun,
-                                       .run = run,
-                                       .reshape = NULL,
-                                       .postrun = NULL,
-                                       .init_node = init_node,
-                                       .release_node = release_node,
-                                       .score = score,
-                                       .is_ref_op = true};
+static struct node_ops hcl_node_ops = {
+    .prerun = prerun,
+    .run = run,
+    .reshape = NULL,
+    .postrun = NULL,
+    .init_node = init_node,
+    .release_node = release_node,
+    .score = score,
+};
 
 int register_absval_ref_op()
 {

diff --git a/source/device/cpu/op/absval/cortex-a/absval_hcl_arm.c b/source/device/cpu/op/absval/cortex-a/absval_hcl_arm.c
@@ -109,14 +109,15 @@ static int score(struct node_ops* node_ops, struct exec_graph* exec_graph, struc
     return OPS_SCORE_BEST;
 }
 
-static struct node_ops hcl_node_ops = {.prerun = prerun,
-                                       .run = run,
-                                       .reshape = NULL,
-                                       .postrun = NULL,
-                                       .init_node = init_node,
-                                       .release_node = release_node,
-                                       .score = score,
-                                       .is_ref_op = false};
+static struct node_ops hcl_node_ops = {
+    .prerun = prerun,
+    .run = run,
+    .reshape = NULL,
+    .postrun = NULL,
+    .init_node = init_node,
+    .release_node = release_node,
+    .score = score,
+};
 
 int register_absval_hcl_arm_op()
 {

diff --git a/source/device/cpu/op/absval/risc-v/lp64dv/absval_hcl_rv64.c b/source/device/cpu/op/absval/risc-v/lp64dv/absval_hcl_rv64.c
@@ -0,0 +1,100 @@
+#include "api/c_api.h"
+#include "graph/tensor.h"
+#include "graph/node.h"
+#include "graph/graph.h"
+#include "op/conv/risc-v/lp64dv/vsetvl_rvv.h"
+#include "utility/sys_port.h"
+#include "utility/log.h"
+#include "device/cpu/cpu_node.h"
+#include "device/cpu/cpu_graph.h"
+#include "operator/op.h"
+#include <math.h>
+#include "device/cpu/cpu_module.h"
+
+/* init_node callback: this implementation has nothing to set up, so it always returns 0. */
+static int init_node(struct node_ops* node_ops, struct exec_node* exec_node, struct exec_graph* exec_graph)
+{
+    (void)node_ops;
+    (void)exec_node;
+    (void)exec_graph;
+
+    return 0;
+}
+
+/* release_node callback: nothing was acquired in init_node, so there is nothing to free; always returns 0. */
+static int release_node(struct node_ops* node_ops, struct exec_node* exec_node, struct exec_graph* exec_graph)
+{
+    (void)node_ops;
+    (void)exec_node;
+    (void)exec_graph;
+
+    return 0;
+}
+
+/* prerun callback: no preparation is done before run(); always returns 0. */
+static int prerun(struct node_ops* node_ops, struct exec_node* exec_node, struct exec_graph* exec_graph)
+{
+    (void)node_ops;
+    (void)exec_node;
+    (void)exec_graph;
+
+    return 0;
+}
+
+/*
+ * run callback: writes the element-wise absolute value of the node's first
+ * input tensor into its first output tensor, treating both buffers as float.
+ *
+ * For each of dims[0] batches, dims[1] * dims[2] * dims[3] elements are
+ * processed: groups of 8 go through the vector asm path, the remaining
+ * (fewer than 8) elements through fabsf(). The data pointers keep advancing
+ * across batches. Always returns 0.
+ *
+ * NOTE(review): the asm path advances by 8 floats per iteration, so it
+ * presumably relies on vsetvl_e32_m2() selecting a vector length of exactly
+ * 8 elements -- confirm against vsetvl_rvv.h. The asm statement also does not
+ * list the vector registers it writes (v0, v2) as clobbers -- confirm this is
+ * safe with the toolchain in use.
+ */
+static int run(struct node_ops* node_ops, struct exec_node* exec_node, struct exec_graph* exec_graph)
+{
+    struct node* ir_node = exec_node->ir_node;
+    struct graph* ir_graph = ir_node->graph;
+    struct tensor* input_tensor = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[0]);
+    struct tensor* output_tensor = get_ir_graph_tensor(ir_graph, ir_node->output_tensors[0]);
+
+    const float* input_data = input_tensor->data;
+    float* output_data = output_tensor->data;
+
+    const int batch = input_tensor->dims[0];
+    const int img_size = input_tensor->dims[1] * input_tensor->dims[2] * input_tensor->dims[3];
+    /* img_size rounded down to a multiple of 8: the part handled by the vector loop. */
+    const int vec_size = img_size & -8;
+
+    vsetvl_e32_m2();
+
+    for (int b = 0; b < batch; ++b)
+    {
+        int i = 0;
+
+        /* Vector path: load 8 floats, take |x|, store 8 floats. */
+        for (; i < vec_size; i += 8)
+        {
+            asm("vle32.v    v0, (%0);\n"
+                "vfabs.v    v2, v0;\n"
+                "vse32.v    v2, (%1);\n"
+                :
+                : "r"(input_data), "r"(output_data)
+                : "memory");
+            input_data += 8;
+            output_data += 8;
+        }
+
+        /* Scalar tail for the elements that do not fill a group of 8. */
+        for (; i < img_size; ++i)
+        {
+            *output_data = fabsf(*input_data);
+            output_data++;
+            input_data++;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * score callback: returns OPS_SCORE_PREFER when the node's first input tensor
+ * is FP32 in NCHW layout, and 0 otherwise.
+ */
+static int score(struct node_ops* node_ops, struct exec_graph* exec_graph, struct node* ir_node)
+{
+    struct graph* graph = ir_node->graph;
+    struct tensor* input_tensor = get_ir_graph_tensor(graph, ir_node->input_tensors[0]);
+
+    /* data_type is compared against the TENGINE_DT_* constant, matching the add_n reference op's score check. */
+    if (input_tensor->data_type != TENGINE_DT_FP32 || input_tensor->layout != TENGINE_LAYOUT_NCHW)
+    {
+        return 0;
+    }
+
+    return OPS_SCORE_PREFER;
+}
+
+/* Callback table for this absval implementation; reshape and postrun are not provided. */
+static struct node_ops hcl_node_ops = {
+    .init_node = init_node,
+    .release_node = release_node,
+    .prerun = prerun,
+    .run = run,
+    .postrun = NULL,
+    .reshape = NULL,
+    .score = score,
+};
+
+/* Registers hcl_node_ops as a builtin node_ops for OP_ABSVAL and returns the registry's result. */
+int register_absval_hcl_rv64_op()
+{
+    struct node_ops* ops = &hcl_node_ops;
+
+    return register_builtin_node_ops(OP_ABSVAL, ops);
+}
+
+/* Unregisters hcl_node_ops from the builtin node_ops for OP_ABSVAL and returns the registry's result. */
+int unregister_absval_hcl_rv64_op()
+{
+    struct node_ops* ops = &hcl_node_ops;
+
+    return unregister_builtin_node_ops(OP_ABSVAL, ops);
+}
diff --git a/source/device/cpu/op/add_n/add_n_ref.c b/source/device/cpu/op/add_n/add_n_ref.c
@@ -117,17 +117,27 @@ static int postrun(struct node_ops* node_ops, struct exec_node* exec_node, struc
 
 static int score(struct node_ops* node_ops, struct exec_graph* exec_graph, struct node* exec_node)
 {
-    return OPS_SCORE_BEST;
+    struct node* ir_node = exec_node;
+    struct graph* ir_graph = ir_node->graph;
+    struct tensor* input_tensor;
+
+    input_tensor = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[0]);
+
+    if (input_tensor->data_type != TENGINE_DT_FP32 || input_tensor->layout != TENGINE_LAYOUT_NCHW)
+        return 0;
+
+    return OPS_SCORE_CANDO;
 }
 
-static struct node_ops add_n_node_ops = {.prerun = prerun,
-                                         .run = run,
-                                         .reshape = NULL,
-                                         .postrun = postrun,
-                                         .init_node = init_node,
-                                         .release_node = release_node,
-                                         .score = score,
-                                         .is_ref_op = true};
+static struct node_ops add_n_node_ops = {
+    .prerun = prerun,
+    .run = run,
+    .reshape = NULL,
+    .postrun = postrun,
+    .init_node = init_node,
+    .release_node = release_node,
+    .score = score,
+};
 
 int register_add_n_ref_op()
 {