Option to return document ID and body in import response. (#615)

* have a strange error, I'll try to compile without changes

* added return_id keyword which returns only id

* added test entry and also added test option to docker-build.sh

* sorry, I can't run the tests myself, so I ran them with your CI

* It didn't work 'cause i've been writing in python for too long :)
This commit is contained in:
RedSnail 2022-06-13 08:51:44 +04:00 committed by GitHub
parent 6c5b1d3c38
commit ccb05375b7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 64 additions and 12 deletions

1
.gitignore vendored
View File

@ -2,6 +2,7 @@
*.iml
external*
build*
test-build*
cmake-build*
cmake-build-debug
cmake-build-release

View File

@ -4,6 +4,7 @@ set -ex
PROJECT_DIR=`dirname $0 | while read a; do cd $a && pwd && break; done`
SYSTEM_NAME=Linux
BUILD_DIR=build-$SYSTEM_NAME
TEST_BUILD_DIR=test-$BUILD_DIR
if [ -z "$TYPESENSE_VERSION" ]; then
TYPESENSE_VERSION="nightly"
@ -15,6 +16,12 @@ if [[ "$@" == *"--clean"* ]]; then
mkdir $PROJECT_DIR/$BUILD_DIR
fi
if [[ "$@" == *"--clean-test"* ]]; then
echo "Cleaning..."
rm -rf $PROJECT_DIR/$TEST_BUILD_DIR
mkdir $PROJECT_DIR/$TEST_BUILD_DIR
fi
if [[ "$@" == *"--depclean"* ]]; then
echo "Cleaning dependencies..."
rm -rf $PROJECT_DIR/external-$SYSTEM_NAME
@ -32,10 +39,18 @@ fi
echo "Building Typesense $TYPESENSE_VERSION..."
docker run -it -v $PROJECT_DIR:/typesense typesense/$TYPESENSE_DEV_IMAGE cmake -DTYPESENSE_VERSION=$TYPESENSE_VERSION \
-DCMAKE_BUILD_TYPE=Release -H/typesense -B/typesense/$BUILD_DIR
-DCMAKE_BUILD_TYPE=Release -H/typesense -B/typesense/$BUILD_DIR
docker run -it -v $PROJECT_DIR:/typesense typesense/$TYPESENSE_DEV_IMAGE make typesense-server -C/typesense/$BUILD_DIR
if [[ "$@" == *"--test"* ]]; then
echo "Running tests"
docker run -it -v $PROJECT_DIR:/typesense typesense/$TYPESENSE_DEV_IMAGE cp /typesense/$BUILD_DIR/Makefile /typesense/$TEST_BUILD_DIR
docker run -it -v $PROJECT_DIR:/typesense typesense/$TYPESENSE_DEV_IMAGE cp -R /typesense/$BUILD_DIR/CMakeFiles /typesense/$TEST_BUILD_DIR/
docker run -it -v $PROJECT_DIR:/typesense typesense/$TYPESENSE_DEV_IMAGE make typesense-test -C/typesense/$TEST_BUILD_DIR
docker run -it -v $PROJECT_DIR:/typesense typesense/$TYPESENSE_DEV_IMAGE chmod +x /typesense/$TEST_BUILD_DIR/typesense-test
docker run -it -v $PROJECT_DIR:/typesense typesense/$TYPESENSE_DEV_IMAGE /typesense/$TEST_BUILD_DIR/typesense-test
fi
if [[ "$@" == *"--build-deploy-image"* ]]; then
echo "Creating deployment image for Typesense $TYPESENSE_VERSION server ..."

View File

@ -270,7 +270,7 @@ public:
static void populate_result_kvs(Topster *topster, std::vector<std::vector<KV *>> &result_kvs);
void batch_index(std::vector<index_record>& index_records, std::vector<std::string>& json_out, size_t &num_indexed);
void batch_index(std::vector<index_record>& index_records, std::vector<std::string>& json_out, size_t &num_indexed, const bool& write_docs, const bool& write_id);
bool is_exceeding_memory_threshold() const;
@ -291,7 +291,8 @@ public:
nlohmann::json add_many(std::vector<std::string>& json_lines, nlohmann::json& document,
const index_operation_t& operation=CREATE, const std::string& id="",
const DIRTY_VALUES& dirty_values=DIRTY_VALUES::COERCE_OR_REJECT);
const DIRTY_VALUES& dirty_values=DIRTY_VALUES::COERCE_OR_REJECT,
const bool& write_docs=false, const bool& write_id=false);
Option<nlohmann::json> search(const std::string & query, const std::vector<std::string> & search_fields,
const std::string & simple_filter_query, const std::vector<std::string> & facet_fields,

View File

@ -184,7 +184,7 @@ Option<nlohmann::json> Collection::add(const std::string & json_str,
const DIRTY_VALUES& dirty_values) {
nlohmann::json document;
std::vector<std::string> json_lines = {json_str};
const nlohmann::json& res = add_many(json_lines, document, operation, id, dirty_values);
const nlohmann::json& res = add_many(json_lines, document, operation, id, dirty_values, false, false);
if(!res["success"].get<bool>()) {
nlohmann::json res_doc;
@ -204,7 +204,7 @@ Option<nlohmann::json> Collection::add(const std::string & json_str,
nlohmann::json Collection::add_many(std::vector<std::string>& json_lines, nlohmann::json& document,
const index_operation_t& operation, const std::string& id,
const DIRTY_VALUES& dirty_values) {
const DIRTY_VALUES& dirty_values, const bool& write_docs, const bool& write_id) {
//LOG(INFO) << "Memory ratio. Max = " << max_memory_ratio << ", Used = " << SystemMetrics::used_memory_ratio();
std::vector<index_record> index_records;
@ -281,7 +281,7 @@ nlohmann::json Collection::add_many(std::vector<std::string>& json_lines, nlohma
do_batched_index:
if((i+1) % index_batch_size == 0 || i == json_lines.size()-1 || repeated_doc) {
batch_index(index_records, json_lines, num_indexed);
batch_index(index_records, json_lines, num_indexed, write_docs, write_id);
// to return the document for the single doc add cases
if(index_records.size() == 1) {
@ -305,7 +305,7 @@ bool Collection::is_exceeding_memory_threshold() const {
}
void Collection::batch_index(std::vector<index_record>& index_records, std::vector<std::string>& json_out,
size_t &num_indexed) {
size_t &num_indexed, const bool& write_docs, const bool& write_id) {
batch_index_in_memory(index_records);
@ -349,8 +349,14 @@ void Collection::batch_index(std::vector<index_record>& index_records, std::vect
index_record.index_success();
}
}
res["success"] = index_record.indexed.ok();
if (write_docs && index_record.indexed.ok()) {
res["document"] = index_record.is_update ? index_record.new_doc : index_record.doc;
}
if (write_id && index_record.indexed.ok()) {
res["id"] = index_record.is_update ? index_record.new_doc["id"] : index_record.doc["id"];
}
if(!index_record.indexed.ok()) {
res["document"] = json_out[index_record.position];
res["error"] = index_record.indexed.error();

View File

@ -693,6 +693,8 @@ bool post_import_documents(const std::shared_ptr<http_req>& req, const std::shar
const char *BATCH_SIZE = "batch_size";
const char *ACTION = "action";
const char *DIRTY_VALUES = "dirty_values";
const char *RETURN_RES = "return_res";
const char *RETURN_ID = "return_id";
if(req->params.count(BATCH_SIZE) == 0) {
req->params[BATCH_SIZE] = "40";
@ -706,6 +708,14 @@ bool post_import_documents(const std::shared_ptr<http_req>& req, const std::shar
req->params[DIRTY_VALUES] = ""; // set it empty as default will depend on `index_all_fields`
}
if(req->params.count(RETURN_RES) == 0) {
req->params[RETURN_RES] = "false";
}
if(req->params.count(RETURN_ID) == 0) {
req->params[RETURN_ID] = "false";
}
if(!StringUtils::is_uint32_t(req->params[BATCH_SIZE])) {
res->final = true;
res->set_400("Parameter `" + std::string(BATCH_SIZE) + "` must be a positive integer.");
@ -721,6 +731,20 @@ bool post_import_documents(const std::shared_ptr<http_req>& req, const std::shar
return false;
}
if(req->params[RETURN_RES] != "true" && req->params[RETURN_RES] != "false") {
res->final = true;
res->set_400("Parameter `" + std::string(RETURN_RES) + "` must be a true|false.");
stream_response(req, res);
return false;
}
if(req->params[RETURN_ID] != "true" && req->params[RETURN_ID] != "false") {
res->final = true;
res->set_400("Parameter `" + std::string(RETURN_ID) + "` must be a true|false.");
stream_response(req, res);
return false;
}
const size_t IMPORT_BATCH_SIZE = std::stoi(req->params[BATCH_SIZE]);
if(IMPORT_BATCH_SIZE == 0) {
@ -796,8 +820,10 @@ bool post_import_documents(const std::shared_ptr<http_req>& req, const std::shar
nlohmann::json document;
const auto& dirty_values = collection->parse_dirty_values_option(req->params[DIRTY_VALUES]);
const bool& return_res = req->params[RETURN_RES] == "true";
const bool& return_id = req->params[RETURN_ID] == "true";
nlohmann::json json_res = collection->add_many(json_lines, document, operation, "",
dirty_values);
dirty_values, return_res, return_id);
//const std::string& import_summary_json = json_res->dump();
//response_stream << import_summary_json << "\n";

View File

@ -1264,9 +1264,10 @@ TEST_F(CollectionTest, ImportDocumentsEmplace) {
nlohmann::json document;
std::vector<std::string> records = {R"({"id": "0", "title": "The Matrix", "points":0})",
R"({"id": "1", "title": "Inception", "points":1})"};
std::vector<nlohmann::json> docs = import_res_to_json(records);
// use `emplace` mode for creating documents
auto import_response = coll1->add_many(records, document, EMPLACE);
auto import_response = coll1->add_many(records, document, EMPLACE, "", DIRTY_VALUES::COERCE_OR_REJECT, true, true);
ASSERT_TRUE(import_response["success"].get<bool>());
ASSERT_EQ(2, import_response["num_imported"].get<int>());
@ -1276,7 +1277,9 @@ TEST_F(CollectionTest, ImportDocumentsEmplace) {
for (size_t i = 0; i < 2; i++) {
ASSERT_TRUE(import_results[i]["success"].get<bool>());
ASSERT_EQ(1, import_results[i].size());
ASSERT_EQ(3, import_results[i].size());
ASSERT_EQ(docs[i], import_results[i]["document"]);
ASSERT_EQ(docs[i]["id"], import_results[i]["id"]);
}
auto res = coll1->search("*", {}, "", {}, {}, {0}, 10, 1, token_ordering::FREQUENCY, {true}, 10).get();