#pragma once #include #include #include #include #include "option.h" #include "text_embedder_tokenizer.h" class TextEmbedder { public: TextEmbedder(const std::string& model_path); TextEmbedder(const std::string& openai_model_path, const std::string& api_key); ~TextEmbedder(); Option> Embed(const std::string& text); Option>> batch_embed(const std::vector& inputs); const std::string& get_vocab_file_name() const; bool is_openai() { return !api_key.empty(); } static bool is_model_valid(const std::string& model_path, unsigned int& num_dims); static Option is_model_valid(const std::string openai_model_path, const std::string api_key, unsigned int& num_dims); private: std::unique_ptr session_; Ort::Env env_; encoded_input_t Encode(const std::string& text); std::unique_ptr tokenizer_; std::string vocab_file_name; static std::vector mean_pooling(const std::vector>& input); std::string output_tensor_name; std::string api_key; std::string openai_model_path; static constexpr char* OPENAI_LIST_MODELS = "https://api.openai.com/v1/models"; static constexpr char* OPENAI_CREATE_EMBEDDING = "https://api.openai.com/v1/embeddings"; };