QAicInferenceSet Example¶
The following document describes the AIC100 example named
QAicInferenceSetExample.cpp.
This is a full-featured example that demonstrates running inferences against one or more compiled networks (QPCs) on a single device. It supports three inference execution patterns — callback-based, single-threaded, and multi-threaded — and optionally groups multiple networks into an InferenceSetGroup to share device resources.
QAicInferenceSetExample.cpp
QAicInferenceSetExample.cpp
1 //-----------------------------------------------------------------------------
2 // Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
3 // SPDX-License-Identifier: BSD-3-Clause-Clear
4 //-----------------------------------------------------------------------------
5
#include "QAicApi.hpp"

#include <getopt.h>

#include <algorithm>
#include <atomic>
#include <cassert>
#include <condition_variable>
#include <cstdio>
#include <cstdlib>
#include <exception>
#include <future>
#include <iomanip>
#include <iostream>
#include <memory>
#include <mutex>
#include <optional>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
16
17 namespace {
18
19 using InferenceSetRequestIdPairListType =
20 std::vector<std::pair<qaic::rt::shInferenceSet, uint32_t>>;
21
/// Print the command-line usage text for this example to stdout.
void usage() {
  static constexpr const char *kHelpText =
      "Usage: qaic-inference-set-example [options]\n"
      " -d, --aic-device-id <id> AIC device ID default 0\n"
      " -t, --add-program <path> Location of program binaries\n"
      " -g, --use-inference-set-group All programs to share same set of device cores\n"
      " -v, --verbose increase verbosity\n"
      " -h, --help help\n"
      "Example activates one instance of each program with setsize one.\n"
      "It runs inferences using random data for each program.\n";
  printf("%s", kHelpText);
}
34
35 /**
36 * Set the verbosity level of QLogLevel according to command line argument
37 * @param verbose integer to denote verbosity from command line argument
38 * @return QLogLevel logLevel
39 */
40 [[nodiscard]] auto setVerbosity(const uint32_t verbose) {
41 QLogLevel logLevel = QL_ERROR;
42 if (1 == verbose) {
43 logLevel = QL_WARN;
44 } else if (verbose > 1) {
45 logLevel = QL_INFO;
46 }
47 return logLevel;
48 }
49
/// Conversion trait: maps a destination integer type to the matching
/// strtol-family parser. Add specializations for new destination types.
template <typename T> struct StrToIntTrait;
template <> struct StrToIntTrait<uint32_t> {
  [[nodiscard]] static long func(const char *str, char **str_end) {
    return std::strtol(str, str_end, 10);
  }
};

/**
 * Parse a decimal integer of type T from a C string.
 * @param str NUL-terminated text to parse
 * @return the parsed value, or std::nullopt if no characters were consumed
 */
template <typename T> [[nodiscard]] std::optional<T> stringTo(const char *str) {
  char *pend{};
  // Bug fix: dispatch on T (the requested destination type) instead of the
  // hard-coded StrToIntTrait<uint32_t>, so other trait specializations are
  // actually used when stringTo is instantiated for them.
  const auto result = StrToIntTrait<T>::func(str, &pend);
  if (str == pend) {
    return std::nullopt; // no digits parsed
  }
  // Explicit cast makes the long -> T narrowing visible and intentional.
  return static_cast<T>(result);
}
65
/**
 * @struct aggregating all command line options parsed.
 */
struct GetOptResult final {
  int retval{};                         // process exit code to use when parsing fails
  QID qid{};                            // AIC device ID (-d); defaults to 0
  std::vector<std::string> qpcFileList; // QPC paths collected from repeated -t options
  bool useInferenceSetGroup{false};     // true when -g / --use-inference-set-group given
  uint32_t verbose{};                   // number of -v flags supplied
};
76
/**
 * Command line argument parsing
 * @param argc argument count
 * @param argv argument vector
 * @return a pair of <success, results>. Users need to check against success
 * bit before using results.
 */
[[nodiscard]] std::pair<bool, GetOptResult> getOpt(int argc, char **argv) {
  std::optional<QID> qid = 0; // device defaults to 0 when -d is not given
  std::vector<std::string> qpcFileList;
  uint32_t verbose = 0;
  bool useInferenceSetGroup = false;

  // Long-option table mirroring the short-option string "d:t:gvh" below.
  struct option long_options[] = {
      {"aic-device-id", required_argument, nullptr, 'd'},
      {"add-program", required_argument, nullptr, 't'},
      {"use-inference-set-group", no_argument, nullptr, 'g'},
      {"verbose", no_argument, nullptr, 'v'},
      {"help", no_argument, nullptr, 'h'},
      {nullptr, 0, nullptr, 0}};
  int option_index = 0;
  int opt = 0;

  while ((opt = getopt_long(argc, argv, "d:t:gvh", long_options,
                            &option_index)) != -1) {
    switch (opt) {
    case 'd':
      qid = stringTo<uint32_t>(optarg);
      // NOTE(review): if QID is an unsigned type, `*qid < 0` can never be
      // true and only the parse-failure check is effective — confirm QID's
      // signedness in the QAic API headers.
      if (!qid || *qid < 0) {
        std::cerr << "Cannot parse aic-device-id qid option: " << optarg << '\n';
        return std::make_pair(false, GetOptResult{1});
      }
      break;
    case 't':
      // -t may be repeated; each occurrence adds one QPC path.
      qpcFileList.emplace_back(optarg);
      break;
    case 'g':
      useInferenceSetGroup = true;
      break;
    case 'v':
      ++verbose; // each -v raises verbosity by one level
      break;
    case 'h':
      [[fallthrough]];
    case '?':
    default:
      // Help requested or unrecognized option: print usage, signal "do not
      // proceed" with exit code 0.
      usage();
      return std::make_pair(false, GetOptResult{0});
    }
  }

  // At least one program is required; nothing to run otherwise.
  if (qpcFileList.empty()) {
    std::cerr << "Need at least one test-data input" << '\n';
    usage();
    return std::make_pair(false, GetOptResult{1});
  }

  return std::make_pair(
      true, GetOptResult{0, *qid, qpcFileList, useInferenceSetGroup, verbose});
}
137
138 /**
139 * Dump config to stderr
140 * @param qpcFileList QPC file list
141 * @param qpcList QPC list associated with each QPC file
142 */
143 void dumpSelectedConfig(const std::vector<std::string> &qpcFileList,
144 const std::vector<qaic::rt::shQpc> &qpcList) {
145 assert(qpcFileList.size() == qpcList.size());
146 std::size_t idx{};
147 for (const auto &fileName : qpcFileList) {
148 const auto innerIdx = idx++;
149 std::clog << std::setw(4) << "Program[" << idx << "] : " << fileName << '\n';
150 qaic::rt::BufferMappings bufferMappings = qpcList[innerIdx]->getBufferMappings();
151 for (auto const &m : bufferMappings) {
152 std::string bufferDir;
153 m.ioType == BUFFER_IO_TYPE_INPUT ? bufferDir = "IN " : bufferDir = "OUT";
154 std::clog << "\tBuffer Index: " << m.index << ", Dir: " << bufferDir
155 << ", Size: " << std::setw(8) << m.size
156 << ", Name: " << m.bufferName << '\n';
157 }
158 }
159 std::clog << '\n';
160 }
161
/**
 * Abstract base class for InferenceSet vs InferenceSetGroup actions.
 */
class InferenceActions {
public:
  virtual ~InferenceActions() = default;
  // Enable/disable the underlying group; no-ops in the non-group case.
  virtual QStatus enable() = 0;
  virtual QStatus disable() = 0;
  // Create the group (if any) and associate it with the context/device.
  virtual void createInferenceGroup(qaic::rt::shContext &context, QID qid) = 0;
  // Build the InferenceSetProperties used when creating each InferenceSet.
  virtual qaic::rt::shInferenceSetProperties createProperties() = 0;
};
173
/**
 * Non-group InferenceSet actions — enable/disable are no-ops.
 */
class InferenceSetActions : public InferenceActions {
public:
  // Nothing to enable or disable without a group; always report success.
  QStatus enable() override { return QS_SUCCESS; }
  QStatus disable() override { return QS_SUCCESS; }
  // No group to create in this mode.
  void createInferenceGroup([[maybe_unused]] qaic::rt::shContext &context,
                            [[maybe_unused]] QID qid) override {}
  // Default properties: no group attached.
  qaic::rt::shInferenceSetProperties createProperties() override {
    return qaic::rt::InferenceSetProperties::makeDefault();
  }
};
187
188 /**
189 * Group InferenceSet actions — delegates to InferenceSetGroup.
190 */
191 class InferenceSetGroupActions : public InferenceActions {
192 public:
193 QStatus enable() override { return inferenceSetGroup->enable(); }
194 QStatus disable() override { return inferenceSetGroup->disable(); }
195 void createInferenceGroup(qaic::rt::shContext &context, QID qid) override {
196 inferenceSetGroup =
197 qaic::rt::InferenceSetGroup::Factory(context, qid, "ExampleGroup");
198 }
199 qaic::rt::shInferenceSetProperties createProperties() override {
200 auto prop = qaic::rt::InferenceSetProperties::makeDefault();
201 prop->inferenceSetGroup = inferenceSetGroup;
202 return prop;
203 }
204 ~InferenceSetGroupActions() override { inferenceSetGroup->release(); }
205 private:
206 qaic::rt::shInferenceSetGroup inferenceSetGroup;
207 };
208
/**
 * Create InferenceSet and InferenceVector for each QPC.
 * @param qpcFileList QPC file paths
 * @param qid device ID
 * @param logLevel runtime log level
 * @param inferenceActions pointer to InferenceActions implementation
 * @param verbose verbosity level
 * @return tuple of (inferenceVectorList, inferenceSetList)
 */
[[nodiscard]] auto
createInferenceSetAndVector(const std::vector<std::string> &qpcFileList,
                            const QID qid, const QLogLevel logLevel,
                            InferenceActions *inferenceActions,
                            const uint32_t verbose) {
  std::vector<qaic::rt::shQpc> qpcList;
  std::vector<qaic::rt::shInferenceVector> inferenceVectorList;
  std::vector<qaic::rt::shInferenceSet> inferenceSetList;
  std::vector<QID> qidList{qid}; // context spans only the requested device

  // Default (null) context properties.
  constexpr auto properties = nullptr;
  qaic::rt::shContext context = qaic::rt::Context::Factory(properties, qidList);
  context->setLogLevel(logLevel);

  // Group strategy: creates the InferenceSetGroup here; otherwise a no-op.
  inferenceActions->createInferenceGroup(context, qid);

  for (auto &e : qpcFileList) {
    // Properties carry the group handle (if any) into each InferenceSet.
    qaic::rt::shInferenceSetProperties shInferenceSetProperties =
        inferenceActions->createProperties();
    // Load the compiled program; the shared handle is also kept in qpcList
    // so dumpSelectedConfig can report its buffer mappings later.
    qaic::rt::shQpc qpc = qpcList.emplace_back(qaic::rt::Qpc::Factory(e));
    auto &bufferMappings = qpc->getBufferMappings();
    auto inferenceVector = qaic::rt::InferenceVector::Factory(
        bufferMappings,
        qaic::rt::InferenceVector::DataSourceType::USER_BUFFERS);
    auto dataBufferIterPair = inferenceVector->getDataBufferIterPair();
    // Fill the user buffers with random input data.
    auto status = qaic::rt::UtilIO::fillRandomData(
        bufferMappings, dataBufferIterPair, qaic::rt::UtilIO::UNBOUND);
    if (status != QS_SUCCESS) {
      // Best-effort: failure is reported but does not abort setup.
      std::cerr << "Failed to fill random data\n";
    }
    inferenceVectorList.push_back(inferenceVector);

    // Two in-flight inference slots per set, one activation per program.
    constexpr auto setSize = 2U;
    constexpr auto numActivations = 1U;
    const auto ioDesc = nullptr;
    const auto enableProfiling = false;

    auto item = qaic::rt::InferenceSet::Factory(
        context, qpc, qid, setSize, numActivations, shInferenceSetProperties,
        enableProfiling, ioDesc);
    inferenceSetList.push_back(item);
  }

  if (verbose > 0) {
    std::clog << '\n';
    dumpSelectedConfig(qpcFileList, qpcList);
  }

  return std::make_tuple(inferenceVectorList, inferenceSetList);
}
267
/**
 * Submit a single inference request.
 * @param inferenceVector inference vector with input data
 * @param inferenceSet inference set to submit to
 * @param progIdx program index for logging
 * @param requestId user-defined request ID
 * @param verbose verbosity level
 * @return true on success
 */
[[nodiscard]] auto submitInference(qaic::rt::shInferenceVector &inferenceVector,
                                   qaic::rt::shInferenceSet &inferenceSet,
                                   std::size_t progIdx, std::size_t requestId,
                                   uint32_t verbose) {
  // Obtain a free handle from the set's pool.
  qaic::rt::shInferenceHandle submitInfHandle;
  QStatus status = inferenceSet->getAvailable(submitInfHandle);
  if (status != QS_SUCCESS) {
    std::cerr << "Could not get inference handle\n";
    return false;
  }
  // Attach the I/O buffers, then submit tagged with requestId so the caller
  // can later wait on this specific request (see waitForCompletion).
  submitInfHandle->setInferenceVector(inferenceVector);
  status = inferenceSet->submit(submitInfHandle, requestId);
  if (status != QS_SUCCESS) {
    std::cerr << "Could not submit inference request\n";
    return false;
  }
  if (verbose > 0) {
    std::clog << "\tInference submission: Program[" << progIdx
              << "], Request ID[" << requestId << "]" << '\n';
  }
  return true;
}
299
300 /**
301 * Wait for completion of all submitted inferences.
302 * @param inferenceSetRequestIdPairList list of (inferenceSet, requestId) pairs
303 * @param verbose verbosity level
304 * @return true on success
305 */
306 [[nodiscard]] bool waitForCompletion(
307 InferenceSetRequestIdPairListType &inferenceSetRequestIdPairList,
308 const uint32_t verbose) {
309 for (const auto &[inferenceSet, requestId] : inferenceSetRequestIdPairList) {
310 qaic::rt::shInferenceHandle completedInfHandle;
311 QStatus status = inferenceSet->getCompletedId(completedInfHandle, requestId);
312 if (status != QS_SUCCESS) {
313 std::cerr << "Failed to get completed inference handle" << '\n';
314 return false;
315 }
316 status = inferenceSet->putCompleted(std::move(completedInfHandle));
317 if (status != QS_SUCCESS) {
318 std::cerr << "Failed to return inference handle" << '\n';
319 return false;
320 }
321 if (verbose > 0) {
322 std::clog << "\tInference completed: Request ID[" << requestId << "]" << '\n';
323 }
324 }
325 return true;
326 }
327
328 [[nodiscard]] bool runInferenceSingleThread(
329 std::vector<qaic::rt::shInferenceVector> &inferenceVectorList,
330 std::vector<qaic::rt::shInferenceSet> &inferenceSetList,
331 const uint32_t verbose) {
332 if (verbose > 0) {
333 std::clog << "\nRun Inferences Single threaded\n" << '\n';
334 }
335 InferenceSetRequestIdPairListType inferenceSetRequestIdPairList;
336 std::size_t requestId{};
337 std::size_t idx{};
338 for (auto &inferenceSet : inferenceSetList) {
339 const auto progIdx = idx++;
340 requestId++;
341 if (!submitInference(inferenceVectorList.at(progIdx), inferenceSet,
342 progIdx, requestId, verbose)) {
343 std::cerr << "Failed to submit inference for program id " << progIdx << '\n';
344 return false;
345 }
346 inferenceSetRequestIdPairList.emplace_back(inferenceSet, requestId);
347 }
348 return waitForCompletion(inferenceSetRequestIdPairList, verbose);
349 }
350
351 [[nodiscard]] auto runInferences(qaic::rt::shInferenceVector inferenceVectorArg,
352 qaic::rt::shInferenceSet inferenceSetArg,
353 std::size_t progIdxArg,
354 const uint32_t verboseArg) {
355 constexpr int32_t numInferencePerProg = 5;
356 InferenceSetRequestIdPairListType inferenceSetRequestIdPairList;
357 std::size_t requestId = numInferencePerProg * progIdxArg;
358 for (int32_t i = 0; i < numInferencePerProg; i++) {
359 requestId++;
360 if (!submitInference(inferenceVectorArg, inferenceSetArg, progIdxArg,
361 requestId, verboseArg)) {
362 std::cerr << "Failed to submit inference for program id " << progIdxArg << '\n';
363 return false;
364 }
365 inferenceSetRequestIdPairList.emplace_back(inferenceSetArg, requestId);
366 if (!waitForCompletion(inferenceSetRequestIdPairList, verboseArg)) {
367 return false;
368 }
369 inferenceSetRequestIdPairList.clear();
370 }
371 return true;
372 }
373
374 [[nodiscard]] bool runInferenceMultiThread(
375 std::vector<qaic::rt::shInferenceVector> &inferenceVectorList,
376 std::vector<qaic::rt::shInferenceSet> &inferenceSetList,
377 const uint32_t verbose) {
378 if (verbose > 0) {
379 std::clog << "\nRun Inferences Multi threaded\n" << '\n';
380 }
381 std::vector<std::pair<uint32_t, std::future<bool>>> statusVector;
382 statusVector.reserve(inferenceSetList.size());
383 std::size_t progIdx{};
384 const auto transFunc(
385 [&progIdx, verbose, &inferenceVectorList](const auto &is) {
386 auto fut = std::async(std::launch::async, runInferences,
387 inferenceVectorList.at(progIdx), is, progIdx, verbose);
388 return std::make_pair(progIdx++, std::move(fut));
389 });
390 std::ignore = transform(inferenceSetList.cbegin(), inferenceSetList.cend(),
391 back_inserter(statusVector), transFunc);
392 auto retVal = true;
393 for (auto &[infIdx, status] : statusVector) {
394 if (!status.get()) {
395 std::cerr << "Failed to run inferences, InferenceSet ID " << infIdx << '\n';
396 retVal = false;
397 }
398 }
399 return retVal;
400 }
401
/**
 * Shared state for the callback-based runner. onCompleted() increments the
 * counters from runtime callback threads; the main thread waits on `cv`
 * until all submitted inferences are accounted for.
 */
struct CompletionData {
  std::atomic<uint32_t> completedCount{0}; // inferences finished with QS_SUCCESS
  std::atomic<uint32_t> errorCount{0};     // inferences finished with an error
  // Fixed: initialize — this member was previously left uninitialized, so
  // reading it before assignment was undefined behavior.
  uint32_t totalInferences{0};
  std::mutex mutex;           // guards the condition-variable wait/notify
  std::condition_variable cv; // signaled as completions arrive
};
409
410 void onCompleted(void *data, QStatus status,
411 [[maybe_unused]] qaic::rt::shInferenceHandle infHandle) {
412 auto *completionData = static_cast<CompletionData *>(data);
413 if (status == QS_SUCCESS) {
414 completionData->completedCount++;
415 } else {
416 completionData->errorCount++;
417 }
418 if (completionData->completedCount + completionData->errorCount ==
419 completionData->totalInferences) {
420 std::unique_lock<std::mutex> lock(completionData->mutex);
421 completionData->cv.notify_one();
422 }
423 }
424
425 qaic::rt::shInferenceVector &
426 getNextData(std::vector<qaic::rt::shInferenceVector> &inferenceVectorList,
427 size_t &progIdx) {
428 auto &data = inferenceVectorList.at(progIdx);
429 progIdx = (progIdx + 1) % inferenceVectorList.size();
430 return data;
431 }
432
/**
 * Submit numInferencesTotal inferences using the callback-based submit
 * overload, then block until every submitted inference has completed.
 *
 * Inference vectors are assigned round-robin across programs via
 * getNextData(). Completion accounting happens in onCompleted(), which runs
 * on runtime callback threads and shares state through completionData.
 *
 * @param inferenceVectorList input vectors, one per program
 * @param inferenceSetList inference sets, one per program
 * @param verbose verbosity level
 * @param numInferencesTotal total number of inferences to submit
 * @return true if every submitted inference completed without error
 */
[[nodiscard]] bool runInferenceCallback(
    std::vector<qaic::rt::shInferenceVector> &inferenceVectorList,
    std::vector<qaic::rt::shInferenceSet> &inferenceSetList,
    const uint32_t verbose, const uint32_t numInferencesTotal) {
  if (verbose > 0) {
    std::clog << "\nRun Inferences Callback-based\n" << '\n';
  }
  CompletionData completionData;
  completionData.totalInferences = numInferencesTotal;
  uint32_t submittedCount = 0;
  size_t progIdx = 0;
  while (submittedCount < numInferencesTotal) {
    // Spread submissions across all sets, looping until the target count.
    for (auto &inferenceSet : inferenceSetList) {
      if (submittedCount >= numInferencesTotal) break;
      qaic::rt::shInferenceHandle infHandle;
      // NOTE(review): the second argument (0) is presumably a timeout or
      // non-blocking flag — confirm against the getAvailable API docs.
      QStatus status = inferenceSet->getAvailable(infHandle, 0);
      if (status != QS_SUCCESS) {
        std::cerr << "Failed to get an available inference handle\n";
        // On error, stop submitting and wait for what is already in flight.
        goto wait_for_completion;
      }
      infHandle->setInferenceVector(getNextData(inferenceVectorList, progIdx));
      // Callback overload: onCompleted fires once per finished inference.
      status = inferenceSet->submit(infHandle, onCompleted, &completionData);
      if (status != QS_SUCCESS) {
        std::cerr << "Failed to submit inference with callback\n";
        goto wait_for_completion;
      }
      submittedCount++;
    }
  }
wait_for_completion:
  // Wait until callbacks account for every inference actually submitted
  // (which may be fewer than numInferencesTotal after an error above).
  std::unique_lock<std::mutex> lock(completionData.mutex);
  completionData.cv.wait(lock, [&] {
    return (completionData.completedCount + completionData.errorCount) >=
           submittedCount;
  });
  if (verbose > 0) {
    std::clog << "All inferences completed.\n";
    std::clog << "\tSuccessful: " << completionData.completedCount << '\n';
    std::clog << "\tErrors: " << completionData.errorCount << '\n';
  }
  return completionData.errorCount == 0 &&
         completionData.completedCount == submittedCount;
}
476
477 } // anonymous namespace
478
479 int main(int argc, char **argv) {
480 const auto [success, getOptResult] = getOpt(argc, argv);
481 if (!success) {
482 return getOptResult.retval;
483 }
484 const auto &qpcFileList = getOptResult.qpcFileList;
485 const auto qid = getOptResult.qid;
486 const auto useInferenceSetGroup = getOptResult.useInferenceSetGroup;
487 const auto verbose = getOptResult.verbose;
488 const auto logLevel = setVerbosity(verbose);
489
490 try {
491 auto inferenceActions =
492 [](const bool isGroup) -> std::unique_ptr<InferenceActions> {
493 if (isGroup) {
494 return std::make_unique<InferenceSetGroupActions>();
495 }
496 return std::make_unique<InferenceSetActions>();
497 }(useInferenceSetGroup);
498
499 auto [inferenceVectorList, inferenceSetList] = createInferenceSetAndVector(
500 qpcFileList, qid, logLevel, inferenceActions.get(), verbose);
500
501 if (inferenceActions->enable() != QS_SUCCESS) {
502 std::cerr << "Failed to enable InferenceSet Group" << '\n';
503 return -1;
504 }
505
506 constexpr auto numInferencesCallback = 100;
507 if (numInferencesCallback > 0) {
508 std::cout << "\nRunning " << numInferencesCallback
509 << " inferences using callback-based API\n";
510 if (!runInferenceCallback(inferenceVectorList, inferenceSetList,
511 verbose, numInferencesCallback)) {
512 std::cerr << "Failed to run Inferences via Callback" << '\n';
513 return -1;
514 }
515 } else {
516 if (!runInferenceSingleThread(inferenceVectorList, inferenceSetList, verbose)) {
517 std::cerr << "Failed to run Inferences" << '\n';
518 return -1;
519 }
520 if (!runInferenceMultiThread(inferenceVectorList, inferenceSetList, verbose)) {
521 std::cerr << "Failed to run Inferences" << '\n';
522 return -1;
523 }
524 }
525
526 if (inferenceActions->disable() != QS_SUCCESS) {
527 std::cerr << "Failed to disable InferenceSet Group" << '\n';
528 return -1;
529 }
530 } catch (const qaic::ExceptionInit &e) {
531 std::cerr << e.what() << '\n';
532 return -1;
533 } catch (const qaic::ExceptionRuntime &e) {
534 std::cerr << e.what() << '\n';
535 return -1;
536 }
537 return 0;
538 }
Main Flow¶
The main() function has 5 parts.
Command Line Parsing¶
The example accepts the following command line options:
-d, --aic-device-id <id>: AIC device ID to use (default: 0).
-t, --add-program <path>: Path to a QPC directory. Can be specified multiple times to load multiple programs.
-g, --use-inference-set-group: When set, all programs share the same set of device cores via an InferenceSetGroup.
-v, --verbose: Increase verbosity. Can be specified multiple times.
-h, --help: Print usage and exit.
At least one -t argument is required. Parsing is performed by
getOpt(), which returns a GetOptResult struct containing all
parsed values.
Setup¶
This phase creates all runtime objects needed for inference:
A Context is created for the specified device, with the log level set according to the verbosity argument.
If --use-inference-set-group was specified, an InferenceSetGroup is created and associated with the context.
For each QPC path provided on the command line:
A QPC object is loaded from the path.
An InferenceVector is created from the QPC's buffer mappings and populated with random input data.
An InferenceSet is created with setSize = 2 and numActivations = 1. If a group is in use, the InferenceSetGroup is attached via InferenceSetProperties.
Enable¶
Before running inferences, the InferenceSetGroup must be enabled by
calling inferenceActions->enable(). For the non-group case this is a
no-op. The group can also be enabled implicitly by submitting the first
inference to any associated InferenceSet.
Run Inferences¶
The example defaults to running 100 inferences using the
callback-based pattern (runInferenceCallback). If
numInferencesCallback is set to 0, it falls back to running
inferences sequentially using the single-threaded pattern
(runInferenceSingleThread) followed by the multi-threaded
pattern (runInferenceMultiThread). See Inference Modes for
details on each pattern.
Disable and Cleanup¶
After all inferences complete, inferenceActions->disable() is called
to release the InferenceSetGroup. Any qaic::ExceptionInit or
qaic::ExceptionRuntime exceptions thrown during the above phases are
caught and reported before returning a non-zero exit code.
Inference Modes¶
Single-threaded¶
runInferenceSingleThread submits one inference per loaded program
and waits for all of them to complete before returning. This is the
simplest pattern and is suitable for sequential workloads.
Multi-threaded¶
runInferenceMultiThread launches one std::async thread per
loaded program. Each thread calls runInferences, which submits 5
inferences for its assigned program and waits for each one to complete
before submitting the next. All threads run concurrently, allowing
multiple programs to be exercised in parallel.
Callback-based¶
runInferenceCallback submits inferences using the
submit(infHandle, notifyFn, userData) overload. A callback function
(onCompleted) is registered at submission time and is invoked by the
runtime upon completion of each inference. A CompletionData struct
with atomic counters and a condition variable is used to track progress
and block the main thread until all submitted inferences have completed.
Helper Functions and Classes¶
usage(): Prints command line usage to stdout.
setVerbosity(verbose): Maps an integer verbosity level to a QLogLevel value (QL_ERROR, QL_WARN, or QL_INFO).
GetOptResult: Struct aggregating all parsed command line options.
getOpt(argc, argv): Parses command line arguments using getopt_long and returns a GetOptResult.
dumpSelectedConfig(qpcFileList, qpcList): Logs each program's name and its buffer mappings (index, direction, size, name) to stderr.
InferenceActions: Abstract base class providing enable(), disable(), createInferenceGroup(), and createProperties() interfaces.
InferenceSetActions: Concrete implementation for the non-group case. enable() and disable() are no-ops.
InferenceSetGroupActions: Concrete implementation for the group case. Delegates enable()/disable() to the underlying InferenceSetGroup and attaches it to InferenceSetProperties.
createInferenceSetAndVector(...): Creates a Context, optionally an InferenceSetGroup, and for each QPC creates a Qpc, InferenceVector (with random data), and InferenceSet.
submitInference(...): Acquires an available InferenceHandle, sets the inference vector, and submits the request (non-blocking).
waitForCompletion(...): Iterates over a list of (InferenceSet, requestId) pairs, calling getCompletedId and putCompleted for each.
runInferenceSingleThread(...): Submits one inference per program and waits for all completions.
runInferences(...): Submits 5 inferences sequentially for a single program. Used by the multi-threaded runner.
runInferenceMultiThread(...): Runs runInferences for each program concurrently using std::async.
CompletionData: Struct holding atomic counters and a condition variable for tracking callback-based completion.
onCompleted(data, status, infHandle): Callback invoked by the runtime on inference completion. Increments the appropriate counter and signals the condition variable.
getNextData(inferenceVectorList, progIdx): Returns the next InferenceVector in a round-robin fashion over the list.
runInferenceCallback(...): Submits inferences using the callback-based submit overload and waits for all completions via a condition variable.
Compile and Run Commands¶
Copy QAicInferenceSetExample.cpp into a folder with a
CMakeLists.txt that links against the Platform SDK, then build:
mkdir build
cd build
cmake ..
make -j 8
Run with a single QPC:
./qaic-inference-set-example -t /path/to/programqpc.bin
Run with multiple QPCs sharing device cores via InferenceSetGroup:
./qaic-inference-set-example -t /path/to/model1 -t /path/to/model2 -g
Run with increased verbosity on device 1:
./qaic-inference-set-example -d 1 -t /path/to/programqpc.bin -vv