Load kernels when compatible by ISA, e.g. if AMDGPU_TARGETS is set to gfx1030
and an application is started on gfx1036, the gfx1030 kernel is loaded.

Instead of accepting the first code object whose ISA name passes a yes/no
compatibility check, every candidate is given a compatibility score and the
highest-scoring one is selected:

  * an identical ISA name scores CS_EXACT_MATCH;
  * otherwise the processor must be identical (CS_PROCESSOR_MATCH) or belong
    to the same compatible family (CS_PROCESSOR_COMPATIBLE), and code objects
    specialized for the agent's xnack / sramecc setting score higher than
    generic ones;
  * a score of CS_INCOMPATIBLE (0) means the code object cannot be used.

Based on the Debian patch by Cordell Bloor:
https://salsa.debian.org/rocm-team/rocm-hipamd/-/blob/master/debian/patches/0026-extend-hip-isa-compatibility-check.patch

--- comgr.orig/src/comgr-metadata.cpp
+++ comgr/src/comgr-metadata.cpp
@@ -923,23 +923,86 @@ static constexpr const char *CLANG_OFFLOAD_BUNDLER_MAGIC =
 static constexpr size_t OffloadBundleMagicLen =
     strLiteralLength(CLANG_OFFLOAD_BUNDLER_MAGIC);
 
-bool isCompatibleIsaName(StringRef IsaName, StringRef CodeObjectIsaName) {
+struct GfxPattern {
+  std::string root;
+  std::string suffixes;
+};
+
+static bool matches(const GfxPattern& p, StringRef s) {
+  if (p.root.size() + 1 != s.size()) {
+    return false;
+  }
+  if (0 != std::memcmp(p.root.data(), s.data(), p.root.size())) {
+    return false;
+  }
+  return p.suffixes.find(s[p.root.size()]) != std::string::npos;
+}
+
+static bool isGfx900EquivalentProcessor(StringRef processor) {
+  return matches(GfxPattern{"gfx90", "029c"}, processor);
+}
+
+static bool isGfx900SupersetProcessor(StringRef processor) {
+  return matches(GfxPattern{"gfx90", "0269c"}, processor);
+}
+
+static bool isGfx1030EquivalentProcessor(StringRef processor) {
+  return matches(GfxPattern{"gfx103", "0123456"}, processor);
+}
+
+static bool isGfx1010EquivalentProcessor(StringRef processor) {
+  return matches(GfxPattern{"gfx101", "0"}, processor);
+}
+
+static bool isGfx1010SupersetProcessor(StringRef processor) {
+  return matches(GfxPattern{"gfx101", "0123"}, processor);
+}
+
+enum CompatibilityScore {
+  CS_EXACT_MATCH = 1 << 4,
+  CS_PROCESSOR_MATCH = 1 << 3,
+  CS_PROCESSOR_COMPATIBLE = 1 << 2,
+  CS_XNACK_SPECIALIZED = 1 << 1,
+  CS_SRAM_ECC_SPECIALIZED = 1 << 0,
+  CS_INCOMPATIBLE = 0,
+};
+
+static int getProcessorCompatibilityScore(StringRef CodeObjectProcessor,
+                                          StringRef AgentProcessor) {
+  if (CodeObjectProcessor == AgentProcessor) {
+    return CS_PROCESSOR_MATCH;
+  }
+
+  bool compatible = false;
+  if (isGfx900SupersetProcessor(AgentProcessor)) {
+    compatible = isGfx900EquivalentProcessor(CodeObjectProcessor);
+  } else if (isGfx1010SupersetProcessor(AgentProcessor)) {
+    compatible = isGfx1010EquivalentProcessor(CodeObjectProcessor);
+  } else if (isGfx1030EquivalentProcessor(AgentProcessor)) {
+    compatible = isGfx1030EquivalentProcessor(CodeObjectProcessor);
+  }
+
+  return compatible ? CS_PROCESSOR_COMPATIBLE : CS_INCOMPATIBLE;
+}
+
+static int getCompatiblityScore(StringRef IsaName, StringRef CodeObjectIsaName) {
   if (IsaName == CodeObjectIsaName) {
-    return true;
+    return CS_EXACT_MATCH;
   }
 
   TargetIdentifier CodeObjectIdent;
   if (parseTargetIdentifier(CodeObjectIsaName, CodeObjectIdent)) {
-    return false;
+    return CS_INCOMPATIBLE;
   }
 
   TargetIdentifier IsaIdent;
   if (parseTargetIdentifier(IsaName, IsaIdent)) {
-    return false;
+    return CS_INCOMPATIBLE;
   }
 
-  if (CodeObjectIdent.Processor != IsaIdent.Processor) {
-    return false;
+  int ProcessorScore = getProcessorCompatibilityScore(CodeObjectIdent.Processor, IsaIdent.Processor);
+  if (ProcessorScore == CS_INCOMPATIBLE) {
+    return CS_INCOMPATIBLE;
   }
 
   char CodeObjectXnack = ' ', CodeObjectSramecc = ' ';
@@ -963,18 +1026,23 @@ bool isCompatibleIsaName(StringRef IsaName, StringRef CodeObjectIsaName) {
     }
   }
 
+  int XnackBonus = 0;
   if (CodeObjectXnack != ' ') {
     if (CodeObjectXnack != IsaXnack) {
-      return false;
+      return CS_INCOMPATIBLE;
     }
+    XnackBonus = CS_XNACK_SPECIALIZED;
   }
 
+  int SrameccBonus = 0;
   if (CodeObjectSramecc != ' ') {
     if (CodeObjectSramecc != IsaSramecc) {
-      return false;
+      return CS_INCOMPATIBLE;
     }
+    SrameccBonus = CS_SRAM_ECC_SPECIALIZED;
   }
-  return true;
+
+  return ProcessorScore + XnackBonus + SrameccBonus;
 }
 
 amd_comgr_status_t
@@ -992,14 +1060,21 @@ lookUpCodeObjectInSharedObject(DataObject *DataP,
     return Status;
   }
 
+  int MaxScore = 0;
+  unsigned MaxScoreItem = 0;
   for (unsigned J = 0; J < QueryListSize; J++) {
-    if (isCompatibleIsaName(QueryList[J].isa, IsaName)) {
-      QueryList[J].offset = 0;
-      QueryList[J].size = DataP->Size;
-      break;
+    int Score = getCompatiblityScore(QueryList[J].isa, IsaName);
+    if (Score > MaxScore) {
+      MaxScore = Score;
+      MaxScoreItem = J;
     }
   }
 
+  if (MaxScore) {
+    QueryList[MaxScoreItem].offset = 0;
+    QueryList[MaxScoreItem].size = DataP->Size;
+  }
+
   return AMD_COMGR_STATUS_SUCCESS;
 }
 
@@ -1011,7 +1086,6 @@ amd_comgr_status_t lookUpCodeObject(DataObject *DataP,
     return lookUpCodeObjectInSharedObject(DataP, QueryList, QueryListSize);
   }
 
-  int Seen = 0;
   BinaryStreamReader Reader(StringRef(DataP->Data, DataP->Size),
                             support::little);
 
@@ -1037,6 +1111,8 @@ amd_comgr_status_t lookUpCodeObject(DataObject *DataP,
     QueryList[I].size = 0;
   }
 
+  std::vector<int> QueryListScores(QueryListSize);
+
   // For each code object, extract BundleEntryID information, and check that
   // against each ISA in the QueryList
   for (uint64_t I = 0; I < NumOfCodeObjects; I++) {
@@ -1069,28 +1145,22 @@ amd_comgr_status_t lookUpCodeObject(DataObject *DataP,
     }
 
     for (unsigned J = 0; J < QueryListSize; J++) {
-      // If this QueryList item has already been found to be compatible with
+      // If this QueryList item has exact match with
       // another BundleEntryID, no need to check against the current
       // BundleEntryID
-      if (QueryList[J].size != 0) {
+      if (QueryListScores[J] == CS_EXACT_MATCH) {
         continue;
       }
 
       // If the QueryList Isa is compatible with the BundleEntryID, set the
       // QueryList offset/size to this BundleEntryID
-      if (isCompatibleIsaName(QueryList[J].isa, OffloadAndTargetId.second)) {
+      int Score = getCompatiblityScore(QueryList[J].isa, OffloadAndTargetId.second);
+      if (Score > QueryListScores[J]) {
+        QueryListScores[J] = Score;
         QueryList[J].offset = BundleEntryCodeObjectOffset;
         QueryList[J].size = BundleEntryCodeObjectSize;
-        Seen++;
-        break;
       }
     }
-
-    // Stop iterating over BundleEntryIDs once we have populated the entire
-    // QueryList
-    if (Seen == (int) QueryListSize) {
-      break;
-    }
   }
 
   return AMD_COMGR_STATUS_SUCCESS;