diff --git a/libraries/config/common.mk b/libraries/config/common.mk index b05082292..964a7d0aa 100644 --- a/libraries/config/common.mk +++ b/libraries/config/common.mk @@ -76,17 +76,12 @@ TARGET := $(notdir $(CURDIR)) BUILD := build DATA := data INCLUDES := include -SOURCES ?= source $(foreach d,$(filter-out source/arch source/board source,$(wildcard source/*)),$(if $(wildcard $d/.),$(call DIR_WILDCARD,$d) $d,)) -ifneq ($(strip $(wildcard source/$(ATMOSPHERE_ARCH_DIR)/.*)),) -SOURCES += source/$(ATMOSPHERE_ARCH_DIR) $(call DIR_WILDCARD,source/$(ATMOSPHERE_ARCH_DIR)) -endif -ifneq ($(strip $(wildcard source/$(ATMOSPHERE_BOARD_DIR)/.*)),) -SOURCES += source/$(ATMOSPHERE_BOARD_DIR) $(call DIR_WILDCARD,source/$(ATMOSPHERE_BOARD_DIR)) -endif -ifneq ($(strip $(wildcard source/$(ATMOSPHERE_OS_DIR)/.*)),) -SOURCES += source/$(ATMOSPHERE_OS_DIR) $(call DIR_WILDCARD,source/$(ATMOSPHERE_OS_DIR)) -endif +GENERAL_SOURCE_DIRS=$1 $(foreach d,$(filter-out $1/arch $1/board $1,$(wildcard $1/*)),$(if $(wildcard $d/.),$(call DIR_WILDCARD,$d) $d,)) +SPECIFIC_SOURCE_DIRS=$(if $(wildcard $1/$2/.*),$1/$2 $(call DIR_WILDCARD,$1/$2),) +ALL_SOURCE_DIRS=$(call GENERAL_SOURCE_DIRS,$1) $(call SPECIFIC_SOURCE_DIRS,$1,$(ATMOSPHERE_ARCH_DIR)) $(call SPECIFIC_SOURCE_DIRS,$1,$(ATMOSPHERE_BOARD_DIR)) $(call SPECIFIC_SOURCE_DIRS,$1,$(ATMOSPHERE_OS_DIR)) + +SOURCES ?= $(call ALL_SOURCE_DIRS,source) #--------------------------------------------------------------------------------- # Rules for compiling pre-compiled headers diff --git a/libraries/libmesosphere/Makefile b/libraries/libmesosphere/Makefile index be6fb94d6..79213e4fc 100644 --- a/libraries/libmesosphere/Makefile +++ b/libraries/libmesosphere/Makefile @@ -14,6 +14,8 @@ CFLAGS := $(ATMOSPHERE_CFLAGS) $(SETTINGS) $(DEFINES) $(INCLUDE) CXXFLAGS := $(CFLAGS) $(ATMOSPHERE_CXXFLAGS) -fno-use-cxa-atexit -flto ASFLAGS := $(ATMOSPHERE_ASFLAGS) $(SETTINGS) +SOURCES += $(call ALL_SOURCE_DIRS,../libvapours/source) + LIBS := 
#--------------------------------------------------------------------------------- diff --git a/libraries/libstratosphere/Makefile b/libraries/libstratosphere/Makefile index d7a3965ea..16328c048 100644 --- a/libraries/libstratosphere/Makefile +++ b/libraries/libstratosphere/Makefile @@ -23,6 +23,8 @@ ASFLAGS := $(ATMOSPHERE_ASFLAGS) $(SETTINGS) LDFLAGS := -specs=$(DEVKITPRO)/libnx/switch.specs $(SETTINGS) -Wl,-Map,$(notdir $*.map) +SOURCES += $(call ALL_SOURCE_DIRS,../libvapours/source) + LIBS := -lnx #--------------------------------------------------------------------------------- diff --git a/libraries/libvapours/include/vapours/assert.hpp b/libraries/libvapours/include/vapours/assert.hpp index b4fe1fbee..74a5f0590 100644 --- a/libraries/libvapours/include/vapours/assert.hpp +++ b/libraries/libvapours/include/vapours/assert.hpp @@ -19,7 +19,7 @@ namespace ams::impl { template - ALWAYS_INLINE void UnusedImpl(ArgTypes... args) { + constexpr ALWAYS_INLINE void UnusedImpl(ArgTypes... args) { (static_cast(args), ...); } diff --git a/libraries/libvapours/include/vapours/crypto.hpp b/libraries/libvapours/include/vapours/crypto.hpp index 274db7952..a468ee98a 100644 --- a/libraries/libvapours/include/vapours/crypto.hpp +++ b/libraries/libvapours/include/vapours/crypto.hpp @@ -18,3 +18,5 @@ #include #include +#include +#include diff --git a/libraries/libvapours/include/vapours/crypto/crypto_memory_clear.hpp b/libraries/libvapours/include/vapours/crypto/crypto_memory_clear.hpp new file mode 100644 index 000000000..6a8be44e8 --- /dev/null +++ b/libraries/libvapours/include/vapours/crypto/crypto_memory_clear.hpp @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2018-2020 Atmosphère-NX + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#pragma once +#include +#include +#include + +namespace ams::crypto { + + void ClearMemory(void *mem, size_t size); + +} diff --git a/libraries/libvapours/include/vapours/crypto/crypto_memory_compare.hpp b/libraries/libvapours/include/vapours/crypto/crypto_memory_compare.hpp index 4798f7d4f..68040a4b8 100644 --- a/libraries/libvapours/include/vapours/crypto/crypto_memory_compare.hpp +++ b/libraries/libvapours/include/vapours/crypto/crypto_memory_compare.hpp @@ -19,20 +19,8 @@ #include #include -#ifdef ATMOSPHERE_ARCH_ARM64 - -#include - -#else - -#error "Unknown architecture for crypto::IsSameBytes" - -#endif - namespace ams::crypto { - inline bool IsSameBytes(const void *lhs, const void *rhs, size_t size) { - return impl::IsSameBytes(lhs, rhs, size); - } + bool IsSameBytes(const void *lhs, const void *rhs, size_t size); } diff --git a/libraries/libvapours/include/vapours/crypto/impl/crypto_bignum.hpp b/libraries/libvapours/include/vapours/crypto/impl/crypto_bignum.hpp new file mode 100644 index 000000000..585e41975 --- /dev/null +++ b/libraries/libvapours/include/vapours/crypto/impl/crypto_bignum.hpp @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2018-2020 Atmosphère-NX + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#pragma once +#include +#include +#include +#include +#include + +namespace ams::crypto::impl { + + class BigNum { + NON_COPYABLE(BigNum); + NON_MOVEABLE(BigNum); + public: + using HalfWord = u16; + using Word = u32; + using DoubleWord = u64; + + static constexpr size_t MaxBits = 4096; + static constexpr size_t BitsPerWord = sizeof(Word) * CHAR_BIT; + static constexpr Word MaxWord = std::numeric_limits::max(); + static constexpr Word MaxHalfWord = std::numeric_limits::max(); + + class WordAllocator { + NON_COPYABLE(WordAllocator); + NON_MOVEABLE(WordAllocator); + public: + class Allocation { + NON_COPYABLE(Allocation); + NON_MOVEABLE(Allocation); + private: + friend class WordAllocator; + private: + WordAllocator *allocator; + Word *buffer; + size_t count; + private: + constexpr ALWAYS_INLINE Allocation(WordAllocator *a, Word *w, size_t c) : allocator(a), buffer(w), count(c) { /* ... */ } + public: + ALWAYS_INLINE ~Allocation() { if (allocator) { allocator->Free(this->buffer, this->count); } } + + constexpr ALWAYS_INLINE Word *GetBuffer() const { return this->buffer; } + constexpr ALWAYS_INLINE size_t GetCount() const { return this->count; } + constexpr ALWAYS_INLINE bool IsValid() const { return this->buffer != nullptr; } + }; + + friend class Allocation; + private: + Word *buffer; + size_t count; + size_t max_count; + size_t min_count; + private: + ALWAYS_INLINE void Free(void *words, size_t num) { + this->buffer -= num; + this->count += num; + + AMS_ASSERT(words == this->buffer); + } + public: + constexpr ALWAYS_INLINE WordAllocator(Word *buf, size_t c) : buffer(buf), count(c), max_count(c), min_count(c) { /* ... 
*/ } + + ALWAYS_INLINE Allocation Allocate(size_t num) { + if (num <= this->count) { + Word *allocated = this->buffer; + + this->buffer += num; + this->count -= num; + this->min_count = std::min(this->count, this->min_count); + + return Allocation(this, allocated, num); + } else { + return Allocation(nullptr, nullptr, 0); + } + } + + constexpr ALWAYS_INLINE size_t GetMaxUsedSize() const { + return (this->max_count - this->min_count) * sizeof(Word); + } + }; + private: + Word *words; + size_t num_words; + size_t max_words; + private: + static void ImportImpl(Word *out, size_t out_size, const u8 *src, size_t src_size); + static void ExportImpl(u8 *out, size_t out_size, const Word *src, size_t src_size); + public: + constexpr BigNum() : words(), num_words(), max_words() { /* ... */ } + ~BigNum() { /* ... */ } + + constexpr void ReserveStatic(Word *buf, size_t capacity) { + this->words = buf; + this->max_words = capacity; + } + + bool Import(const void *src, size_t src_size); + void Export(void *dst, size_t dst_size); + + size_t GetSize() const; + + bool IsZero() const { + return this->num_words == 0; + } + + bool ExpMod(void *dst, const void *src, size_t size, const BigNum &exp, u32 *work_buf, size_t work_buf_size) const; + void ClearToZero(); + void UpdateCount(); + public: + /* Utility. */ + static bool IsZero(const Word *w, size_t num_words); + static int Compare(const Word *lhs, const Word *rhs, size_t num_words); + static size_t CountWords(const Word *w, size_t num_words); + static size_t CountSignificantBits(Word w); + static void ClearToZero(Word *w, size_t num_words); + static void SetToWord(Word *w, size_t num_words, Word v); + static void Copy(Word *dst, const Word *src, size_t num_words); + + /* Arithmetic. 
*/ + static bool ExpMod(Word *dst, const Word *src, const Word *exp, size_t exp_num_words, const Word *mod, size_t mod_num_words, WordAllocator *allocator); + static bool MultMod(Word *dst, const Word *src, const Word *mult, const Word *mod, size_t num_words, WordAllocator *allocator); + static bool Mod(Word *dst, const Word *src, size_t src_words, const Word *mod, size_t mod_words, WordAllocator *allocator); + static bool DivMod(Word *quot, Word *rem, const Word *top, size_t top_words, const Word *bot, size_t bot_words, WordAllocator *allocator); + static bool Mult(Word *dst, const Word *lhs, const Word *rhs, size_t num_words, WordAllocator *allocator); + + static Word LeftShift(Word *dst, const Word *w, size_t num_words, const size_t shift); + static Word RightShift(Word *dst, const Word *w, size_t num_words, const size_t shift); + static Word Add(Word *dst, const Word *lhs, const Word *rhs, size_t num_words); + static Word Sub(Word *dst, const Word *lhs, const Word *rhs, size_t num_words); + static Word MultAdd(Word *dst, const Word *w, size_t num_words, Word mult); + static Word MultSub(Word *dst, const Word *w, const Word *v, size_t num_words, Word mult); + }; + + template + class StackBigNum : public BigNum { + public: + static constexpr size_t NumBits = Bits; + static constexpr size_t NumWords = util::AlignUp(NumBits, BitsPerWord) / BitsPerWord; + static constexpr size_t NumBytes = NumWords * sizeof(Word); + private: + Word word_buf[NumWords]; + public: + constexpr StackBigNum() : word_buf() { + this->ReserveStatic(word_buf, NumWords); + } + }; + +} diff --git a/libraries/libvapours/include/vapours/svc/svc_common.hpp b/libraries/libvapours/include/vapours/svc/svc_common.hpp index e5e3c3353..49bd91327 100644 --- a/libraries/libvapours/include/vapours/svc/svc_common.hpp +++ b/libraries/libvapours/include/vapours/svc/svc_common.hpp @@ -24,10 +24,8 @@ namespace ams::svc { /* TODO: C++ style handle? 
*/ #ifdef ATMOSPHERE_IS_STRATOSPHERE using Handle = ::Handle; -#elif defined ATMOSPHERE_IS_MESOSPHERE - using Handle = u32; #else - #error "Unknown target for svc::Handle" + using Handle = u32; #endif static constexpr size_t MaxWaitSynchronizationHandleCount = 0x40; diff --git a/libraries/libvapours/source/crypto/crypto_memory_clear.cpp b/libraries/libvapours/source/crypto/crypto_memory_clear.cpp new file mode 100644 index 000000000..a5bb28320 --- /dev/null +++ b/libraries/libvapours/source/crypto/crypto_memory_clear.cpp @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2018-2020 Atmosphère-NX + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ +#include + +namespace ams::crypto { + + void ClearMemory(void *_mem, size_t size) { + volatile u8 *mem = reinterpret_cast(_mem); + + for (size_t i = 0; i < size; i++) { + mem[i] = 0; + } + } + +} \ No newline at end of file diff --git a/libraries/libvapours/include/vapours/crypto/impl/crypto_memory_compare.arch.arm64.hpp b/libraries/libvapours/source/crypto/crypto_memory_compare.arch.arm64.cpp similarity index 91% rename from libraries/libvapours/include/vapours/crypto/impl/crypto_memory_compare.arch.arm64.hpp rename to libraries/libvapours/source/crypto/crypto_memory_compare.arch.arm64.cpp index f1a1350f7..08dbb2d47 100644 --- a/libraries/libvapours/include/vapours/crypto/impl/crypto_memory_compare.arch.arm64.hpp +++ b/libraries/libvapours/source/crypto/crypto_memory_compare.arch.arm64.cpp @@ -13,14 +13,11 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ +#include -#pragma once -#include -#include +namespace ams::crypto { -namespace ams::crypto::impl { - - inline bool IsSameBytes(const void *lhs, const void *rhs, size_t size) { + bool IsSameBytes(const void *lhs, const void *rhs, size_t size) { bool result; u8 xor_acc, ltmp, rtmp; size_t index; diff --git a/libraries/libvapours/source/crypto/impl/crypto_bignum.cpp b/libraries/libvapours/source/crypto/impl/crypto_bignum.cpp new file mode 100644 index 000000000..4b4927fe1 --- /dev/null +++ b/libraries/libvapours/source/crypto/impl/crypto_bignum.cpp @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2018-2020 Atmosphère-NX + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#include + +namespace ams::crypto::impl { + + void BigNum::ImportImpl(Word *out, size_t out_size, const u8 *src, size_t src_size) { + size_t octet_ofs = src_size; + size_t word_ofs = 0; + + /* Parse octets into words. */ + while (word_ofs < out_size && octet_ofs > 0) { + Word w = 0; + for (size_t shift = 0; octet_ofs > 0 && shift < BITSIZEOF(Word); shift += BITSIZEOF(u8)) { + w |= static_cast(src[--octet_ofs]) << shift; + } + out[word_ofs++] = w; + } + + /* Zero-fill upper words. */ + while (word_ofs < out_size) { + out[word_ofs++] = 0; + } + } + + void BigNum::ExportImpl(u8 *out, size_t out_size, const Word *src, size_t src_size) { + size_t octet_ofs = out_size; + + /* Parse words into octets. */ + for (size_t word_ofs = 0; word_ofs < src_size && octet_ofs > 0; word_ofs++) { + const Word w = src[word_ofs]; + for (size_t shift = 0; octet_ofs > 0 && shift < BITSIZEOF(Word); shift += BITSIZEOF(u8)) { + out[--octet_ofs] = static_cast(w >> shift); + } + } + + /* Zero-clear remaining octets. */ + while (octet_ofs > 0) { + out[--octet_ofs] = 0; + } + } + + size_t BigNum::GetSize() const { + if (this->num_words == 0) { + return 0; + } + static_assert(sizeof(Word) == 4); + + size_t size = this->num_words * sizeof(Word); + const Word last = this->words[this->num_words - 1]; + AMS_ASSERT(last != 0); + if (last >= 0x01000000u) { + return size - 0; + } else if (last >= 0x00010000u) { + return size - 1; + } else if (last >= 0x00000100u) { + return size - 2; + } else { + return size - 3; + } + } + + bool BigNum::Import(const void *src, size_t src_size) { + AMS_ASSERT((src != nullptr) || (src_size == 0)); /* A null source is only acceptable when there are no bytes to read; the loop below dereferences src whenever src_size is non-zero. */ + + /* Ignore leading zeroes. */ + const u8 *data = static_cast(src); + while (src_size > 0 && *data == 0) { + ++data; + --src_size; + } + + /* Ensure we have space for the number. 
*/ + AMS_ASSERT(src_size <= this->max_words * sizeof(Word)); + if (AMS_UNLIKELY(!(src_size <= this->max_words * sizeof(Word)))) { + return false; + } + + /* Import. */ + this->num_words = util::AlignUp(src_size, sizeof(Word)) / sizeof(Word); + + ImportImpl(this->words, this->max_words, data, src_size); + return true; + } + + void BigNum::Export(void *dst, size_t dst_size) { + AMS_ASSERT(dst_size >= this->GetSize()); + ExportImpl(static_cast(dst), dst_size, this->words, this->num_words); + } + + bool BigNum::ExpMod(void *dst, const void *src, size_t size, const BigNum &exp, u32 *work_buf, size_t work_buf_size) const { + /* Can't exponentiate with or about zero. */ + if (this->IsZero() || exp.IsZero()) { + return false; + } + AMS_ASSERT(size == this->GetSize()); + + /* Create an allocator. */ + WordAllocator allocator(work_buf, work_buf_size / sizeof(Word)); + ON_SCOPE_EXIT { ClearMemory(work_buf, allocator.GetMaxUsedSize()); }; + + /* Create a BigNum for the signature. */ + BigNum signature; + auto signature_words = allocator.Allocate(size / sizeof(Word)); + if (!signature_words.IsValid()) { + return false; + } + + /* Import data for the signature. */ + signature.ReserveStatic(signature_words.GetBuffer(), signature_words.GetCount()); + if (!signature.Import(src, size)) { + return false; + } + + /* Perform the exponentiation. */ + if (!ExpMod(signature.words, signature.words, exp.words, exp.num_words, this->words, this->num_words, std::addressof(allocator))) { + return false; + } + + /* We succeeded, so export. 
*/ + signature.UpdateCount(); + signature.Export(dst, size); + + return true; + } + + void BigNum::ClearToZero() { + std::memset(this->words, 0, this->num_words * sizeof(Word)); + } + + void BigNum::UpdateCount() { + this->num_words = CountWords(this->words, this->max_words); + } + +} \ No newline at end of file diff --git a/libraries/libvapours/source/crypto/impl/crypto_bignum_operations.cpp b/libraries/libvapours/source/crypto/impl/crypto_bignum_operations.cpp new file mode 100644 index 000000000..0118f931d --- /dev/null +++ b/libraries/libvapours/source/crypto/impl/crypto_bignum_operations.cpp @@ -0,0 +1,425 @@ +/* + * Copyright (c) 2018-2020 Atmosphère-NX + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ +#include + +namespace ams::crypto::impl { + + namespace { + + constexpr ALWAYS_INLINE BigNum::Word GetTop2Bits(BigNum::Word w) { + return (w >> (BigNum::BitsPerWord - 2)) & 0x3u; + } + + constexpr ALWAYS_INLINE void MultWord(BigNum::Word *dst, BigNum::Word lhs, BigNum::Word rhs) { + static_assert(sizeof(BigNum::DoubleWord) == sizeof(BigNum::Word) * 2); + BigNum::DoubleWord result = static_cast(lhs) * static_cast(rhs); + dst[0] = static_cast(result & ~BigNum::Word()); + dst[1] = static_cast(result >> BITSIZEOF(BigNum::Word)); + } + + constexpr ALWAYS_INLINE BigNum::HalfWord GetUpperHalf(BigNum::Word word) { + static_assert(sizeof(BigNum::Word) == sizeof(BigNum::HalfWord) * 2); + return static_cast((word >> BITSIZEOF(BigNum::HalfWord)) & ~BigNum::HalfWord()); + } + + constexpr ALWAYS_INLINE BigNum::HalfWord GetLowerHalf(BigNum::Word word) { + static_assert(sizeof(BigNum::Word) == sizeof(BigNum::HalfWord) * 2); + return static_cast(word & ~BigNum::HalfWord()); + } + + constexpr ALWAYS_INLINE BigNum::Word ToUpperHalf(BigNum::HalfWord half) { + static_assert(sizeof(BigNum::Word) == sizeof(BigNum::HalfWord) * 2); + return static_cast(half) << BITSIZEOF(BigNum::HalfWord); + } + + constexpr ALWAYS_INLINE BigNum::Word ToLowerHalf(BigNum::HalfWord half) { + static_assert(sizeof(BigNum::Word) == sizeof(BigNum::HalfWord) * 2); + return static_cast(half); + } + + constexpr ALWAYS_INLINE BigNum::Word DivWord(const BigNum::Word *w, BigNum::Word div) { + using Word = BigNum::Word; + using HalfWord = BigNum::HalfWord; + + Word work[2] = { w[0], w[1] }; + HalfWord r_hi = 0, r_lo = 0; + + HalfWord d_hi = GetUpperHalf(div); + HalfWord d_lo = GetLowerHalf(div); + + if (d_hi == BigNum::MaxHalfWord) { + r_hi = GetUpperHalf(work[1]); + } else { + r_hi = GetLowerHalf(work[1] / (d_hi + 1)); + } + + { + const Word hh = static_cast(r_hi) * static_cast(d_hi); + const Word hl = static_cast(r_hi) * static_cast(d_lo); + + const Word uhl = ToUpperHalf(static_cast(hl)); + if ((work[0] -= uhl) 
> (BigNum::MaxWord - uhl)) { + work[1]--; + } + work[1] -= GetUpperHalf(hl); + work[1] -= hh; + + const Word udl = ToUpperHalf(d_lo); + while (work[1] > d_hi || (work[1] == d_hi && work[0] >= udl)) { + if ((work[0] -= udl) > (BigNum::MaxWord - udl)) { + work[1]--; + } + work[1] -= d_hi; + r_hi++; + } + } + + if (d_hi == BigNum::MaxHalfWord) { + r_lo = GetLowerHalf(work[1]); + } else { + r_lo = GetLowerHalf((ToUpperHalf(static_cast(work[1])) + GetUpperHalf(work[0])) / (d_hi + 1)); + } + + { + const Word ll = static_cast(r_lo) * static_cast(d_lo); + const Word lh = static_cast(r_lo) * static_cast(d_hi); + + if ((work[0] -= ll) > (BigNum::MaxWord - ll)) { + work[1]--; + } + + const Word ulh = ToUpperHalf(static_cast(lh)); + if ((work[0] -= ulh) > (BigNum::MaxWord - ulh)) { + work[1]--; + } + work[1] -= GetUpperHalf(lh); + + while ((work[1] > 0) || (work[1] == 0 && work[0] >= div)) { + if ((work[0] -= div) > (BigNum::MaxWord - div)) { + work[1]--; + } + r_lo++; + } + } + + return ToUpperHalf(r_hi) + r_lo; + } + + } + + bool BigNum::IsZero(const Word *w, size_t num_words) { + for (size_t i = 0; i < num_words; i++) { + if (w[i]) { + return false; + } + } + return true; + } + + int BigNum::Compare(const Word *lhs, const Word *rhs, size_t num_words) { + for (s32 i = static_cast(num_words) - 1; i >= 0; i--) { + if (lhs[i] > rhs[i]) { + return 1; + } else if (lhs[i] < rhs[i]) { + return -1; + } + } + return 0; + } + + size_t BigNum::CountWords(const Word *w, size_t num_words) { + s32 i = static_cast(num_words) - 1; + while (i >= 0 && !w[i]) { + i--; + } + return i + 1; + } + + size_t BigNum::CountSignificantBits(Word w) { + size_t i; + for (i = 0; i < BitsPerWord && w != 0; i++) { + w >>= 1; + } + return i; + } + + void BigNum::ClearToZero(Word *w, size_t num_words) { + for (size_t i = 0; i < num_words; i++) { + w[i] = 0; + } + } + + void BigNum::SetToWord(Word *w, size_t num_words, Word v) { + ClearToZero(w, num_words); + w[0] = v; + } + + void BigNum::Copy(Word *dst, const 
Word *src, size_t num_words) { + for (size_t i = 0; i < num_words; i++) { + dst[i] = src[i]; + } + } + + BigNum::Word BigNum::LeftShift(Word *dst, const Word *w, size_t num_words, const size_t shift) { + if (shift >= BitsPerWord) { + return 0; + } + + const size_t invshift = BitsPerWord - shift; + Word carry = 0; + for (size_t i = 0; i < num_words; i++) { + const Word cur = w[i]; + dst[i] = (cur << shift) | carry; + carry = shift ? (cur >> invshift) : 0; + } + + return carry; + } + + BigNum::Word BigNum::RightShift(Word *dst, const Word *w, size_t num_words, const size_t shift) { + if (shift >= BitsPerWord) { + return 0; + } + + const size_t invshift = BitsPerWord - shift; + Word carry = 0; + for (s32 i = static_cast(num_words) - 1; i >= 0; i--) { + const Word cur = w[i]; + dst[i] = (cur >> shift) | carry; + carry = shift ? (cur << invshift) : 0; + } + + return carry; + } + + BigNum::Word BigNum::MultSub(Word *dst, const Word *w, const Word *v, size_t num_words, Word mult) { + /* If multiplying by zero, nothing to do. */ + if (mult == 0) { + return 0; + } + + Word borrow = 0, work[2]; + for (size_t i = 0; i < num_words; i++) { + /* Multiply, calculate borrow for next. */ + MultWord(work, mult, v[i]); + if ((dst[i] = (w[i] - borrow)) > (MaxWord - borrow)) { + borrow = 1; + } else { + borrow = 0; + } + + if ((dst[i] -= work[0]) > (MaxWord - work[0])) { + borrow++; + } + borrow += work[1]; + } + + return borrow; + } + + bool BigNum::ExpMod(Word *dst, const Word *src, const Word *exp, size_t exp_words, const Word *mod, size_t mod_words, WordAllocator *allocator) { + /* Nintendo uses an algorithm that relies on powers of exp. */ + bool needs_exp[4] = {}; + if (exp_words > 1) { + needs_exp[2] = true; + needs_exp[3] = true; + } else { + Word exp_w = exp[0]; + + for (size_t i = 0; i < BitsPerWord / 2; i++) { + /* Nintendo at each step determines needed exponent from a pair of two bits. 
*/ + needs_exp[exp_w & 0x3u] = true; + exp_w >>= 2; + } + + if (needs_exp[3]) { + needs_exp[2] = true; + } + } + + /* Allocate space for powers 1, 2, 3. */ + auto power_1 = allocator->Allocate(mod_words); + auto power_2 = allocator->Allocate(mod_words); + auto power_3 = allocator->Allocate(mod_words); + if (!(power_1.IsValid() && power_2.IsValid() && power_3.IsValid())) { + return false; + } + decltype(power_1)* powers[3] = { &power_1, &power_2, &power_3 }; + + /* Set the powers of src. */ + Copy(power_1.GetBuffer(), src, mod_words); + if (needs_exp[2]) { + if (!MultMod(power_2.GetBuffer(), power_1.GetBuffer(), src, mod, mod_words, allocator)) { + return false; + } + } + if (needs_exp[3]) { + if (!MultMod(power_3.GetBuffer(), power_2.GetBuffer(), src, mod, mod_words, allocator)) { + return false; + } + } + + /* Allocate space to work. */ + auto work = allocator->Allocate(mod_words); + if (!work.IsValid()) { + return false; + } + SetToWord(work.GetBuffer(), work.GetCount(), 1); + + /* Ensure we're working with the correct exponent word count. */ + exp_words = CountWords(exp, exp_words); + + for (s32 i = static_cast(exp_words - 1); i >= 0; i--) { + Word cur_word = exp[i]; + size_t cur_bits = BitsPerWord; + + /* Remove leading zeroes in first word. */ + if (i == static_cast(exp_words - 1)) { + while (!GetTop2Bits(cur_word)) { + cur_word <<= 2; + cur_bits -= 2; + } + } + + /* Compute current modular multiplicative step. */ + for (size_t j = 0; j < cur_bits; j += 2, cur_word <<= 2) { + /* Exponentiate current work to the 4th power. 
*/ + if (!MultMod(work.GetBuffer(), work.GetBuffer(), work.GetBuffer(), mod, mod_words, allocator)) { + return false; + } + + if (!MultMod(work.GetBuffer(), work.GetBuffer(), work.GetBuffer(), mod, mod_words, allocator)) { + return false; + } + + if (const Word top = GetTop2Bits(cur_word)) { + if (!MultMod(work.GetBuffer(), work.GetBuffer(), powers[top - 1]->GetBuffer(), mod, mod_words, allocator)) { + return false; + } + } + } + } + + /* Copy work to output. */ + Copy(dst, work.GetBuffer(), mod_words); + + return true; + } + + bool BigNum::MultMod(Word *dst, const Word *src, const Word *mult, const Word *mod, size_t num_words, WordAllocator *allocator) { + /* Allocate work. */ + auto work = allocator->Allocate(2 * num_words); + if (!work.IsValid()) { + return false; + } + + /* Multiply. */ + if (!Mult(work.GetBuffer(), src, mult, num_words, allocator)) { + return false; + } + + /* Mod. */ + if (!Mod(dst, work.GetBuffer(), 2 * num_words, mod, num_words, allocator)) { + return false; + } + + return true; + } + + bool BigNum::Mod(Word *dst, const Word *src, size_t src_words, const Word *mod, size_t mod_words, WordAllocator *allocator) { + /* Allocate work. */ + auto work = allocator->Allocate(src_words); + if (!work.IsValid()) { + return false; + } + + if (!DivMod(work.GetBuffer(), dst, src, src_words, mod, mod_words, allocator)) { + return false; + } + + return true; + } + + bool BigNum::DivMod(Word *quot, Word *rem, const Word *top, size_t top_words, const Word *bot, size_t bot_words, WordAllocator *allocator) { + /* Allocate work. */ + auto top_work = allocator->Allocate(top_words + 1); + auto bot_work = allocator->Allocate(bot_words); + if (!(top_work.IsValid() && bot_work.IsValid())) { + return false; + } + + /* Prevent division by zero. */ + size_t bot_work_words = CountWords(bot, bot_words); + if (bot_work_words == 0) { + return false; + } + + ClearToZero(quot, top_words); + ClearToZero(top_work.GetBuffer(), bot_work_words); + + /* Align to edges. 
*/ + const size_t shift = BitsPerWord - CountSignificantBits(bot[bot_work_words - 1]); + top_work.GetBuffer()[top_words] = LeftShift(top_work.GetBuffer(), top, top_words, shift); + LeftShift(bot_work.GetBuffer(), bot, bot_work_words, shift); + const Word tb = bot_work.GetBuffer()[bot_work_words - 1]; + + /* Repeatedly div + sub. */ + for (s32 i = (top_words - bot_work_words); i >= 0; i--) { + Word cur_word; + if (tb == MaxWord) { + cur_word = top_work.GetBuffer()[i + bot_work_words]; + } else { + cur_word = DivWord(top_work.GetBuffer() + i + bot_work_words - 1, tb + 1); + } + top_work.GetBuffer()[i + bot_work_words] -= MultSub(top_work.GetBuffer() + i, top_work.GetBuffer() + i, bot_work.GetBuffer(), bot_work_words, cur_word); + + while (top_work.GetBuffer()[i + bot_work_words] || Compare(top_work.GetBuffer() + i, bot_work.GetBuffer(), bot_work_words) >= 0) { + cur_word++; + top_work.GetBuffer()[i + bot_work_words] -= Sub(top_work.GetBuffer() + i, top_work.GetBuffer() + i, bot_work.GetBuffer(), bot_work_words); + } + quot[i] = cur_word; + } + + /* Calculate remainder. */ + ClearToZero(rem, bot_words); + RightShift(rem, top_work.GetBuffer(), bot_work_words, shift); + + return true; + } + + bool BigNum::Mult(Word *dst, const Word *lhs, const Word *rhs, size_t num_words, WordAllocator *allocator) { + /* Allocate work. */ + auto work = allocator->Allocate(2 * num_words); + if (!work.IsValid()) { + return false; + } + ClearToZero(work.GetBuffer(), work.GetCount()); + + /* Repeatedly add and multiply. */ + const size_t lhs_words = CountWords(lhs, num_words); + const size_t rhs_words = CountWords(rhs, num_words); + + for (size_t i = 0; i < lhs_words; i++) { + work.GetBuffer()[i + rhs_words] += MultAdd(work.GetBuffer() + i, rhs, rhs_words, lhs[i]); + } + + /* Copy to output. 
*/ + Copy(dst, work.GetBuffer(), work.GetCount()); + + return true; + } + +} \ No newline at end of file diff --git a/libraries/libvapours/source/crypto/impl/crypto_bignum_operations_asm.arch.arm64.s b/libraries/libvapours/source/crypto/impl/crypto_bignum_operations_asm.arch.arm64.s new file mode 100644 index 000000000..d7c7ae52b --- /dev/null +++ b/libraries/libvapours/source/crypto/impl/crypto_bignum_operations_asm.arch.arm64.s @@ -0,0 +1,279 @@ +/* + * Copyright (c) 2018-2020 Atmosphère-NX + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +/* ams::crypto::impl::BigNum::Add(Word *dst, const Word *lhs, const Word *rhs, size_t num_words): adds lhs and rhs word-by-word with carry into dst and returns the final carry in x0. The entry label must be spelled exactly like the .global/.type symbol (the repeated "PKj" parameter is mangled as the substitution "S5_"), otherwise the exported symbol is never defined. */ +.section .text._ZN3ams6crypto4impl6BigNum3AddEPjPKjS5_m, "ax", %progbits +.global _ZN3ams6crypto4impl6BigNum3AddEPjPKjS5_m +.type _ZN3ams6crypto4impl6BigNum3AddEPjPKjS5_m, %function +.balign 0x10 +_ZN3ams6crypto4impl6BigNum3AddEPjPKjS5_m: + /* Check if we have anything to do at all. */ + msr nzcv, xzr + cbz x3, 7f + + /* Save registers. */ + stp x16, x17, [sp, #-16]! + stp xzr, x19, [sp, #-16]! + stp x20, x21, [sp, #-16]! + + /* Check if we have less than 16 words to process. */ + lsr x20, x3, #4 + cbz x20, 2f + + sub x3, x3, x20, lsl #4 + +1: /* Process 16 words at a time. */ + /* NOTE: Nintendo uses X18 here, we will use X21 for EL1+ compat. 
*/ + ldp x4, x5, [x1], #16 + ldp x12, x13, [x2], #16 + ldp x6, x7, [x1], #16 + ldp x14, x15, [x2], #16 + ldp x8, x9, [x1], #16 + ldp x16, x17, [x2], #16 + ldp x10, x11, [x1], #16 + ldp x21, x19, [x2], #16 + + adcs x4, x4, x12 + adcs x5, x5, x13 + stp x4, x5, [x0], #16 + + adcs x6, x6, x14 + adcs x7, x7, x15 + stp x6, x7, [x0], #16 + + adcs x8, x8, x16 + adcs x9, x9, x17 + stp x8, x9, [x0], #16 + + adcs x10, x10, x21 + adcs x11, x11, x19 + stp x10, x11, [x0], #16 + + sub x20, x20, #1 + cbnz x20, 1b + +2: /* We have less than 16 words to process. */ + lsr x15, x3, #2 + cbz x15, 4f + + sub x3, x3, x15, lsl #2 + +3: /* Process 4 words at a time. */ + ldp x4, x5, [x1], #16 + ldp x8, x9, [x2], #16 + + sub x15, x15, #1 + + adcs x4, x4, x8 + adcs x5, x5, x9 + + stp x4, x5, [x0], #16 + + cbnz x15, 3b + +4: /* We have less than 4 words to process. */ + cbz x3, 6f + +5: /* Process 1 word at a time. */ + ldr w4, [x1], #4 + ldr w8, [x2], #4 + adcs w4, w4, w8 + str w4, [x0], #4 + + sub x3, x3, #1 + cbnz x3, 5b + +6: /* Restore registers we used while adding. */ + ldp x20, x21, [sp], #16 + ldp xzr, x19, [sp], #16 + ldp x16, x17, [sp], #16 + +7: /* We're done. */ + adc x0, xzr, xzr + ret + +/* ams::crypto::impl::BigNum::Sub(Word *dst, const Word *lhs, const Word *rhs, size_t num_words) */ +.section .text._ZN3ams6crypto4impl6BigNum3SubEPjPKjS5_m, "ax", %progbits +.global _ZN3ams6crypto4impl6BigNum3SubEPjPKjS5_m +.type _ZN3ams6crypto4impl6BigNum3SubEPjPKjS5_m, %function +.balign 0x10 +_ZN3ams6crypto4impl6BigNum3SubEPjPKjS5_m: + /* Check if we have anything to do at all. */ + mov x4, #0x20000000 + msr nzcv, x4 + cbz x3, 7f + + /* Save registers. */ + stp x16, x17, [sp, #-16]! + stp xzr, x19, [sp, #-16]! + stp x20, x21, [sp, #-16]! + + /* Check if we have less than 16 words to process. */ + lsr x20, x3, #4 + cbz x20, 2f + + sub x3, x3, x20, lsl #4 + +1: /* Process 16 words at a time. */ + /* NOTE: Nintendo uses X18 here, we will use X21 for EL1+ compat. 
*/ + ldp x4, x5, [x1], #16 + ldp x12, x13, [x2], #16 + ldp x6, x7, [x1], #16 + ldp x14, x15, [x2], #16 + ldp x8, x9, [x1], #16 + ldp x16, x17, [x2], #16 + ldp x10, x11, [x1], #16 + ldp x21, x19, [x2], #16 + + sbcs x4, x4, x12 + sbcs x5, x5, x13 + stp x4, x5, [x0], #16 + + sbcs x6, x6, x14 + sbcs x7, x7, x15 + stp x6, x7, [x0], #16 + + sbcs x8, x8, x16 + sbcs x9, x9, x17 + stp x8, x9, [x0], #16 + + sbcs x10, x10, x21 + sbcs x11, x11, x19 + stp x10, x11, [x0], #16 + + sub x20, x20, #1 + cbnz x20, 1b + +2: /* We have less than 16 words to process. */ + lsr x15, x3, #2 + cbz x15, 4f + + sub x3, x3, x15, lsl #2 + +3: /* Process 4 words at a time. */ + ldp x4, x5, [x1], #16 + ldp x8, x9, [x2], #16 + + sub x15, x15, #1 + + sbcs x4, x4, x8 + sbcs x5, x5, x9 + + stp x4, x5, [x0], #16 + + cbnz x15, 3b + +4: /* We have less than 4 words to process. */ + cbz x3, 6f + +5: /* Process 1 word at a time. */ + ldr w4, [x1], #4 + ldr w8, [x2], #4 + sbcs w4, w4, w8 + str w4, [x0], #4 + + sub x3, x3, #1 + cbnz x3, 5b + +6: /* Restore registers we used while adding. */ + ldp x20, x21, [sp], #16 + ldp xzr, x19, [sp], #16 + ldp x16, x17, [sp], #16 + +7: /* We're done. */ + cinc x0, xzr, cc + ret + +/* ams::crypto::impl::BigNum::MultAdd(Word *dst, const Word *w, size_t num_words, Word mult) */ +.section .text._ZN3ams6crypto4impl6BigNum7MultAddEPjPKjmj, "ax", %progbits +.global _ZN3ams6crypto4impl6BigNum7MultAddEPjPKjmj +.type _ZN3ams6crypto4impl6BigNum7MultAddEPjPKjmj, %function +.balign 0x10 +_ZN3ams6crypto4impl6BigNum7MultAddEPjPKjmj: + /* Check if we have anything to do at all. */ + mov x15, xzr + cbz x2, 5f + + /* Check if we have less than four words to process. */ + lsr x6, x2, #2 + cbz x6, 2f + + /* We have more than four words to process. */ + sub x2, x2, x6, lsl #2 + stp x16, x17, [sp, #-16]! + +1: /* Loop processing four words at a time. 
*/ + ldp w4, w5, [x1], #8 + ldp w16, w7, [x1], #8 + ldp w8, w9, [x0] + ldp w10, w11, [x0, #8] + + umaddl x4, w3, w4, x8 + umaddl x5, w3, w5, x9 + umaddl x16, w3, w16, x10 + umaddl x7, w3, w7, x11 + + add x12, x4, x15, lsr #32 + add x13, x5, x12, lsr #32 + stp w12, w13, [x0], #8 + + add x14, x16, x13, lsr #32 + add x15, x7, x14, lsr #32 + stp w14, w15, [x0], #8 + + sub x6, x6, #1 + cbnz x6, 1b + + ldp x16, x17, [sp], #16 + +2: /* We have less than four words. Check if we have less than two. */ + lsr x6, x2, #1 + cbz x6, 4f + + /* We have more than two words to process. */ + sub x2, x2, x6, lsl #1 + +3: /* Loop processing two words at a time. */ + ldp w4, w5, [x1], #8 + ldp w8, w9, [x0] + + umaddl x4, w3, w4, x8 + umaddl x5, w3, w5, x9 + + sub x6, x6, #1 + + add x14, x4, x15, lsr #32 + add x15, x5, x14, lsr #32 + + stp w14, w15, [x0], #8 + + cbnz x6, 3b + +4: /* We have less than two words to process. */ + cbz x2, 5f + + /* We have one word to process. */ + ldr w4, [x1], #4 + ldr w8, [x0] + + umaddl x4, w3, w4, x8 + add x15, x4, x15, lsr #32 + + str w15, [x0], #4 + +5: /* We're done. */ + lsr x0, x15, #32 + ret