kernel_ldr: use unoptimized memcpy before mmu bringup (closes #1102)

Before the MMU is up, all reads/writes must be aligned; the optimized
memcpy implementation does not guarantee all reads/writes it performs
are aligned.

This commit splits the libc impl to be separate for kernel/kernel_ldr,
and so now only kernel will use the optimized impl. This is safe,
as the MMU is brought up before kernel begins executing.
This commit is contained in:
Michael Scire 2020-08-02 14:30:06 -07:00
parent a82914d58e
commit 29358dc593
10 changed files with 723 additions and 0 deletions

View file

@ -0,0 +1,24 @@
/*
* Copyright (c) 2018-2020 Atmosphère-NX
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
/* Definitions for libc genericity. */
#define MESOSPHERE_LIBC_MEMCPY_GENERIC 1
#define MESOSPHERE_LIBC_MEMCMP_GENERIC 1
#define MESOSPHERE_LIBC_MEMMOVE_GENERIC 1
#define MESOSPHERE_LIBC_MEMSET_GENERIC 1
#define MESOSPHERE_LIBC_STRNCPY_GENERIC 1
#define MESOSPHERE_LIBC_STRNCMP_GENERIC 1

View file

@ -0,0 +1,26 @@
/*
* Copyright (c) 2018-2020 Atmosphère-NX
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#if defined(ATMOSPHERE_ARCH_ARM64)
#include "kern_libc_config.arch.arm64.h"
#else
#error "Unknown architecture for libc"
#endif

View file

@ -0,0 +1,673 @@
/*
* Copyright (c) 2018-2020 Atmosphère-NX
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <string.h>
#include <stddef.h>
#include <limits.h>
#include "kern_libc_config.h"
/* Note: copied from newlib */
#ifdef __cplusplus
extern "C" {
#endif
/*
FUNCTION
<<memmove>>---move possibly overlapping memory
INDEX
memmove
SYNOPSIS
#include <string.h>
void *memmove(void *<[dst]>, const void *<[src]>, size_t <[length]>);
DESCRIPTION
This function moves <[length]> characters from the block of
memory starting at <<*<[src]>>> to the memory starting at
<<*<[dst]>>>. <<memmove>> reproduces the characters correctly
at <<*<[dst]>>> even if the two areas overlap.
RETURNS
The function returns <[dst]> as passed.
PORTABILITY
<<memmove>> is ANSI C.
<<memmove>> requires no supporting OS subroutines.
QUICKREF
memmove ansi pure
*/
/* Nonzero if either X or Y is not aligned on a "long" boundary. */
#define UNALIGNED(X, Y) \
(((long)X & (sizeof (long) - 1)) | ((long)Y & (sizeof (long) - 1)))
/* How many bytes are copied each iteration of the 4X unrolled loop. */
#define BIGBLOCKSIZE (sizeof (long) << 2)
/* How many bytes are copied each iteration of the word copy loop. */
#define LITTLEBLOCKSIZE (sizeof (long))
/* Threshhold for punting to the byte copier. */
#undef TOO_SMALL
#define TOO_SMALL(LEN) ((LEN) < BIGBLOCKSIZE)
#if MESOSPHERE_LIBC_MEMMOVE_GENERIC
/*SUPPRESS 20*/
void *
//__inhibit_loop_to_libcall
__attribute__((weak))
memmove (void *dst_void,
const void *src_void,
size_t length)
{
#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
char *dst = dst_void;
const char *src = src_void;
if (src < dst && dst < src + length)
{
/* Have to copy backwards */
src += length;
dst += length;
while (length--)
{
*--dst = *--src;
}
}
else
{
while (length--)
{
*dst++ = *src++;
}
}
return dst_void;
#else
char *dst = dst_void;
const char *src = src_void;
long *aligned_dst;
const long *aligned_src;
if (src < dst && dst < src + length)
{
/* Destructive overlap...have to copy backwards */
src += length;
dst += length;
while (length--)
{
*--dst = *--src;
}
}
else
{
/* Use optimizing algorithm for a non-destructive copy to closely
match memcpy. If the size is small or either SRC or DST is unaligned,
then punt into the byte copy loop. This should be rare. */
if (!TOO_SMALL(length) && !UNALIGNED (src, dst))
{
aligned_dst = (long*)dst;
aligned_src = (long*)src;
/* Copy 4X long words at a time if possible. */
while (length >= BIGBLOCKSIZE)
{
*aligned_dst++ = *aligned_src++;
*aligned_dst++ = *aligned_src++;
*aligned_dst++ = *aligned_src++;
*aligned_dst++ = *aligned_src++;
length -= BIGBLOCKSIZE;
}
/* Copy one long word at a time if possible. */
while (length >= LITTLEBLOCKSIZE)
{
*aligned_dst++ = *aligned_src++;
length -= LITTLEBLOCKSIZE;
}
/* Pick up any residual with a byte copier. */
dst = (char*)aligned_dst;
src = (char*)aligned_src;
}
while (length--)
{
*dst++ = *src++;
}
}
return dst_void;
#endif /* not PREFER_SIZE_OVER_SPEED */
}
#endif /* MESOSPHERE_LIBC_MEMMOVE_GENERIC */
/*
FUNCTION
<<memcpy>>---copy memory regions
SYNOPSIS
#include <string.h>
void* memcpy(void *restrict <[out]>, const void *restrict <[in]>,
size_t <[n]>);
DESCRIPTION
This function copies <[n]> bytes from the memory region
pointed to by <[in]> to the memory region pointed to by
<[out]>.
If the regions overlap, the behavior is undefined.
RETURNS
<<memcpy>> returns a pointer to the first byte of the <[out]>
region.
PORTABILITY
<<memcpy>> is ANSI C.
<<memcpy>> requires no supporting OS subroutines.
QUICKREF
memcpy ansi pure
*/
#if MESOSPHERE_LIBC_MEMCPY_GENERIC
void *
__attribute__((weak))
memcpy (void * dst0,
const void * __restrict src0,
size_t len0)
{
#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
char *dst = (char *) dst0;
char *src = (char *) src0;
void *save = dst0;
while (len0--)
{
*dst++ = *src++;
}
return save;
#else
char *dst = dst0;
const char *src = src0;
long *aligned_dst;
const long *aligned_src;
/* If the size is small, or either SRC or DST is unaligned,
then punt into the byte copy loop. This should be rare. */
if (!TOO_SMALL(len0) && !UNALIGNED (src, dst))
{
aligned_dst = (long*)dst;
aligned_src = (long*)src;
/* Copy 4X long words at a time if possible. */
while (len0 >= BIGBLOCKSIZE)
{
*aligned_dst++ = *aligned_src++;
*aligned_dst++ = *aligned_src++;
*aligned_dst++ = *aligned_src++;
*aligned_dst++ = *aligned_src++;
len0 -= BIGBLOCKSIZE;
}
/* Copy one long word at a time if possible. */
while (len0 >= LITTLEBLOCKSIZE)
{
*aligned_dst++ = *aligned_src++;
len0 -= LITTLEBLOCKSIZE;
}
/* Pick up any residual with a byte copier. */
dst = (char*)aligned_dst;
src = (char*)aligned_src;
}
while (len0--)
*dst++ = *src++;
return dst0;
#endif /* not PREFER_SIZE_OVER_SPEED */
}
#endif /* MESOSPHERE_LIBC_MEMCPY_GENERIC */
/*
FUNCTION
<<memset>>---set an area of memory
INDEX
memset
SYNOPSIS
#include <string.h>
void *memset(void *<[dst]>, int <[c]>, size_t <[length]>);
DESCRIPTION
This function converts the argument <[c]> into an unsigned
char and fills the first <[length]> characters of the array
pointed to by <[dst]> to the value.
RETURNS
<<memset>> returns the value of <[dst]>.
PORTABILITY
<<memset>> is ANSI C.
<<memset>> requires no supporting OS subroutines.
QUICKREF
memset ansi pure
*/
#include <string.h>
#undef LBLOCKSIZE
#undef UNALIGNED
#undef TOO_SMALL
#define LBLOCKSIZE (sizeof(long))
#define UNALIGNED(X) ((long)X & (LBLOCKSIZE - 1))
#define TOO_SMALL(LEN) ((LEN) < LBLOCKSIZE)
#if MESOSPHERE_LIBC_MEMSET_GENERIC
void *
__attribute__((weak))
memset (void *m,
int c,
size_t n)
{
char *s = (char *) m;
#if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__)
unsigned int i;
unsigned long buffer;
unsigned long *aligned_addr;
unsigned int d = c & 0xff; /* To avoid sign extension, copy C to an
unsigned variable. */
while (UNALIGNED (s))
{
if (n--)
*s++ = (char) c;
else
return m;
}
if (!TOO_SMALL (n))
{
/* If we get this far, we know that n is large and s is word-aligned. */
aligned_addr = (unsigned long *) s;
/* Store D into each char sized location in BUFFER so that
we can set large blocks quickly. */
buffer = (d << 8) | d;
buffer |= (buffer << 16);
for (i = 32; i < LBLOCKSIZE * 8; i <<= 1)
buffer = (buffer << i) | buffer;
/* Unroll the loop. */
while (n >= LBLOCKSIZE*4)
{
*aligned_addr++ = buffer;
*aligned_addr++ = buffer;
*aligned_addr++ = buffer;
*aligned_addr++ = buffer;
n -= 4*LBLOCKSIZE;
}
while (n >= LBLOCKSIZE)
{
*aligned_addr++ = buffer;
n -= LBLOCKSIZE;
}
/* Pick up the remainder with a bytewise loop. */
s = (char*)aligned_addr;
}
#endif /* not PREFER_SIZE_OVER_SPEED */
while (n--)
*s++ = (char) c;
return m;
}
#endif /* MESOSPHERE_LIBC_MEMSET_GENERIC */
/*
FUNCTION
<<memcmp>>---compare two memory areas
INDEX
memcmp
SYNOPSIS
#include <string.h>
int memcmp(const void *<[s1]>, const void *<[s2]>, size_t <[n]>);
DESCRIPTION
This function compares not more than <[n]> characters of the
object pointed to by <[s1]> with the object pointed to by <[s2]>.
RETURNS
The function returns an integer greater than, equal to or
less than zero according to whether the object pointed to by
<[s1]> is greater than, equal to or less than the object
pointed to by <[s2]>.
PORTABILITY
<<memcmp>> is ANSI C.
<<memcmp>> requires no supporting OS subroutines.
QUICKREF
memcmp ansi pure
*/
#undef LBLOCKSIZE
#undef UNALIGNED
#undef TOO_SMALL
/* Nonzero if either X or Y is not aligned on a "long" boundary. */
#define UNALIGNED(X, Y) \
(((long)X & (sizeof (long) - 1)) | ((long)Y & (sizeof (long) - 1)))
/* How many bytes are copied each iteration of the word copy loop. */
#define LBLOCKSIZE (sizeof (long))
/* Threshhold for punting to the byte copier. */
#define TOO_SMALL(LEN) ((LEN) < LBLOCKSIZE)
#if MESOSPHERE_LIBC_MEMCMP_GENERIC
int
__attribute__((weak))
memcmp (const void *m1,
const void *m2,
size_t n)
{
#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
unsigned char *s1 = (unsigned char *) m1;
unsigned char *s2 = (unsigned char *) m2;
while (n--)
{
if (*s1 != *s2)
{
return *s1 - *s2;
}
s1++;
s2++;
}
return 0;
#else
unsigned char *s1 = (unsigned char *) m1;
unsigned char *s2 = (unsigned char *) m2;
unsigned long *a1;
unsigned long *a2;
/* If the size is too small, or either pointer is unaligned,
then we punt to the byte compare loop. Hopefully this will
not turn up in inner loops. */
if (!TOO_SMALL(n) && !UNALIGNED(s1,s2))
{
/* Otherwise, load and compare the blocks of memory one
word at a time. */
a1 = (unsigned long*) s1;
a2 = (unsigned long*) s2;
while (n >= LBLOCKSIZE)
{
if (*a1 != *a2)
break;
a1++;
a2++;
n -= LBLOCKSIZE;
}
/* check m mod LBLOCKSIZE remaining characters */
s1 = (unsigned char*)a1;
s2 = (unsigned char*)a2;
}
while (n--)
{
if (*s1 != *s2)
return *s1 - *s2;
s1++;
s2++;
}
return 0;
#endif /* not PREFER_SIZE_OVER_SPEED */
}
#endif /* MESOSPHERE_LIBC_MEMCMP_GENERIC */
/*
FUNCTION
<<strncpy>>---counted copy string
INDEX
strncpy
SYNOPSIS
#include <string.h>
char *strncpy(char *restrict <[dst]>, const char *restrict <[src]>,
size_t <[length]>);
DESCRIPTION
<<strncpy>> copies not more than <[length]> characters from the
the string pointed to by <[src]> (including the terminating
null character) to the array pointed to by <[dst]>. If the
string pointed to by <[src]> is shorter than <[length]>
characters, null characters are appended to the destination
array until a total of <[length]> characters have been
written.
RETURNS
This function returns the initial value of <[dst]>.
PORTABILITY
<<strncpy>> is ANSI C.
<<strncpy>> requires no supporting OS subroutines.
QUICKREF
strncpy ansi pure
*/
#include <string.h>
#include <limits.h>
/*SUPPRESS 560*/
/*SUPPRESS 530*/
/* Nonzero if either X or Y is not aligned on a "long" boundary. */
#define UNALIGNED(X, Y) \
(((long)X & (sizeof (long) - 1)) | ((long)Y & (sizeof (long) - 1)))
#if LONG_MAX == 2147483647L
#define DETECTNULL(X) (((X) - 0x01010101) & ~(X) & 0x80808080)
#else
#if LONG_MAX == 9223372036854775807L
/* Nonzero if X (a long int) contains a NULL byte. */
#define DETECTNULL(X) (((X) - 0x0101010101010101) & ~(X) & 0x8080808080808080)
#else
#error long int is not a 32bit or 64bit type.
#endif
#endif
#ifndef DETECTNULL
#error long int is not a 32bit or 64bit byte
#endif
#undef TOO_SMALL
#define TOO_SMALL(LEN) ((LEN) < sizeof (long))
#if MESOSPHERE_LIBC_STRNCMP_GENERIC
char *
strncpy (char *__restrict dst0,
const char *__restrict src0,
size_t count)
{
#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
char *dscan;
const char *sscan;
dscan = dst0;
sscan = src0;
while (count > 0)
{
--count;
if ((*dscan++ = *sscan++) == '\0')
break;
}
while (count-- > 0)
*dscan++ = '\0';
return dst0;
#else
char *dst = dst0;
const char *src = src0;
long *aligned_dst;
const long *aligned_src;
/* If SRC and DEST is aligned and count large enough, then copy words. */
if (!UNALIGNED (src, dst) && !TOO_SMALL (count))
{
aligned_dst = (long*)dst;
aligned_src = (long*)src;
/* SRC and DEST are both "long int" aligned, try to do "long int"
sized copies. */
while (count >= sizeof (long int) && !DETECTNULL(*aligned_src))
{
count -= sizeof (long int);
*aligned_dst++ = *aligned_src++;
}
dst = (char*)aligned_dst;
src = (char*)aligned_src;
}
while (count > 0)
{
--count;
if ((*dst++ = *src++) == '\0')
break;
}
while (count-- > 0)
*dst++ = '\0';
return dst0;
#endif /* not PREFER_SIZE_OVER_SPEED */
}
#endif /* MESOSPHERE_LIBC_STRNCPY_GENERIC */
/*
FUNCTION
<<strncmp>>---character string compare
INDEX
strncmp
SYNOPSIS
#include <string.h>
int strncmp(const char *<[a]>, const char * <[b]>, size_t <[length]>);
DESCRIPTION
<<strncmp>> compares up to <[length]> characters
from the string at <[a]> to the string at <[b]>.
RETURNS
If <<*<[a]>>> sorts lexicographically after <<*<[b]>>>,
<<strncmp>> returns a number greater than zero. If the two
strings are equivalent, <<strncmp>> returns zero. If <<*<[a]>>>
sorts lexicographically before <<*<[b]>>>, <<strncmp>> returns a
number less than zero.
PORTABILITY
<<strncmp>> is ANSI C.
<<strncmp>> requires no supporting OS subroutines.
QUICKREF
strncmp ansi pure
*/
#include <string.h>
#include <limits.h>
/* Nonzero if either X or Y is not aligned on a "long" boundary. */
#define UNALIGNED(X, Y) \
(((long)X & (sizeof (long) - 1)) | ((long)Y & (sizeof (long) - 1)))
/* DETECTNULL returns nonzero if (long)X contains a NULL byte. */
#if LONG_MAX == 2147483647L
#define DETECTNULL(X) (((X) - 0x01010101) & ~(X) & 0x80808080)
#else
#if LONG_MAX == 9223372036854775807L
#define DETECTNULL(X) (((X) - 0x0101010101010101) & ~(X) & 0x8080808080808080)
#else
#error long int is not a 32bit or 64bit type.
#endif
#endif
#ifndef DETECTNULL
#error long int is not a 32bit or 64bit byte
#endif
#if MESOSPHERE_LIBC_STRNCMP_GENERIC
int
strncmp (const char *s1,
const char *s2,
size_t n)
{
#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
if (n == 0)
return 0;
while (n-- != 0 && *s1 == *s2)
{
if (n == 0 || *s1 == '\0')
break;
s1++;
s2++;
}
return (*(unsigned char *) s1) - (*(unsigned char *) s2);
#else
unsigned long *a1;
unsigned long *a2;
if (n == 0)
return 0;
/* If s1 or s2 are unaligned, then compare bytes. */
if (!UNALIGNED (s1, s2))
{
/* If s1 and s2 are word-aligned, compare them a word at a time. */
a1 = (unsigned long*)s1;
a2 = (unsigned long*)s2;
while (n >= sizeof (long) && *a1 == *a2)
{
n -= sizeof (long);
/* If we've run out of bytes or hit a null, return zero
since we already know *a1 == *a2. */
if (n == 0 || DETECTNULL (*a1))
return 0;
a1++;
a2++;
}
/* A difference was detected in last few bytes of s1, so search bytewise */
s1 = (char*)a1;
s2 = (char*)a2;
}
while (n-- > 0 && *s1 == *s2)
{
/* If we've run out of bytes or hit a null, return zero
since we already know *s1 == *s2. */
if (n == 0 || *s1 == '\0')
return 0;
s1++;
s2++;
}
return (*(unsigned char *) s1) - (*(unsigned char *) s2);
#endif /* not PREFER_SIZE_OVER_SPEED */
}
#endif /* MESOSPHERE_LIBC_STRNCMP_GENERIC */
#ifdef __cplusplus
} /* extern "C" */
#endif