// patchRP2040BootCRC computes the checksum of the RP2040 second stage
// bootloader and stores it in the last four bytes of the .boot2 section of
// the given ELF executable.
//
// The .boot2 section must be exactly 256 bytes long: 252 bytes of (padded)
// bootloader code followed by a 4-byte slot for the checksum. An error is
// returned if the section is missing, has a different size, or cannot be
// written back.
//
// Spec: https://datasheets.raspberrypi.org/rp2040/rp2040-datasheet.pdf
// Section: 2.8.1.3.1. Checksum
func patchRP2040BootCRC(executable string) error {
	const (
		boot2Size = 256 // total size of the second stage bootloader
		crcSize   = 4   // size of the checksum stored at the very end
	)

	boot2, _, err := getElfSectionData(executable, ".boot2")
	if err != nil {
		return err
	}

	if len(boot2) != boot2Size {
		return errors.New("rp2040 .boot2 section must be exactly 256 bytes")
	}

	// The checksum covers everything except the checksum slot itself.
	payload := boot2[:boot2Size-crcSize]

	// The 'official' RP2040 checksum script works around the bootrom CRC32
	// by reversing the bits of every input byte and of the final result.
	// Do the same here so that hash/crc32 can be used for the calculation.
	reversed := make([]byte, len(payload))
	for i, b := range payload {
		reversed[i] = bits.Reverse8(b)
	}

	// crc32.Update does an initial negate and negates the result, so to meet
	// the RP2040 spec, pass 0x0 as the initial hash and negate the returned
	// value again to undo the final negation.
	crc := bits.Reverse32(crc32.Update(0x0, crc32.IEEETable, reversed) ^ 0xFFFFFFFF)

	// Write the CRC to the end of the bootloader.
	binary.LittleEndian.PutUint32(boot2[len(payload):], crc)

	// Update the .boot2 section to include the CRC.
	return replaceElfSection(executable, ".boot2", boot2)
}

// getElfSectionData reads the section called sectionName from the ELF file at
// path executable. It returns the section contents together with the header
// of the ELF file. On failure (file cannot be opened or parsed, section is
// missing, section data cannot be read) the returned data is nil and the
// returned header is the zero value.
func getElfSectionData(executable string, sectionName string) ([]byte, elf.FileHeader, error) {
	elfFile, err := elf.Open(executable)
	if err != nil {
		return nil, elf.FileHeader{}, err
	}
	defer elfFile.Close()

	section := elfFile.Section(sectionName)
	if section == nil {
		return nil, elf.FileHeader{}, fmt.Errorf("could not find %s section", sectionName)
	}

	data, err := section.Data()
	if err != nil {
		// Do not hand partially read data to the caller.
		return nil, elf.FileHeader{}, err
	}

	return data, elfFile.FileHeader, nil
}

// replaceElfSection overwrites, in place, the contents of the section called
// sectionName in the ELF file at path executable with data.
//
// Only a complete replacement is permitted: data must have exactly the size
// of the existing section. Compressed sections and sections without file
// contents (SHT_NOBITS) are rejected, because the bytes at the section offset
// do not represent the section contents in those cases.
func replaceElfSection(executable string, sectionName string, data []byte) error {
	fp, err := os.OpenFile(executable, os.O_RDWR, 0)
	if err != nil {
		return err
	}
	// Covers the early returns below. On the success path the file is closed
	// explicitly so that the close error can be reported; the second Close
	// done by this defer is then a harmless no-op error that is ignored.
	defer fp.Close()

	// Parse the ELF structure through the handle that is also used for
	// writing, instead of opening the file a second time.
	elfFile, err := elf.NewFile(fp)
	if err != nil {
		return err
	}

	section := elfFile.Section(sectionName)
	if section == nil {
		return fmt.Errorf("could not find %s section", sectionName)
	}

	// A SHT_NOBITS section has no contents in the file, so writing at its
	// offset would overwrite bytes that belong to something else.
	if section.Type == elf.SHT_NOBITS {
		return fmt.Errorf("section %s has no contents in the file and cannot be replaced", sectionName)
	}

	// Implicitly check for compressed sections
	if section.Size != section.FileSize {
		return fmt.Errorf("expected section %s to have identical size and file size, got %d and %d", sectionName, section.Size, section.FileSize)
	}

	// Only permit complete replacement of section
	if section.Size != uint64(len(data)) {
		return fmt.Errorf("expected section %s to have size %d, was actually %d", sectionName, len(data), section.Size)
	}

	// Write the replacement section data
	if _, err := fp.WriteAt(data, int64(section.Offset)); err != nil {
		return err
	}

	return fp.Close()
}
- The debugger will use the ELF entry point, which is the _entry_point - symbol if present, otherwise defaults to start of .text. - This can be used to transfer control back to the bootrom on debugger - launches only, to perform proper flash setup. - */ + /* Reserve exactly 256 bytes at start of flash for second stage bootloader */ + BOOT2_TEXT (rx) : ORIGIN = 0x10000000, LENGTH = 256 + FLASH_TEXT (rx) : ORIGIN = 0x10000000 + 256, LENGTH = 2048K - 256 + RAM (rwx) : ORIGIN = 0x20000000, LENGTH = 256k } INCLUDE "targets/rp2040.ld" diff --git a/targets/pico_boot_stage2.S b/targets/pico_boot_stage2.S index 4902d3bb..274845b1 100644 --- a/targets/pico_boot_stage2.S +++ b/targets/pico_boot_stage2.S @@ -1,23 +1,420 @@ -// Padded and checksummed version of: /home/rkanchan/src/pico-sdk/build/src/rp2_common/boot_stage2/bs2_default.bin +// +// Implementation of Pico stage 2 boot loader. This code is for the Winbond W25Q080 +// (as found in the Pico) from the official Pico SDK. +// +// This implementation has been made 'stand-alone' by including necessary code / +// symbols from the included files in the reference implementation directly into +// the source. Care has been taken to preserve ordering and it has been verified +// the generated binary is byte-for-byte identical to the reference code binary. +// +// Note: the stage 2 boot loader must be 256 bytes in length and have a checksum +// present. In TinyGo, the linker script is responsible for allocating 256 bytes +// for the .boot2 section and the build logic patches the checksum into the +// binary after linking, controlled by the '.json' flag 'rp2040-boot-patch'. 
+// +// The stage 2 bootstrap section can be inspected in an elf file using this command: +// objdump -s -j .boot2 .elf +// +// Original Source: +// https://github.com/raspberrypi/pico-sdk/blob/master/src/rp2_common/boot_stage2/boot2_w25q080.S +// +// Board Parameters +#define PICO_FLASH_SPI_CLKDIV 2 + + + +// ---------------------------------------------------------------------------- +// Second stage boot code +// Copyright (c) 2019-2021 Raspberry Pi (Trading) Ltd. +// SPDX-License-Identifier: BSD-3-Clause +// +// Device: Winbond W25Q080 +// Also supports W25Q16JV (which has some different SR instructions) +// Also supports AT25SF081 +// Also supports S25FL132K0 +// +// Description: Configures W25Q080 to run in Quad I/O continuous read XIP mode +// +// Details: * Check status register 2 to determine if QSPI mode is enabled, +// and perform an SR2 programming cycle if necessary. +// * Use SSI to perform a dummy 0xEB read command, with the mode +// continuation bits set, so that the flash will not require +// 0xEB instruction prefix on subsequent reads. +// * Configure SSI to write address, mode bits, but no instruction. +// SSI + flash are now jointly in a state where continuous reads +// can take place. +// * Jump to exit pointer passed in via lr. Bootrom passes null, +// in which case this code uses a default 256 byte flash offset +// +// Building: * This code must be position-independent, and use stack only +// * The code will be padded to a size of 256 bytes, including a +// 4-byte checksum. Therefore code size cannot exceed 252 bytes. 
+// ---------------------------------------------------------------------------- + + +// +// Expanded include files +// +#define CMD_WRITE_ENABLE 0x06 +#define CMD_READ_STATUS 0x05 +#define CMD_READ_STATUS2 0x35 +#define CMD_WRITE_STATUS 0x01 +#define SREG_DATA 0x02 // Enable quad-SPI mode + +#define XIP_BASE 0x10000000 +#define XIP_SSI_BASE 0x18000000 +#define PADS_QSPI_BASE 0x40020000 +#define PPB_BASE 0xe0000000 + +#define M0PLUS_VTOR_OFFSET 0x0000ed08 + +#define PADS_QSPI_GPIO_QSPI_SCLK_DRIVE_LSB 4 +#define PADS_QSPI_GPIO_QSPI_SCLK_SLEWFAST_BITS 0x00000001 +#define PADS_QSPI_GPIO_QSPI_SCLK_OFFSET 0x00000004 +#define PADS_QSPI_GPIO_QSPI_SD0_OFFSET 0x00000008 +#define PADS_QSPI_GPIO_QSPI_SD0_SCHMITT_BITS 0x00000002 +#define PADS_QSPI_GPIO_QSPI_SD1_OFFSET 0x0000000c +#define PADS_QSPI_GPIO_QSPI_SD2_OFFSET 0x00000010 +#define PADS_QSPI_GPIO_QSPI_SD3_OFFSET 0x00000014 + +#define SSI_CTRLR0_OFFSET 0x00000000 +#define SSI_CTRLR1_OFFSET 0x00000004 +#define SSI_SSIENR_OFFSET 0x00000008 +#define SSI_BAUDR_OFFSET 0x00000014 +#define SSI_SR_OFFSET 0x00000028 +#define SSI_DR0_OFFSET 0x00000060 +#define SSI_RX_SAMPLE_DLY_OFFSET 0x000000f0 + +#define SSI_CTRLR0_DFS_32_LSB 16 + +#define SSI_CTRLR0_SPI_FRF_VALUE_QUAD 0x2 +#define SSI_CTRLR0_SPI_FRF_LSB 21 + +#define SSI_CTRLR0_TMOD_VALUE_TX_AND_RX 0x0 +#define SSI_CTRLR0_TMOD_VALUE_EEPROM_READ 0x3 +#define SSI_CTRLR0_TMOD_LSB 8 + +#define SSI_SPI_CTRLR0_TRANS_TYPE_VALUE_1C2A 0x1 +#define SSI_SPI_CTRLR0_TRANS_TYPE_VALUE_2C2A 0x2 + +#define SSI_SPI_CTRLR0_OFFSET 0x000000f4 + +#define SSI_SPI_CTRLR0_INST_L_VALUE_NONE 0x0 +#define SSI_SPI_CTRLR0_INST_L_VALUE_8B 0x2 + +#define SSI_SPI_CTRLR0_TRANS_TYPE_LSB 0 +#define SSI_SPI_CTRLR0_ADDR_L_LSB 2 +#define SSI_SPI_CTRLR0_INST_L_LSB 8 +#define SSI_SPI_CTRLR0_WAIT_CYCLES_LSB 11 +#define SSI_SPI_CTRLR0_XIP_CMD_LSB 24 + +#define SSI_SR_BUSY_BITS 0x00000001 +#define SSI_SR_TFE_BITS 0x00000004 + + +// ---------------------------------------------------------------------------- +// Config 
section +// ---------------------------------------------------------------------------- +// It should be possible to support most flash devices by modifying this section + +// The serial flash interface will run at clk_sys/PICO_FLASH_SPI_CLKDIV. +// This must be a positive, even integer. +// The bootrom is very conservative with SPI frequency, but here we should be +// as aggressive as possible. + +#ifndef PICO_FLASH_SPI_CLKDIV +#define PICO_FLASH_SPI_CLKDIV 4 +#endif +#if PICO_FLASH_SPI_CLKDIV & 1 +#error PICO_FLASH_SPI_CLKDIV must be even +#endif + +// Define interface width: single/dual/quad IO +#define FRAME_FORMAT SSI_CTRLR0_SPI_FRF_VALUE_QUAD + +// For W25Q080 this is the "Read data fast quad IO" instruction: +#define CMD_READ 0xeb + +// "Mode bits" are 8 special bits sent immediately after +// the address bits in a "Read Data Fast Quad I/O" command sequence. +// On W25Q080, the four LSBs are don't care, and if MSBs == 0xa, the +// next read does not require the 0xeb instruction prefix. +#define MODE_CONTINUOUS_READ 0xa0 + +// The number of address + mode bits, divided by 4 (always 4, not function of +// interface width). +#define ADDR_L 8 + +// How many clocks of Hi-Z following the mode bits. For W25Q080, 4 dummy cycles +// are required. +#define WAIT_CYCLES 4 + +// If defined, we will read status reg, compare to SREG_DATA, and overwrite +// with our value if the SR doesn't match. +// We do a two-byte write to SR1 (01h cmd) rather than a one-byte write to +// SR2 (31h cmd) as the latter command isn't supported by WX25Q080. +// This isn't great because it will remove block protections. +// A better solution is to use a volatile SR write if your device supports it. 
+#define PROGRAM_STATUS_REG + +.syntax unified .cpu cortex-m0plus .thumb - .section .boot2, "ax" -.byte 0x00, 0xb5, 0x32, 0x4b, 0x21, 0x20, 0x58, 0x60, 0x98, 0x68, 0x02, 0x21, 0x88, 0x43, 0x98, 0x60 -.byte 0xd8, 0x60, 0x18, 0x61, 0x58, 0x61, 0x2e, 0x4b, 0x00, 0x21, 0x99, 0x60, 0x02, 0x21, 0x59, 0x61 -.byte 0x01, 0x21, 0xf0, 0x22, 0x99, 0x50, 0x2b, 0x49, 0x19, 0x60, 0x01, 0x21, 0x99, 0x60, 0x35, 0x20 -.byte 0x00, 0xf0, 0x44, 0xf8, 0x02, 0x22, 0x90, 0x42, 0x14, 0xd0, 0x06, 0x21, 0x19, 0x66, 0x00, 0xf0 -.byte 0x34, 0xf8, 0x19, 0x6e, 0x01, 0x21, 0x19, 0x66, 0x00, 0x20, 0x18, 0x66, 0x1a, 0x66, 0x00, 0xf0 -.byte 0x2c, 0xf8, 0x19, 0x6e, 0x19, 0x6e, 0x19, 0x6e, 0x05, 0x20, 0x00, 0xf0, 0x2f, 0xf8, 0x01, 0x21 -.byte 0x08, 0x42, 0xf9, 0xd1, 0x00, 0x21, 0x99, 0x60, 0x1b, 0x49, 0x19, 0x60, 0x00, 0x21, 0x59, 0x60 -.byte 0x1a, 0x49, 0x1b, 0x48, 0x01, 0x60, 0x01, 0x21, 0x99, 0x60, 0xeb, 0x21, 0x19, 0x66, 0xa0, 0x21 -.byte 0x19, 0x66, 0x00, 0xf0, 0x12, 0xf8, 0x00, 0x21, 0x99, 0x60, 0x16, 0x49, 0x14, 0x48, 0x01, 0x60 -.byte 0x01, 0x21, 0x99, 0x60, 0x01, 0xbc, 0x00, 0x28, 0x00, 0xd0, 0x00, 0x47, 0x12, 0x48, 0x13, 0x49 -.byte 0x08, 0x60, 0x03, 0xc8, 0x80, 0xf3, 0x08, 0x88, 0x08, 0x47, 0x03, 0xb5, 0x99, 0x6a, 0x04, 0x20 -.byte 0x01, 0x42, 0xfb, 0xd0, 0x01, 0x20, 0x01, 0x42, 0xf8, 0xd1, 0x03, 0xbd, 0x02, 0xb5, 0x18, 0x66 -.byte 0x18, 0x66, 0xff, 0xf7, 0xf2, 0xff, 0x18, 0x6e, 0x18, 0x6e, 0x02, 0xbd, 0x00, 0x00, 0x02, 0x40 -.byte 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x07, 0x00, 0x00, 0x03, 0x5f, 0x00, 0x21, 0x22, 0x00, 0x00 -.byte 0xf4, 0x00, 0x00, 0x18, 0x22, 0x20, 0x00, 0xa0, 0x00, 0x01, 0x00, 0x10, 0x08, 0xed, 0x00, 0xe0 -.byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x74, 0xb2, 0x4e, 0x7a +// The exit point is passed in lr. If entered from bootrom, this will be the +// flash address immediately following this second stage (0x10000100). 
+// Otherwise it will be a return address -- second stage being called as a +// function by user code, after copying out of XIP region. r3 holds SSI base, +// r0...2 used as temporaries. Other GPRs not used. +.global _stage2_boot +.type _stage2_boot,%function +.thumb_func +_stage2_boot: + push {lr} + + // Set pad configuration: + // - SCLK 8mA drive, no slew limiting + // - SDx disable input Schmitt to reduce delay + + ldr r3, =PADS_QSPI_BASE + movs r0, #(2 << PADS_QSPI_GPIO_QSPI_SCLK_DRIVE_LSB | PADS_QSPI_GPIO_QSPI_SCLK_SLEWFAST_BITS) + str r0, [r3, #PADS_QSPI_GPIO_QSPI_SCLK_OFFSET] + ldr r0, [r3, #PADS_QSPI_GPIO_QSPI_SD0_OFFSET] + movs r1, #PADS_QSPI_GPIO_QSPI_SD0_SCHMITT_BITS + bics r0, r1 + str r0, [r3, #PADS_QSPI_GPIO_QSPI_SD0_OFFSET] + str r0, [r3, #PADS_QSPI_GPIO_QSPI_SD1_OFFSET] + str r0, [r3, #PADS_QSPI_GPIO_QSPI_SD2_OFFSET] + str r0, [r3, #PADS_QSPI_GPIO_QSPI_SD3_OFFSET] + + ldr r3, =XIP_SSI_BASE + + // Disable SSI to allow further config + movs r1, #0 + str r1, [r3, #SSI_SSIENR_OFFSET] + + // Set baud rate + movs r1, #PICO_FLASH_SPI_CLKDIV + str r1, [r3, #SSI_BAUDR_OFFSET] + + // Set 1-cycle sample delay. If PICO_FLASH_SPI_CLKDIV == 2 then this means, + // if the flash launches data on SCLK posedge, we capture it at the time that + // the next SCLK posedge is launched. This is shortly before that posedge + // arrives at the flash, so data hold time should be ok. For + // PICO_FLASH_SPI_CLKDIV > 2 this pretty much has no effect. + + movs r1, #1 + movs r2, #SSI_RX_SAMPLE_DLY_OFFSET // == 0xf0 so need 8 bits of offset significance + str r1, [r3, r2] + +// On QSPI parts we usually need a 01h SR-write command to enable QSPI mode +// (i.e. 
turn WPn and HOLDn into IO2/IO3) +#ifdef PROGRAM_STATUS_REG +program_sregs: +#define CTRL0_SPI_TXRX \ + (7 << SSI_CTRLR0_DFS_32_LSB) | /* 8 bits per data frame */ \ + (SSI_CTRLR0_TMOD_VALUE_TX_AND_RX << SSI_CTRLR0_TMOD_LSB) + + ldr r1, =(CTRL0_SPI_TXRX) + str r1, [r3, #SSI_CTRLR0_OFFSET] + + // Enable SSI and select slave 0 + movs r1, #1 + str r1, [r3, #SSI_SSIENR_OFFSET] + + // Check whether SR needs updating + movs r0, #CMD_READ_STATUS2 + bl read_flash_sreg + movs r2, #SREG_DATA + cmp r0, r2 + beq skip_sreg_programming + + // Send write enable command + movs r1, #CMD_WRITE_ENABLE + str r1, [r3, #SSI_DR0_OFFSET] + + // Poll for completion and discard RX + bl wait_ssi_ready + ldr r1, [r3, #SSI_DR0_OFFSET] + + // Send status write command followed by data bytes + movs r1, #CMD_WRITE_STATUS + str r1, [r3, #SSI_DR0_OFFSET] + movs r0, #0 + str r0, [r3, #SSI_DR0_OFFSET] + str r2, [r3, #SSI_DR0_OFFSET] + + bl wait_ssi_ready + ldr r1, [r3, #SSI_DR0_OFFSET] + ldr r1, [r3, #SSI_DR0_OFFSET] + ldr r1, [r3, #SSI_DR0_OFFSET] + + // Poll status register for write completion +1: + movs r0, #CMD_READ_STATUS + bl read_flash_sreg + movs r1, #1 + tst r0, r1 + bne 1b + +skip_sreg_programming: + + // Disable SSI again so that it can be reconfigured + movs r1, #0 + str r1, [r3, #SSI_SSIENR_OFFSET] +#endif + +// Currently the flash expects an 8 bit serial command prefix on every +// transfer, which is a waste of cycles. Perform a dummy Fast Read Quad I/O +// command, with mode bits set such that the flash will not expect a serial +// command prefix on *subsequent* transfers. We don't care about the results +// of the read, the important part is the mode bits. 
+ +dummy_read: +#define CTRLR0_ENTER_XIP \ + (FRAME_FORMAT /* Quad I/O mode */ \ + << SSI_CTRLR0_SPI_FRF_LSB) | \ + (31 << SSI_CTRLR0_DFS_32_LSB) | /* 32 data bits */ \ + (SSI_CTRLR0_TMOD_VALUE_EEPROM_READ /* Send INST/ADDR, Receive Data */ \ + << SSI_CTRLR0_TMOD_LSB) + + ldr r1, =(CTRLR0_ENTER_XIP) + str r1, [r3, #SSI_CTRLR0_OFFSET] + + movs r1, #0x0 // NDF=0 (single 32b read) + str r1, [r3, #SSI_CTRLR1_OFFSET] + +#define SPI_CTRLR0_ENTER_XIP \ + (ADDR_L << SSI_SPI_CTRLR0_ADDR_L_LSB) | /* Address + mode bits */ \ + (WAIT_CYCLES << SSI_SPI_CTRLR0_WAIT_CYCLES_LSB) | /* Hi-Z dummy clocks following address + mode */ \ + (SSI_SPI_CTRLR0_INST_L_VALUE_8B \ + << SSI_SPI_CTRLR0_INST_L_LSB) | /* 8-bit instruction */ \ + (SSI_SPI_CTRLR0_TRANS_TYPE_VALUE_1C2A /* Send Command in serial mode then address in Quad I/O mode */ \ + << SSI_SPI_CTRLR0_TRANS_TYPE_LSB) + + ldr r1, =(SPI_CTRLR0_ENTER_XIP) + ldr r0, =(XIP_SSI_BASE + SSI_SPI_CTRLR0_OFFSET) // SPI_CTRL0 Register + str r1, [r0] + + movs r1, #1 // Re-enable SSI + str r1, [r3, #SSI_SSIENR_OFFSET] + + movs r1, #CMD_READ + str r1, [r3, #SSI_DR0_OFFSET] // Push SPI command into TX FIFO + movs r1, #MODE_CONTINUOUS_READ // 32-bit: 24 address bits (we don't care, so 0) and M[7:4]=1010 + str r1, [r3, #SSI_DR0_OFFSET] // Push Address into TX FIFO - this will trigger the transaction + + // Poll for completion + bl wait_ssi_ready + +// The flash is in a state where we can blast addresses in parallel, and get +// parallel data back. Now configure the SSI to translate XIP bus accesses +// into QSPI transfers of this form. 
+ + movs r1, #0 + str r1, [r3, #SSI_SSIENR_OFFSET] // Disable SSI (and clear FIFO) to allow further config + +// Note that the INST_L field is used to select what XIP data gets pushed into +// the TX FIFO: +// INST_L_0_BITS {ADDR[23:0],XIP_CMD[7:0]} Load "mode bits" into XIP_CMD +// Anything else {XIP_CMD[7:0],ADDR[23:0]} Load SPI command into XIP_CMD +configure_ssi: +#define SPI_CTRLR0_XIP \ + (MODE_CONTINUOUS_READ /* Mode bits to keep flash in continuous read mode */ \ + << SSI_SPI_CTRLR0_XIP_CMD_LSB) | \ + (ADDR_L << SSI_SPI_CTRLR0_ADDR_L_LSB) | /* Total number of address + mode bits */ \ + (WAIT_CYCLES << SSI_SPI_CTRLR0_WAIT_CYCLES_LSB) | /* Hi-Z dummy clocks following address + mode */ \ + (SSI_SPI_CTRLR0_INST_L_VALUE_NONE /* Do not send a command, instead send XIP_CMD as mode bits after address */ \ + << SSI_SPI_CTRLR0_INST_L_LSB) | \ + (SSI_SPI_CTRLR0_TRANS_TYPE_VALUE_2C2A /* Send Address in Quad I/O mode (and Command but that is zero bits long) */ \ + << SSI_SPI_CTRLR0_TRANS_TYPE_LSB) + + ldr r1, =(SPI_CTRLR0_XIP) + + ldr r0, =(XIP_SSI_BASE + SSI_SPI_CTRLR0_OFFSET) + str r1, [r0] + + movs r1, #1 + str r1, [r3, #SSI_SSIENR_OFFSET] // Re-enable SSI + +// Bus accesses to the XIP window will now be transparently serviced by the +// external flash on cache miss. We are ready to run code from flash. + + +// +// Helper Includes +// + +// +// #include "boot2_helpers/exit_from_boot2.S" +// + +// If entered from the bootrom, lr (which we earlier pushed) will be 0, +// and we vector through the table at the start of the main flash image. +// Any regular function call will have a nonzero value for lr. 
+check_return: + pop {r0} + cmp r0, #0 + beq vector_into_flash + bx r0 +vector_into_flash: + ldr r0, =(XIP_BASE + 0x100) + ldr r1, =(PPB_BASE + M0PLUS_VTOR_OFFSET) + str r0, [r1] + ldmia r0, {r0, r1} + msr msp, r0 + bx r1 + +// +// #include "boot2_helpers/wait_ssi_ready.S" +// +wait_ssi_ready: + push {r0, r1, lr} + + // Command is complete when there is nothing left to send + // (TX FIFO empty) and SSI is no longer busy (CSn deasserted) +1: + ldr r1, [r3, #SSI_SR_OFFSET] + movs r0, #SSI_SR_TFE_BITS + tst r1, r0 + beq 1b + movs r0, #SSI_SR_BUSY_BITS + tst r1, r0 + bne 1b + + pop {r0, r1, pc} + + +#ifdef PROGRAM_STATUS_REG + +// +// #include "boot2_helpers/read_flash_sreg.S" +// + +// Pass status read cmd into r0. +// Returns status value in r0. +.global read_flash_sreg +.type read_flash_sreg,%function +.thumb_func +read_flash_sreg: + push {r1, lr} + str r0, [r3, #SSI_DR0_OFFSET] + // Dummy byte: + str r0, [r3, #SSI_DR0_OFFSET] + + bl wait_ssi_ready + // Discard first byte and combine the next two + ldr r0, [r3, #SSI_DR0_OFFSET] + ldr r0, [r3, #SSI_DR0_OFFSET] + + pop {r1, pc} + +#endif + +.global literals +literals: +.ltorg + +.end diff --git a/targets/rp2040.json b/targets/rp2040.json index ce49ba35..c679758e 100644 --- a/targets/rp2040.json +++ b/targets/rp2040.json @@ -6,6 +6,7 @@ "msd-firmware-name": "firmware.uf2", "binary-format": "uf2", "uf2-family-id": "0xe48bff56", + "rp2040-boot-patch": true, "extra-files": [ "src/device/rp/rp2040.s" ] diff --git a/targets/rp2040.ld b/targets/rp2040.ld index 186db68f..5aa57ce6 100644 --- a/targets/rp2040.ld +++ b/targets/rp2040.ld @@ -1,9 +1,29 @@ -MEMORY -{ - RAM (rwx) : ORIGIN = 0x20000000, LENGTH = 256k -} - _stack_size = 2K; +SECTIONS +{ + /* Second stage bootloader is prepended to the image. It must be 256 bytes + and checksummed. The gap to the checksum is zero-padded. 
+ */ + .boot2 : { + __boot2_start__ = .; + KEEP (*(.boot2)); + + /* Explicitly allocate space for CRC32 checksum at end of second stage + bootloader + */ + . = __boot2_start__ + 256 - 4; + LONG(0) + } > BOOT2_TEXT = 0x0 + + /* The second stage will always enter the image at the start of .text. + The debugger will use the ELF entry point, which is the _entry_point + symbol if present, otherwise defaults to start of .text. + This can be used to transfer control back to the bootrom on debugger + launches only, to perform proper flash setup. + */ +} + + INCLUDE "targets/arm.ld"