| 1 | ;; ----------------------------------------------------------------------- |
|---|
| 2 | ;; |
|---|
| 3 | ;; Copyright 1994-2009 H. Peter Anvin - All Rights Reserved |
|---|
| 4 | ;; Copyright 2009-2010 Intel Corporation; author: H. Peter Anvin |
|---|
| 5 | ;; |
|---|
| 6 | ;; This program is free software; you can redistribute it and/or modify |
|---|
| 7 | ;; it under the terms of the GNU General Public License as published by |
|---|
| 8 | ;; the Free Software Foundation, Inc., 53 Temple Place Ste 330, |
|---|
| 9 | ;; Boston MA 02111-1307, USA; either version 2 of the License, or |
|---|
| 10 | ;; (at your option) any later version; incorporated herein by reference. |
|---|
| 11 | ;; |
|---|
| 12 | ;; ----------------------------------------------------------------------- |
|---|
| 13 | |
|---|
| 14 | ;; |
|---|
| 15 | ;; bcopy32xx.inc |
|---|
| 16 | ;; |
|---|
| 17 | |
|---|
| 18 | |
|---|
| 19 | ; |
|---|
| 20 | ; 32-bit bcopy routine |
|---|
| 21 | ; |
|---|
| 22 | ; This is the actual 32-bit portion of the bcopy and shuffle and boot |
|---|
| 23 | ; routines. ALL THIS CODE NEEDS TO BE POSITION-INDEPENDENT, with the |
|---|
| 24 | ; sole exception being the actual relocation code at the beginning of |
|---|
| 25 | ; pm_shuffle. |
|---|
| 26 | ; |
|---|
| 27 | ; It also really needs to live all in a single segment, for the |
|---|
| 28 | ; address calculations to actually work. |
|---|
| 29 | ; |
|---|
| 30 | |
|---|
| 31 | bits 32 |
|---|
| 32 | section .bcopyxx.text |
|---|
| 33 | align 16 |
|---|
| 34 | ; |
|---|
| 35 | ; pm_bcopy: |
|---|
| 36 | ; |
|---|
| 37 | ; This is the protected-mode core of the "bcopy" routine. |
|---|
| 38 | ; Try to do aligned transfers; if the src and dst are relatively |
|---|
| 39 | ; misaligned, align the dst. |
|---|
| 40 | ; |
|---|
| 41 | ; ECX is guaranteed to not be zero on entry. |
|---|
| 42 | ; |
|---|
| 43 | ; Clobbers ESI, EDI, ECX. |
|---|
| 44 | ; |
|---|
| 45 | |
|---|
pm_bcopy:
        ; In:   ESI = source address, or -1 to zero-fill instead of copying
        ;       EDI = destination address
        ;       ECX = byte count (callers guarantee it is nonzero)
        ; EAX, EBX and EDX are used as scratch and restored before return.
        ; When the source lies below the destination and the two ranges
        ; intersect, the copy runs backwards (DF set, cleared again on exit).
        push    ebx
        push    edx
        push    eax

        cmp     esi,-1
        je      .bzero

        cmp     esi,edi                 ; If source < destination, we might
        jb      .reverse                ; have to copy backwards

.forward:
        ; Initial alignment.  EDX holds the destination address shifted
        ; right, so each SHR drops the next alignment bit into CF.
        mov     edx,edi
        shr     edx,1                   ; CF = bit 0 of dst
        jnc     .faa1
        movsb
        dec     ecx
        inc     edx                     ; EDI advanced by one: EDX = new dst >> 1
.faa1:
        mov     al,cl                   ; Low count bits, needed if we go to .f_tiny
        cmp     ecx,2
        jb      .f_tiny

        shr     edx,1                   ; CF = bit 1 of the current (even) dst
        jnc     .faa2
        movsw
        sub     ecx,2
.faa2:

        ; Bulk transfer; EDI is dword-aligned at this point
        mov     al,cl                   ; Save low bits
        shr     ecx,2                   ; Convert to dwords
        rep movsd                       ; Do our business
        ; At this point ecx == 0

        test    al,2
        jz      .fab2
        movsw
.fab2:
.f_tiny:
        test    al,1
        jz      .fab1
        movsb
.fab1:
.done:
        pop     eax
        pop     edx
        pop     ebx
        ret

.reverse:
        lea     eax,[esi+ecx-1]         ; Point to final byte
        cmp     edi,eax
        ja      .forward                ; No overlap, do forward copy

        std                             ; Reverse copy
        lea     edi,[edi+ecx-1]         ; EDI -> last destination byte
        mov     esi,eax                 ; ESI -> last source byte

        ; Initial alignment.  Going backwards, the tail is aligned when
        ; the last byte's address has its low bits set, hence JC (not JNC).
        mov     edx,edi
        shr     edx,1                   ; CF = bit 0 of last-byte address
        jc      .raa1
        movsb
        dec     ecx
        dec     edx                     ; EDI moved back by one: EDX = new last >> 1
.raa1:

        dec     esi                     ; Point at the last word for MOVSW
        dec     edi
        mov     al,cl
        cmp     ecx,2
        jb      .r_tiny
        shr     edx,1                   ; CF = bit 1 of the current last-byte address
        jc      .raa2
        movsw
        sub     ecx,2
.raa2:

        ; Bulk copy; point at the last dword, which is now aligned
        sub     esi,2
        sub     edi,2
        mov     al,cl                   ; Save low bits
        shr     ecx,2
        rep movsd

        ; Final alignment
.r_final:
        add     esi,2                   ; Back to word addressing
        add     edi,2
        test    al,2
        jz      .rab2
        movsw
.rab2:
.r_tiny:
        inc     esi                     ; Back to byte addressing
        inc     edi
        test    al,1
        jz      .rab1
        movsb
.rab1:
        cld                             ; Leave DF clear for the caller
        jmp     short .done

.bzero:
        xor     eax,eax                 ; Fill value; BL carries the low count bits here

        ; Initial alignment
        mov     edx,edi
        shr     edx,1                   ; CF = bit 0 of dst
        jnc     .zaa1
        stosb
        dec     ecx
        inc     edx                     ; EDI advanced by one: EDX = new dst >> 1
.zaa1:

        mov     bl,cl
        cmp     ecx,2
        jb      .z_tiny
        shr     edx,1                   ; CF = bit 1 of the current (even) dst
        jnc     .zaa2
        stosw
        sub     ecx,2
.zaa2:

        ; Bulk; EDI is dword-aligned at this point
        mov     bl,cl                   ; Save low bits
        shr     ecx,2
        rep stosd

        test    bl,2
        jz      .zab2
        stosw
.zab2:
.z_tiny:
        test    bl,1
        jz      .zab1
        stosb
.zab1:
        jmp     short .done
|---|
| 184 | |
|---|
| 185 | ; |
|---|
| 186 | ; pm_shuffle: |
|---|
| 187 | ; |
|---|
| 188 | ; This routine is used to shuffle memory around, followed by |
|---|
| 189 | ; invoking an entry point somewhere in low memory. This routine |
|---|
| 190 | ; can clobber any memory outside the bcopy special area. |
|---|
| 191 | ; |
|---|
| 192 | ; IMPORTANT: This routine does not set up any registers. |
|---|
| 193 | ; It is the responsibility of the caller to generate an appropriate entry |
|---|
| 194 | ; stub; *especially* when going to real mode. |
|---|
| 195 | ; |
|---|
| 196 | ; Inputs: |
|---|
| 197 | ; ESI -> Pointer to list of (dst, src, len) triples(*) |
|---|
| 198 | ; EDI -> Pointer to safe area for list + shuffler |
|---|
| 199 | ; (must not overlap this code nor the RM stack) |
|---|
| 200 | ; ECX -> Byte count of list area (for initial copy) |
|---|
| 201 | ; |
|---|
| 202 | ; If src == -1: then the memory pointed to by (dst, len) is bzeroed; |
|---|
| 203 | ; this is handled inside the bcopy routine. |
|---|
| 204 | ; |
|---|
| 205 | ; If len == 0: this marks the end of the list; dst indicates |
|---|
| 206 | ; the entry point and src the mode (0 = rm/16-bit, nonzero = pm) |
|---|
| 207 | ; |
|---|
| 208 | ; (*) dst, src, and len are four bytes each |
|---|
| 209 | ; |
|---|
pm_shuffle:
        cli                             ; Interrupts are not re-enabled by this routine
        mov     ebx,edi                 ; EBX = where the list copy will live
        lea     edx,[edi+ecx+15]        ; End of the list copy, plus rounding slack
        and     edx,~15                 ; EDX = 16-byte aligned target for code + GDT
        call    pm_bcopy                ; Copy the list (ECX bytes, ESI -> EDI)
        mov     esi,__bcopyxx_start     ; Absolute source address
        mov     edi,edx                 ; Absolute target address
        sub     edx,esi                 ; EDX = relocation delta (target - source)
        mov     ecx,__bcopyxx_dwords
        lea     eax,[edx+.relocated]    ; Address of .relocated inside the copy
        rep movsd                       ; Relocate this code
        jmp     eax                     ; Continue executing in the relocated copy
.relocated:
        ; Private stack: ESP = relocated __bcopyxx_end + bcopyxx_stack
        lea     esp,[edx+bcopyxx_stack+__bcopyxx_end]
        add     edx,bcopy_gdt           ; EDX = relocated bcopy_gdt
        mov     [edx+2],edx             ; Make the GDT's base field point at itself
        lgdt    [edx]                   ; Switch to the relocated GDT

        ; Walk the list: each entry is dst, src, len (12 bytes)
.next_entry:
        mov     edi,[ebx]               ; dst
        mov     esi,[ebx+4]             ; src
        mov     ecx,[ebx+8]             ; len
        add     ebx,12
        jecxz   .end_of_list            ; len == 0 terminates the list
        call    pm_bcopy
        jmp     .next_entry
.end_of_list:
        lidt    [edx+RM_IDT_ptr-bcopy_gdt] ; RM-like IDT
        push    ecx                     ; ECX == 0 here; popped into EFLAGS below
        and     esi,esi                 ; Terminator's src field selects the mode
        jz      pm_shuffle_16           ; src == 0: 16-bit entry point
        popfd                           ; Clean the flags
        jmp     edi                     ; src != 0: protected-mode entry at dst
|---|
| 247 | |
|---|
| 248 | ; We have a 16-bit entry point, so we need to return |
|---|
| 249 | ; to 16-bit mode. Note: EDX already points to the GDT. |
|---|
pm_shuffle_16:
        ; On entry: EDI = entry address, EDX = relocated bcopy_gdt, and a
        ; zero dword pushed by pm_shuffle is on top of the stack.
        ;
        ; Write EDI into the base fields of the CS16 and DS16 descriptors:
        ; bits 15:0 -> bytes 2-3, bits 23:16 -> byte 4, bits 31:24 -> byte 7.
        mov     eax,edi
        mov     [edx+PM_CS16+2],ax
        mov     [edx+PM_DS16+2],ax
        shr     eax,16
        mov     [edx+PM_CS16+4],al
        mov     [edx+PM_DS16+4],al
        mov     [edx+PM_CS16+7],ah
        mov     [edx+PM_DS16+7],ah

        ; EAX = CR0 with bit 0 cleared.  CR0 itself is not written here;
        ; presumably the 16-bit entry stub consumes EAX -- confirm with caller.
        mov     eax,cr0
        and     al,~1
        popfd                           ; Clean the flags
        ; No flag-changing instructions below...
        mov     dx,PM_DS16              ; Low word of EDX = 16-bit data selector
        mov     ds,edx
        mov     es,edx
        mov     fs,edx
        mov     gs,edx
        mov     ss,edx
        jmp     PM_CS16:0               ; Far jump to offset 0 of the patched CS16
|---|
| 270 | |
|---|
; Data belonging to the bcopy/shuffle code: GDT, IDT pointer, stack size.
        section .bcopyxx.data

        alignz  16

; desc NAME
;   Marks the start of GDT entry NAME (label bcopy_gdt.NAME) and defines
;   PM_NAME as that entry's byte offset from bcopy_gdt.
%macro desc 1
bcopy_gdt.%1:
PM_%1   equ     bcopy_gdt.%1-bcopy_gdt
%endmacro

bcopy_gdt:
        ; Null descriptor slot, reused as the operand for LGDT:
        ; 16-bit limit followed by the 32-bit base.
        dw      bcopy_gdt_size-1
        dd      bcopy_gdt
        dw      0

        ; TSS segment to keep Intel VT happy.  Intel VT is
        ; unhappy about anything that doesn't smell like a
        ; full-blown 32-bit OS.
        desc TSS
        dw      104-1, DummyTSS         ; 08h 32-bit task state segment
        dd      0x00008900              ; present, dpl 0, 104 bytes @DummyTSS

        desc CS16
        dd      0x0000ffff              ; 10h Code segment, use16, readable,
        dd      0x00009b00              ; present, dpl 0, cover 64K
        desc DS16
        dd      0x0000ffff              ; 18h Data segment, use16, read/write,
        dd      0x00009300              ; present, dpl 0, cover 64K
        desc CS32
        dd      0x0000ffff              ; 20h Code segment, use32, readable,
        dd      0x00cf9b00              ; present, dpl 0, cover all 4G
        desc DS32
        dd      0x0000ffff              ; 28h Data segment, use32, read/write,
        dd      0x00cf9300              ; present, dpl 0, cover all 4G

bcopy_gdt_size: equ $-bcopy_gdt

; Address used for the dummy task state segment.  The TSS should never
; actually be accessed, but in case it is, this points at memory that
; has a chance of not being used for anything real.
DummyTSS        equ 0x580

        align   4
RM_IDT_ptr:
        dw      0xFFFF                  ; Length (nonsense, but matches CPU)
        dd      0                       ; Offset

bcopyxx_stack   equ 128                 ; Bytes of stack wanted by the shuffler

        ; Back to 16-bit code for whatever follows this include
        bits 16
        section .text16
|---|