(**************************************************************************) (* *) (* OCaml *) (* *) (* Xavier Leroy, projet Gallium, INRIA Rocquencourt *) (* Benedikt Meurer, University of Siegen *) (* *) (* Copyright 2013 Institut National de Recherche en Informatique et *) (* en Automatique. *) (* Copyright 2012 Benedikt Meurer. *) (* *) (* All rights reserved. This file is distributed under the terms of *) (* the GNU Lesser General Public License version 2.1, with the *) (* special exception on linking described in the file LICENSE. *) (* *) (**************************************************************************) (* Emission of ARM assembly code, 64-bit mode *) open Misc open Cmm open Arch open Proc open Reg open Mach open Linear open Emitaux open Emitenv (* Names for special regs *) let reg_domain_state_ptr = phys_reg 25 (* x28 *) let reg_trap_ptr = phys_reg 23 (* x26 *) let reg_alloc_ptr = phys_reg 24 (* x27 *) let reg_tmp1 = phys_reg 26 (* x16 *) let reg_x8 = phys_reg 8 (* x8 *) let reg_stack_arg_begin = phys_reg 17 (* x20 *) let reg_stack_arg_end = phys_reg 18 (* x21 *) (* Output a label *) let label_prefix = if macosx then "L" else ".L" let emit_label lbl = emit_string label_prefix; emit_int lbl (* Object types *) let emit_label_type lbl ty = if Config.asm_size_type_directives then begin ` .type {emit_label lbl}, {emit_string ty}\n` end (* Output a pseudo-register *) let emit_reg = function {loc = Reg r} -> emit_string (register_name r) | _ -> fatal_error "Emit.emit_reg" (* Likewise, but with the 32-bit name of the register *) let int_reg_name_w = [| "w0"; "w1"; "w2"; "w3"; "w4"; "w5"; "w6"; "w7"; "w8"; "w9"; "w10"; "w11"; "w12"; "w13"; "w14"; "w15"; "w19"; "w20"; "w21"; "w22"; "w23"; "w24"; "w25"; "w26"; "w27"; "w28"; "w16"; "w17" |] let emit_wreg = function {loc = Reg r} -> emit_string int_reg_name_w.(r) | _ -> fatal_error "Emit.emit_wreg" let fp = Config.with_frame_pointers let initial_stack_offset f = 8 * f.fun_num_stack_slots.(0) + (* Local int variables *) 8 * f.fun_num_stack_slots.(1) + (* Local float variables *) (if f.fun_frame_required then 8 + (if fp then 8 else 0) (* Return address plus optional Frame Pointer *) else 0) let frame_size env = let sz = env.stack_offset + initial_stack_offset env.f in Misc.align sz 16 let slot_offset env loc cl = match loc with Incoming n -> assert (n >= 0); frame_size env + n | Local n -> if cl = 0 then env.stack_offset + n * 8 else env.stack_offset + (env.f.fun_num_stack_slots.(0) + n) * 8 | Outgoing n -> assert (n >= 0); n | Domainstate _ -> assert false (* not a stack slot *) (* Output a stack reference *) let emit_stack env r = match r.loc with | Stack (Domainstate n) -> let ofs = n + Domainstate.(idx_of_field Domain_extra_params) * 8 in `[{emit_reg reg_domain_state_ptr}, #{emit_int ofs}]` | Stack s -> let ofs = slot_offset env s (register_class r) in `[sp, #{emit_int ofs}]` | _ -> fatal_error "Emit.emit_stack" (* Output an addressing mode *) let emit_symbol_offset s ofs = emit_symbol s; if ofs > 0 then `+{emit_int ofs}` else if ofs < 0 then `-{emit_int (-ofs)}` else () let emit_addressing addr r = match addr with | Iindexed ofs -> `[{emit_reg r}, #{emit_int ofs}]` | Ibased(s, ofs) -> assert (not !Clflags.dlcode); (* see selection.ml *) `[{emit_reg r}, #:lo12:{emit_symbol_offset s ofs}]` (* Record live pointers at call points *) let record_frame_label env live dbg = let lbl = new_label () in let live_offset = ref [] in Reg.Set.iter (function | {typ = Val; loc = Reg r} -> live_offset := ((r lsl 1) + 1) :: !live_offset | {typ = Val; loc = Stack s} as reg -> live_offset := slot_offset env s (register_class reg) :: !live_offset | {typ = Addr} as r -> Misc.fatal_error ("bad GC root " ^ Reg.name r) | _ -> ()) live; record_frame_descr ~label:lbl ~frame_size:(frame_size env) ~live_offset:!live_offset dbg; lbl let record_frame env live dbg = let lbl = record_frame_label env live dbg in `{emit_label lbl}:` let emit_call_gc gc = `{emit_label gc.gc_lbl}: bl {emit_symbol "caml_call_gc"}\n`; `{emit_label gc.gc_frame_lbl}: b {emit_label gc.gc_return_lbl}\n` let bound_error_label env dbg = if !Clflags.debug || env.bound_error_sites = [] then begin let lbl_bound_error = new_label() in let lbl_frame = record_frame_label env Reg.Set.empty (Dbg_other dbg) in env.bound_error_sites <- { bd_lbl = lbl_bound_error; bd_frame = lbl_frame; } :: env.bound_error_sites; lbl_bound_error end else begin let bd = List.hd env.bound_error_sites in bd.bd_lbl end let emit_call_bound_error bd = `{emit_label bd.bd_lbl}: bl {emit_symbol "caml_ml_array_bound_error"}\n`; `{emit_label bd.bd_frame}:\n` (* Names of various instructions *) let name_for_comparison = function | Isigned Ceq -> "eq" | Isigned Cne -> "ne" | Isigned Cle -> "le" | Isigned Cge -> "ge" | Isigned Clt -> "lt" | Isigned Cgt -> "gt" | Iunsigned Ceq -> "eq" | Iunsigned Cne -> "ne" | Iunsigned Cle -> "ls" | Iunsigned Cge -> "cs" | Iunsigned Clt -> "cc" | Iunsigned Cgt -> "hi" let name_for_int_operation = function | Iadd -> "add" | Isub -> "sub" | Imul -> "mul" | Idiv -> "sdiv" | Iand -> "and" | Ior -> "orr" | Ixor -> "eor" | Ilsl -> "lsl" | Ilsr -> "lsr" | Iasr -> "asr" | _ -> assert false (* Decompose an integer constant into four 16-bit shifted fragments. Omit the fragments that are equal to "default" (16 zeros or 16 ones). *) let decompose_int default n = let rec decomp n pos = if pos >= 64 then [] else begin let frag = Nativeint.logand n 0xFFFFn and rem = Nativeint.shift_right_logical n 16 in if frag = default then decomp rem (pos + 16) else (frag, pos) :: decomp rem (pos + 16) end in decomp n 0 (* Load an integer constant into a register *) let emit_movk dst (f, p) = ` movk {emit_reg dst}, #{emit_nativeint f}, lsl #{emit_int p}\n` let emit_intconst dst n = if is_logical_immediate n then ` orr {emit_reg dst}, xzr, #{emit_nativeint n}\n` else begin let dz = decompose_int 0x0000n n and dn = decompose_int 0xFFFFn n in if List.length dz <= List.length dn then begin match dz with | [] -> ` mov {emit_reg dst}, xzr\n` | (f, p) :: l -> ` movz {emit_reg dst}, #{emit_nativeint f}, lsl #{emit_int p}\n`; List.iter (emit_movk dst) l end else begin match dn with | [] -> ` movn {emit_reg dst}, #0\n` | (f, p) :: l -> let nf = Nativeint.logxor f 0xFFFFn in ` movn {emit_reg dst}, #{emit_nativeint nf}, lsl #{emit_int p}\n`; List.iter (emit_movk dst) l end end let num_instructions_for_intconst n = if is_logical_immediate n then 1 else begin let dz = decompose_int 0x0000n n and dn = decompose_int 0xFFFFn n in max 1 (min (List.length dz) (List.length dn)) end (* Recognize float constants appropriate for FMOV dst, #fpimm instruction: "a normalized binary floating point encoding with 1 sign bit, 4 bits of fraction and a 3-bit exponent" *) let is_immediate_float bits = let exp = (Int64.(to_int (shift_right_logical bits 52)) land 0x7FF) - 1023 in let mant = Int64.logand bits 0xF_FFFF_FFFF_FFFFL in exp >= -3 && exp <= 4 && Int64.logand mant 0xF_0000_0000_0000L = mant (* Adjust sp (up or down) by the given byte amount *) let emit_stack_adjustment n = let instr = if n < 0 then "sub" else "add" in let m = abs n in assert (m < 0x1_000_000); let ml = m land 0xFFF and mh = m land 0xFFF_000 in if mh <> 0 then ` {emit_string instr} sp, sp, #{emit_int mh}\n`; if ml <> 0 then ` {emit_string instr} sp, sp, #{emit_int ml}\n`; if n <> 0 then cfi_adjust_cfa_offset (-n) (* Deallocate the stack frame and reload the return address before a return or tail call *) let output_epilogue env f = let n = frame_size env in if n > 0 then emit_stack_adjustment n; if env.f.fun_frame_required then if fp then ( ` ldp x29, x30, [sp, #-16]\n`; ) else ( ` ldr x30, [sp, #-8]\n`; ); f(); (* reset CFA back because function body may continue *) if n > 0 then cfi_adjust_cfa_offset n (* Output add-immediate / sub-immediate / cmp-immediate instructions *) let rec emit_addimm rd rs n = if n < 0 then emit_subimm rd rs (-n) else if n <= 0xFFF then ` add {emit_reg rd}, {emit_reg rs}, #{emit_int n}\n` else begin assert (n <= 0xFFF_FFF); let nl = n land 0xFFF and nh = n land 0xFFF_000 in ` add {emit_reg rd}, {emit_reg rs}, #{emit_int nh}\n`; if nl <> 0 then ` add {emit_reg rd}, {emit_reg rd}, #{emit_int nl}\n` end and emit_subimm rd rs n = if n < 0 then emit_addimm rd rs (-n) else if n <= 0xFFF then ` sub {emit_reg rd}, {emit_reg rs}, #{emit_int n}\n` else begin assert (n <= 0xFFF_FFF); let nl = n land 0xFFF and nh = n land 0xFFF_000 in ` sub {emit_reg rd}, {emit_reg rs}, #{emit_int nh}\n`; if nl <> 0 then ` sub {emit_reg rd}, {emit_reg rd}, #{emit_int nl}\n` end let emit_cmpimm rs n = if n >= 0 then ` cmp {emit_reg rs}, #{emit_int n}\n` else ` cmn {emit_reg rs}, #{emit_int (-n)}\n` (* Label a floating-point literal *) let float_literal env fl = try let x = List.find (fun x -> Int64.equal x.fl fl) env.float_literals in x.lbl with Not_found -> let lbl = new_label() in env.float_literals <- { fl; lbl } :: env.float_literals; lbl (* Emit all pending literals *) let emit_literals env = if env.float_literals <> [] then begin if macosx then ` .section __TEXT,__literal8,8byte_literals\n` else ` .section .rodata\n`; ` .align 3\n`; List.iter (fun { fl; lbl } -> `{emit_label lbl}:`; emit_float64_directive ".quad" fl) env.float_literals; env.float_literals <- [] end (* Emit code to load the address of a symbol *) let emit_load_symbol_addr dst s = if macosx then begin ` adrp {emit_reg dst}, {emit_symbol s}@GOTPAGE\n`; ` ldr {emit_reg dst}, [{emit_reg dst}, {emit_symbol s}@GOTPAGEOFF]\n` end else if not !Clflags.dlcode then begin ` adrp {emit_reg dst}, {emit_symbol s}\n`; ` add {emit_reg dst}, {emit_reg dst}, #:lo12:{emit_symbol s}\n` end else begin ` adrp {emit_reg dst}, :got:{emit_symbol s}\n`; ` ldr {emit_reg dst}, [{emit_reg dst}, #:got_lo12:{emit_symbol s}]\n` end (* The following functions are used for calculating the sizes of the call GC and bounds check points emitted out-of-line from the function body. See branch_relaxation.mli. *) let num_call_gc_and_check_bound_points env = let rec loop instr ((call_gc, check_bound) as totals) = match instr.desc with | Lend -> totals | Lop (Ialloc _) when env.f.fun_fast -> loop instr.next (call_gc + 1, check_bound) | Lop (Ipoll _) -> loop instr.next (call_gc + 1, check_bound) | Lop (Iintop Icheckbound) | Lop (Iintop_imm (Icheckbound, _)) | Lop (Ispecific (Ishiftcheckbound _)) -> let check_bound = (* When not in debug mode, there is at most one check-bound point. *) if not !Clflags.debug then 1 else check_bound + 1 in loop instr.next (call_gc, check_bound) (* The following four should never be seen, since this function is run before branch relaxation. *) | Lop (Ispecific (Ialloc_far _)) | Lop (Ispecific (Ipoll_far _)) | Lop (Ispecific Icheckbound_far) | Lop (Ispecific (Icheckbound_imm_far _)) | Lop (Ispecific (Ishiftcheckbound_far _)) -> assert false | _ -> loop instr.next totals in loop env.f.fun_body (0, 0) let max_out_of_line_code_offset ~num_call_gc ~num_check_bound = if num_call_gc < 1 && num_check_bound < 1 then 0 else begin let size_of_call_gc = 2 in let size_of_check_bound = 1 in let size_of_last_thing = (* Call-GC points come before check-bound points. *) if num_check_bound >= 1 then size_of_check_bound else size_of_call_gc in let total_size = size_of_call_gc*num_call_gc + size_of_check_bound*num_check_bound in let max_offset = total_size - size_of_last_thing in assert (max_offset >= 0); max_offset end module Size = struct (* CR-someday mshinwell: B and BL have +/- 128Mb ranges; for the moment we assume we will never exceed this. It would seem to be most likely to occur for branches between functions; in this case, the linker should be able to insert veneers anyway. (See section 4.6.7 of the document "ELF for the ARM 64-bit architecture (AArch64)".) *) type distance = int module Cond_branch = struct type t = TB | CB | Bcc let all = [TB; CB; Bcc] (* AArch64 instructions are 32 bits wide, so [distance] in this module means units of 32-bit words. *) let max_displacement = function | TB -> 32 * 1024 / 4 (* +/- 32Kb *) | CB | Bcc -> 1 * 1024 * 1024 / 4 (* +/- 1Mb *) let classify_instr = function | Lop (Ialloc _) | Lop (Ipoll _) | Lop (Iintop Icheckbound) | Lop (Iintop_imm (Icheckbound, _)) | Lop (Ispecific (Ishiftcheckbound _)) -> Some Bcc (* The various "far" variants in [specific_operation] don't need to return [Some] here, since their code sequences never contain any conditional branches that might need relaxing. *) | Lcondbranch (Itruetest, _) | Lcondbranch (Ifalsetest, _) -> Some CB | Lcondbranch (Iinttest _, _) | Lcondbranch (Iinttest_imm _, _) | Lcondbranch (Ifloattest _, _) -> Some Bcc | Lcondbranch (Ioddtest, _) | Lcondbranch (Ieventest, _) -> Some TB | Lcondbranch3 _ -> Some Bcc | _ -> None end let offset_pc_at_branch = 0 let addsub_size n = let m = abs n in assert (m < 0x1_000_000); let ml = m land 0xFFF and mh = m land 0xFFF_000 in max 1 ((if mh <> 0 then 1 else 0) + (if ml <> 0 then 1 else 0)) let stack_adj_size n = (* see emit_stack_adjustment *) addsub_size n let prologue_size f = let stk = initial_stack_offset f in (if stk > 0 then stack_adj_size (-stk) else 0) + (if f.fun_frame_required then (if fp then 2 else 1) else 0) let epilogue_size f = let stk = initial_stack_offset f in (if stk > 0 then stack_adj_size stk else 0) + (if f.fun_frame_required then 1 else 0) + 1 let instr_size f = function | Lend -> 0 | Lprologue -> prologue_size f | Lop (Imove | Ispill | Ireload) -> 1 | Lop (Iconst_int n) -> num_instructions_for_intconst n | Lop (Iconst_float f) -> if f = 0L || is_immediate_float f then 1 else 2 | Lop (Iconst_symbol _) -> 2 | Lop (Icall_ind) -> 1 | Lop (Icall_imm _) -> 1 | Lop (Itailcall_ind) -> epilogue_size f | Lop (Itailcall_imm { func; _ }) -> if func = f.fun_name then 1 else epilogue_size f | Lop (Iextcall {alloc; stack_ofs} ) -> if stack_ofs > 0 then 5 else if alloc then 3 else 5 | Lop (Istackoffset n) -> stack_adj_size (-n) | Lop (Iload { memory_chunk; addressing_mode; is_atomic }) -> let based = match addressing_mode with Iindexed _ -> 0 | Ibased _ -> 1 and barrier = if is_atomic then 1 else 0 and single = match memory_chunk with Single -> 2 | _ -> 1 in based + barrier + single | Lop (Istore (memory_chunk, addressing_mode, assignment)) -> let based = match addressing_mode with | Iindexed _ -> 0 | Ibased _ -> 1 and pre_store = match memory_chunk, assignment, macosx, addressing_mode with | (Word_int | Word_val), true, true, Iindexed 0 -> 0 | (Word_int | Word_val), true, true, _ -> 1 (* Compute dest address *) | (Word_int | Word_val), true, false, _ -> 1 (* Barrier instruction *) | _ -> 0 and store = match memory_chunk with Single -> 2 | _ -> 1 in based + pre_store + store | Lop (Ialloc _) when f.fun_fast -> 5 | Lop (Ispecific (Ialloc_far _)) when f.fun_fast -> 6 | Lop (Ipoll {return_label=None}) -> 3 | Lop (Ipoll {return_label=Some _}) -> 4 | Lop (Ispecific (Ipoll_far {return_label=None})) -> 4 | Lop (Ispecific (Ipoll_far {return_label=Some _})) -> 5 | Lop (Ialloc { bytes = num_bytes; _ }) | Lop (Ispecific (Ialloc_far { bytes = num_bytes; _ })) -> begin match num_bytes with | 16 | 24 | 32 -> 2 | _ -> 2 + num_instructions_for_intconst (Nativeint.of_int num_bytes) end | Lop (Iintop (Icomp _)) -> 2 | Lop (Icompf _) -> 2 | Lop (Iintop_imm (Icomp _, _)) -> 2 | Lop (Iintop (Icheckbound)) -> 2 | Lop (Ispecific (Icheckbound_far)) -> 3 | Lop (Iintop_imm (Icheckbound, _)) -> 2 | Lop (Ispecific (Icheckbound_imm_far _)) -> 3 | Lop (Ispecific (Ishiftcheckbound _)) -> 2 | Lop (Ispecific (Ishiftcheckbound_far _)) -> 3 | Lop (Iintop Imod) -> 2 | Lop (Iintop Imulh) -> 1 | Lop (Iintop_imm ((Iadd|Isub), n)) -> addsub_size n | Lop (Iintop _) -> 1 | Lop (Iintop_imm _) -> 1 | Lop (Ifloatofint | Iintoffloat | Iabsf | Inegf | Ispecific Isqrtf) -> 1 | Lop (Iaddf | Isubf | Imulf | Idivf | Ispecific Inegmulf) -> 1 | Lop (Iopaque) -> 0 | Lop (Ispecific (Imuladdf | Inegmuladdf | Imulsubf | Inegmulsubf)) -> 1 | Lop (Ispecific (Ishiftarith _)) -> 1 | Lop (Ispecific (Imuladd | Imulsub)) -> 1 | Lop (Ispecific (Ibswap 16)) -> 2 | Lop (Ispecific (Ibswap _)) -> 1 | Lop (Ispecific Imove32) -> 1 | Lop (Ispecific (Isignext _)) -> 1 | Lop (Idls_get) -> 1 | Lop (Ireturn_addr) -> 1 | Lreloadretaddr -> 0 | Lreturn -> epilogue_size f + (if top_bits_ignore then 0 else 1) | Llabel _ -> 0 | Lbranch _ -> 1 | Lcondbranch (tst, _) -> begin match tst with | Itruetest -> 1 | Ifalsetest -> 1 | Iinttest _ -> 2 | Iinttest_imm _ -> 2 | Ifloattest _ -> 2 | Ioddtest -> 1 | Ieventest -> 1 end | Lcondbranch3 (lbl0, lbl1, lbl2) -> 1 + begin match lbl0 with None -> 0 | Some _ -> 1 end + begin match lbl1 with None -> 0 | Some _ -> 1 end + begin match lbl2 with None -> 0 | Some _ -> 1 end | Lswitch jumptbl -> 3 + Array.length jumptbl | Lentertrap -> if fp then 1 else 0 | Ladjust_trap_depth _ -> 0 | Lpushtrap _ -> 3 | Lpoptrap -> 1 | Lraise k -> begin match k with | Lambda.Raise_regular -> 1 | Lambda.Raise_reraise -> 1 | Lambda.Raise_notrace -> 3 end let relax_poll ~return_label = Lop (Ispecific (Ipoll_far { return_label })) let relax_allocation ~num_bytes ~dbginfo = Lop (Ispecific (Ialloc_far { bytes = num_bytes; dbginfo })) let relax_intop_checkbound () = Lop (Ispecific (Icheckbound_far)) let relax_intop_imm_checkbound ~bound = Lop (Ispecific (Icheckbound_imm_far { bound; })) let relax_specific_op = function | Ishiftcheckbound { shift; } -> Lop (Ispecific (Ishiftcheckbound_far { shift; })) | _ -> assert false end module BR = Branch_relaxation.Make (Size) (* Output the assembly code for allocation. *) let assembly_code_for_allocation env i ~n ~far ~dbginfo = let lbl_frame = record_frame_label env i.live (Dbg_alloc dbginfo) in if env.f.fun_fast then begin let lbl_after_alloc = new_label() in let lbl_call_gc = new_label() in (* n is at most Max_young_whsize * 8, i.e. currently 0x808, so it is reasonable to assume n < 0x1_000. This makes the generated code simpler. *) assert (16 <= n && n < 0x1_000 && n land 0x7 = 0); let offset = Domainstate.(idx_of_field Domain_young_limit) * 8 in ` ldr {emit_reg reg_tmp1}, [{emit_reg reg_domain_state_ptr}, #{emit_int offset}]\n`; ` sub {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_ptr}, #{emit_int n}\n`; ` cmp {emit_reg reg_alloc_ptr}, {emit_reg reg_tmp1}\n`; if not far then begin ` b.lo {emit_label lbl_call_gc}\n` end else begin let lbl = new_label () in ` b.cs {emit_label lbl}\n`; ` b {emit_label lbl_call_gc}\n`; `{emit_label lbl}:\n` end; `{emit_label lbl_after_alloc}:`; ` add {emit_reg i.res.(0)}, {emit_reg reg_alloc_ptr}, #8\n`; env.call_gc_sites <- { gc_lbl = lbl_call_gc; gc_return_lbl = lbl_after_alloc; gc_frame_lbl = lbl_frame; } :: env.call_gc_sites end else begin begin match n with | 16 -> ` bl {emit_symbol "caml_alloc1"}\n` | 24 -> ` bl {emit_symbol "caml_alloc2"}\n` | 32 -> ` bl {emit_symbol "caml_alloc3"}\n` | _ -> emit_intconst reg_x8 (Nativeint.of_int n); ` bl {emit_symbol "caml_allocN"}\n` end; `{emit_label lbl_frame}: add {emit_reg i.res.(0)}, {emit_reg reg_alloc_ptr}, #8\n` end let assembly_code_for_poll env i ~far ~return_label = let lbl_frame = record_frame_label env i.live (Dbg_alloc []) in let lbl_call_gc = new_label() in let lbl_after_poll = match return_label with | None -> new_label() | Some lbl -> lbl in let offset = Domainstate.(idx_of_field Domain_young_limit) * 8 in ` ldr {emit_reg reg_tmp1}, [{emit_reg reg_domain_state_ptr}, #{emit_int offset}]\n`; ` cmp {emit_reg reg_alloc_ptr}, {emit_reg reg_tmp1}\n`; if not far then begin match return_label with | None -> ` b.ls {emit_label lbl_call_gc}\n`; `{emit_label lbl_after_poll}:\n` | Some return_label -> ` b.hi {emit_label return_label}\n`; ` b {emit_label lbl_call_gc}\n`; end else begin match return_label with | None -> ` b.hi {emit_label lbl_after_poll}\n`; ` b {emit_label lbl_call_gc}\n`; `{emit_label lbl_after_poll}:\n` | Some return_label -> let lbl = new_label () in ` b.ls {emit_label lbl}\n`; ` b {emit_label return_label}\n`; `{emit_label lbl}: b {emit_label lbl_call_gc}\n` end; env.call_gc_sites <- { gc_lbl = lbl_call_gc; gc_return_lbl = lbl_after_poll; gc_frame_lbl = lbl_frame; } :: env.call_gc_sites let emit_named_text_section func_name = Emitaux.emit_named_text_section func_name '%' (* Emit code to load an emitted literal *) let emit_load_literal dst lbl = if macosx then begin ` adrp {emit_reg reg_tmp1}, {emit_label lbl}@PAGE\n`; ` ldr {emit_reg dst}, [{emit_reg reg_tmp1}, {emit_label lbl}@PAGEOFF]\n` end else begin ` adrp {emit_reg reg_tmp1}, {emit_label lbl}\n`; ` ldr {emit_reg dst}, [{emit_reg reg_tmp1}, #:lo12:{emit_label lbl}]\n` end let name_for_float_comparison = function | CFeq -> "eq" | CFneq -> "ne" | CFlt -> "cc" | CFnlt -> "cs" | CFle -> "ls" | CFnle -> "hi" | CFgt -> "gt" | CFngt -> "le" | CFge -> "ge" | CFnge -> "lt" (* Output the assembly code for an instruction *) let emit_stlr src base addr = assert macosx; let dest_reg = match addr with | Iindexed 0 -> base | Iindexed ofs -> ` add {emit_reg reg_tmp1}, {emit_reg base}, #{emit_int ofs}\n`; reg_tmp1 | Ibased _ -> assert false (* Ibased is not emitted under macOS *) in (* Release store for assignments on macOS. See https://github.com/ocaml/ocaml/issues/13262. *) ` stlr {emit_reg src}, [{emit_reg dest_reg}]\n` let emit_instr env i = emit_debug_info i.dbg; match i.desc with | Lend -> () | Lprologue -> let n = frame_size env in if env.f.fun_frame_required then begin if fp then ( ` stp x29, x30, [sp, #-16]\n`; cfi_offset ~reg:29 (* frame pointer *) ~offset:(-16); cfi_offset ~reg:30 (* return address *) ~offset:(-8) ) else ( ` str x30, [sp, #-8]\n`; cfi_offset ~reg:30 (* return address *) ~offset:(-8) ); end; if n > 0 then begin emit_stack_adjustment (-n); if env.f.fun_frame_required && fp then ` add x29, sp, #{emit_int (n-16)}\n`; end | Lop(Imove | Ispill | Ireload) -> let src = i.arg.(0) and dst = i.res.(0) in if src.loc <> dst.loc then begin match (src, dst) with | {loc = Reg _; typ = Float}, {loc = Reg _} -> ` fmov {emit_reg dst}, {emit_reg src}\n` | {loc = Reg _}, {loc = Reg _} -> ` mov {emit_reg dst}, {emit_reg src}\n` | {loc = Reg _}, {loc = Stack _} -> ` str {emit_reg src}, {emit_stack env dst}\n` | {loc = Stack _}, {loc = Reg _} -> ` ldr {emit_reg dst}, {emit_stack env src}\n` | _ -> assert false end | Lop(Ispecific Imove32) -> let src = i.arg.(0) and dst = i.res.(0) in if src.loc <> dst.loc then begin match (src, dst) with | {loc = Reg _}, {loc = Reg _} -> ` mov {emit_wreg dst}, {emit_wreg src}\n` | {loc = Reg _}, {loc = Stack _} -> ` str {emit_wreg src}, {emit_stack env dst}\n` | {loc = Stack _}, {loc = Reg _} -> ` ldr {emit_wreg dst}, {emit_stack env src}\n` | _ -> assert false end | Lop(Iconst_int n) -> emit_intconst i.res.(0) n | Lop(Iconst_float f) -> if f = 0L then ` fmov {emit_reg i.res.(0)}, xzr\n` else if is_immediate_float f then ` fmov {emit_reg i.res.(0)}, #{emit_printf "%.7f" (Int64.float_of_bits f)}\n` else begin let lbl = float_literal env f in emit_load_literal i.res.(0) lbl end | Lop(Iconst_symbol s) -> emit_load_symbol_addr i.res.(0) s | Lop(Icall_ind) -> ` blr {emit_reg i.arg.(0)}\n`; `{record_frame env i.live (Dbg_other i.dbg)}\n` | Lop(Icall_imm { func; }) -> ` bl {emit_symbol func}\n`; `{record_frame env i.live (Dbg_other i.dbg)}\n` | Lop(Itailcall_ind) -> output_epilogue env (fun () -> ` br {emit_reg i.arg.(0)}\n`) | Lop(Itailcall_imm { func; }) -> if func = env.f.fun_name then ` b {emit_label env.f.fun_tailrec_entry_point_label}\n` else output_epilogue env (fun () -> ` b {emit_symbol func}\n`) | Lop(Iextcall {func; alloc; stack_ofs}) -> if stack_ofs > 0 then begin ` mov {emit_reg reg_stack_arg_begin}, sp\n`; ` add {emit_reg reg_stack_arg_end}, sp, #{emit_int (Misc.align stack_ofs 16)}\n`; emit_load_symbol_addr reg_x8 func; ` bl {emit_symbol "caml_c_call_stack_args"}\n`; `{record_frame env i.live (Dbg_other i.dbg)}\n` end else if alloc then begin emit_load_symbol_addr reg_x8 func; ` bl {emit_symbol "caml_c_call"}\n`; `{record_frame env i.live (Dbg_other i.dbg)}\n` end else begin (* Store OCaml stack in x19 register and restore later. *) ` mov x19, sp\n`; cfi_remember_state (); cfi_def_cfa_register ~reg:19; let offset = Domainstate.(idx_of_field Domain_c_stack) * 8 in ` ldr {emit_reg reg_tmp1}, [{emit_reg reg_domain_state_ptr}, {emit_int offset}]\n`; ` mov sp, {emit_reg reg_tmp1}\n`; ` bl {emit_symbol func}\n`; ` mov sp, x19\n`; cfi_restore_state () end | Lop(Istackoffset n) -> assert (n mod 16 = 0); emit_stack_adjustment (-n); env.stack_offset <- env.stack_offset + n | Lop(Iload { memory_chunk; addressing_mode; is_atomic }) -> assert(memory_chunk = Word_int || memory_chunk = Word_val || is_atomic = false); let dst = i.res.(0) in let base = match addressing_mode with | Iindexed _ -> i.arg.(0) | Ibased(s, ofs) -> assert (not !Clflags.dlcode); (* see selection.ml *) ` adrp {emit_reg reg_tmp1}, {emit_symbol_offset s ofs}\n`; reg_tmp1 in begin match memory_chunk with | Byte_unsigned -> ` ldrb {emit_wreg dst}, {emit_addressing addressing_mode base}\n` | Byte_signed -> ` ldrsb {emit_reg dst}, {emit_addressing addressing_mode base}\n` | Sixteen_unsigned -> ` ldrh {emit_wreg dst}, {emit_addressing addressing_mode base}\n` | Sixteen_signed -> ` ldrsh {emit_reg dst}, {emit_addressing addressing_mode base}\n` | Thirtytwo_unsigned -> ` ldr {emit_wreg dst}, {emit_addressing addressing_mode base}\n` | Thirtytwo_signed -> ` ldrsw {emit_reg dst}, {emit_addressing addressing_mode base}\n` | Single -> ` ldr s7, {emit_addressing addressing_mode base}\n`; ` fcvt {emit_reg dst}, s7\n` | Sixtyfour | Word_int | Word_val -> if is_atomic then begin assert (addressing_mode = Iindexed 0); assert (not (memory_chunk = Sixtyfour)); ` dmb ishld\n`; ` ldar {emit_reg dst}, [{emit_reg i.arg.(0)}]\n` end else ` ldr {emit_reg dst}, {emit_addressing addressing_mode base}\n` | Double -> ` ldr {emit_reg dst}, {emit_addressing addressing_mode base}\n` end | Lop(Istore(size, addr, assignment)) -> (* NB: assignments other than Word_int and Word_val do not follow the Multicore OCaml memory model and so do not emit a barrier *) let src = i.arg.(0) in let base = match addr with | Iindexed _ -> i.arg.(1) | Ibased(s, ofs) -> assert (not !Clflags.dlcode); ` adrp {emit_reg reg_tmp1}, {emit_symbol_offset s ofs}\n`; reg_tmp1 in begin match size with | Byte_unsigned | Byte_signed -> ` strb {emit_wreg src}, {emit_addressing addr base}\n` | Sixteen_unsigned | Sixteen_signed -> ` strh {emit_wreg src}, {emit_addressing addr base}\n` | Thirtytwo_unsigned | Thirtytwo_signed -> ` str {emit_wreg src}, {emit_addressing addr base}\n` | Single -> ` fcvt s7, {emit_reg src}\n`; ` str s7, {emit_addressing addr base}\n`; | Sixtyfour -> ` str {emit_reg src}, {emit_addressing addr base}\n` | Word_int | Word_val -> begin match assignment, macosx with | true, true -> emit_stlr src base addr | true, false -> (* Memory model barrier for assignments. *) ` dmb ishld\n`; ` str {emit_reg src}, {emit_addressing addr base}\n` | _, _ -> (* Initializing store *) ` str {emit_reg src}, {emit_addressing addr base}\n` end | Double -> ` str {emit_reg src}, {emit_addressing addr base}\n` end | Lop(Ialloc { bytes = n; dbginfo }) -> assembly_code_for_allocation env i ~n ~far:false ~dbginfo | Lop(Ispecific (Ialloc_far { bytes = n; dbginfo })) -> assembly_code_for_allocation env i ~n ~far:true ~dbginfo | Lop(Ipoll { return_label }) -> assembly_code_for_poll env i ~far:false ~return_label | Lop(Ispecific (Ipoll_far { return_label })) -> assembly_code_for_poll env i ~far:true ~return_label | Lop(Iintop_imm(Iadd, n)) -> emit_addimm i.res.(0) i.arg.(0) n | Lop(Iintop_imm(Isub, n)) -> emit_subimm i.res.(0) i.arg.(0) n | Lop(Iintop(Icomp cmp)) -> ` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; ` cset {emit_reg i.res.(0)}, {emit_string (name_for_comparison cmp)}\n` | Lop(Icompf cmp) -> let comp = name_for_float_comparison cmp in ` fcmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; ` cset {emit_reg i.res.(0)}, {emit_string comp}\n` | Lop(Iintop_imm(Icomp cmp, n)) -> emit_cmpimm i.arg.(0) n; ` cset {emit_reg i.res.(0)}, {emit_string (name_for_comparison cmp)}\n` | Lop(Iintop (Icheckbound)) -> let lbl = bound_error_label env i.dbg in ` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; ` b.ls {emit_label lbl}\n` | Lop(Ispecific Icheckbound_far) -> let lbl = bound_error_label env i.dbg in let lbl2 = new_label () in ` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; ` b.hi {emit_label lbl2}\n`; ` b {emit_label lbl}\n`; `{emit_label lbl2}:\n`; | Lop(Iintop_imm(Icheckbound, n)) -> let lbl = bound_error_label env i.dbg in emit_cmpimm i.arg.(0) n; ` b.ls {emit_label lbl}\n` | Lop(Ispecific(Icheckbound_imm_far { bound; })) -> let lbl = bound_error_label env i.dbg in let lbl2 = new_label () in ` cmp {emit_reg i.arg.(0)}, #{emit_int bound}\n`; ` b.hi {emit_label lbl2}\n`; ` b {emit_label lbl}\n`; `{emit_label lbl2}:\n`; | Lop(Ispecific(Ishiftcheckbound { shift; })) -> let lbl = bound_error_label env i.dbg in ` cmp {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}, lsr #{emit_int shift}\n`; ` b.cs {emit_label lbl}\n` | Lop(Ispecific(Ishiftcheckbound_far { shift; })) -> let lbl = bound_error_label env i.dbg in let lbl2 = new_label () in ` cmp {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}, lsr #{emit_int shift}\n`; ` b.lo {emit_label lbl2}\n`; ` b {emit_label lbl}\n`; `{emit_label lbl2}:\n`; | Lop(Iintop Imod) -> ` sdiv {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; ` msub {emit_reg i.res.(0)}, {emit_reg reg_tmp1}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n` | Lop(Iintop Imulh) -> ` smulh {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` | Lop(Iintop op) -> let instr = name_for_int_operation op in ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` | Lop(Iintop_imm(op, n)) -> let instr = name_for_int_operation op in ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, #{emit_int n}\n` | Lop(Ifloatofint | Iintoffloat | Iabsf | Inegf | Ispecific Isqrtf as op) -> let instr = (match op with | Ifloatofint -> "scvtf" | Iintoffloat -> "fcvtzs" | Iabsf -> "fabs" | Inegf -> "fneg" | Ispecific Isqrtf -> "fsqrt" | _ -> assert false) in ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n` | Lop(Iaddf | Isubf | Imulf | Idivf | Ispecific Inegmulf as op) -> let instr = (match op with | Iaddf -> "fadd" | Isubf -> "fsub" | Imulf -> "fmul" | Idivf -> "fdiv" | Ispecific Inegmulf -> "fnmul" | _ -> assert false) in ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` | Lop(Ispecific(Imuladdf | Inegmuladdf | Imulsubf | Inegmulsubf as op)) -> let instr = (match op with | Imuladdf -> "fmadd" | Inegmuladdf -> "fnmadd" | Imulsubf -> "fmsub" | Inegmulsubf -> "fnmsub" | _ -> assert false) in ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}, {emit_reg i.arg.(0)}\n` | Lop(Iopaque) -> assert (i.arg.(0).loc = i.res.(0).loc) | Lop(Ispecific(Ishiftarith(op, shift))) -> let instr = (match op with Ishiftadd -> "add" | Ishiftsub -> "sub") in ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}`; if shift >= 0 then `, lsl #{emit_int shift}\n` else `, asr #{emit_int (-shift)}\n` | Lop(Ispecific(Imuladd | Imulsub as op)) -> let instr = (match op with Imuladd -> "madd" | Imulsub -> "msub" | _ -> assert false) in ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}\n` | Lop(Ispecific(Ibswap size)) -> begin match size with | 16 -> ` rev16 {emit_wreg i.res.(0)}, {emit_wreg i.arg.(0)}\n`; ` ubfm {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, #0, #15\n` | 32 -> ` rev {emit_wreg i.res.(0)}, {emit_wreg i.arg.(0)}\n` | 64 -> ` rev {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n` | _ -> assert false end | Lop(Ispecific(Isignext size)) -> ` sbfm {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, #0, #{emit_int (size - 1)}\n` | Lop(Idls_get) -> let offset = Domainstate.(idx_of_field Domain_dls_root) * 8 in ` ldr {emit_reg i.res.(0)}, [{emit_reg reg_domain_state_ptr}, {emit_int offset}]\n` | Lop(Ireturn_addr) -> let n = frame_size env in if env.f.fun_frame_required then ` ldr {emit_reg i.res.(0)}, [sp, #{emit_int (n-8)}]\n` else ` mov {emit_reg i.res.(0)}, x30\n` | Lreloadretaddr -> () | Lreturn -> output_epilogue env begin fun () -> if not top_bits_ignore then begin (* mask the top 8 bits ourselves *) ` and x30, x30, #0x00FFFFFFFFFFFFFF\n`; end; ` ret\n` end | Llabel lbl -> `{emit_label lbl}:\n` | Lbranch lbl -> ` b {emit_label lbl}\n` | Lcondbranch(tst, lbl) -> begin match tst with | Itruetest -> ` cbnz {emit_reg i.arg.(0)}, {emit_label lbl}\n` | Ifalsetest -> ` cbz {emit_reg i.arg.(0)}, {emit_label lbl}\n` | Iinttest cmp -> ` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; let comp = name_for_comparison cmp in ` b.{emit_string comp} {emit_label lbl}\n` | Iinttest_imm(cmp, n) -> emit_cmpimm i.arg.(0) n; let comp = name_for_comparison cmp in ` b.{emit_string comp} {emit_label lbl}\n` | Ifloattest cmp -> let comp = name_for_float_comparison cmp in ` fcmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; ` b.{emit_string comp} {emit_label lbl}\n` | Ioddtest -> ` tbnz {emit_reg i.arg.(0)}, #0, {emit_label lbl}\n` | Ieventest -> ` tbz {emit_reg i.arg.(0)}, #0, {emit_label lbl}\n` end | Lcondbranch3(lbl0, lbl1, lbl2) -> ` cmp {emit_reg i.arg.(0)}, #1\n`; begin match lbl0 with None -> () | Some lbl -> ` b.lt {emit_label lbl}\n` end; begin match lbl1 with None -> () | Some lbl -> ` b.eq {emit_label lbl}\n` end; begin match lbl2 with None -> () | Some lbl -> ` b.gt {emit_label lbl}\n` end | Lswitch jumptbl -> let lbltbl = new_label() in ` adr {emit_reg reg_tmp1}, {emit_label lbltbl}\n`; ` add {emit_reg reg_tmp1}, {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, lsl #2\n`; ` br {emit_reg reg_tmp1}\n`; `{emit_label lbltbl}:`; for j = 0 to Array.length jumptbl - 1 do ` b {emit_label jumptbl.(j)}\n` done (* Alternative: let lbltbl = new_label() in ` adr {emit_reg reg_tmp1}, {emit_label lbltbl}\n`; ` ldr {emit_wreg reg_tmp2}, [{emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, lsl #2]\n`; ` add {emit_reg reg_tmp1}, {emit_wreg reg_tmp2}, sxtb\n`; ` br {emit_reg reg_tmp1}\n`; `{emit_label lbltbl}:\n`; for j = 0 to Array.length jumptbl - 1 do ` .word {emit_label jumptbl.(j)} - {emit_label lbltbl}\n` done *) | Lentertrap -> if fp then begin let delta = (frame_size env) - 16 (* return address + frame pointer *) in ` add x29, sp, #{emit_int delta}\n` end | Ladjust_trap_depth { delta_traps } -> (* each trap occupies 16 bytes on the stack *) let delta = 16 * delta_traps in cfi_adjust_cfa_offset delta; env.stack_offset <- env.stack_offset + delta | Lpushtrap { lbl_handler; } -> ` adr {emit_reg reg_tmp1}, {emit_label lbl_handler}\n`; env.stack_offset <- env.stack_offset + 16; ` stp {emit_reg reg_trap_ptr}, {emit_reg reg_tmp1}, [sp, -16]!\n`; cfi_adjust_cfa_offset 16; ` mov {emit_reg reg_trap_ptr}, sp\n` | Lpoptrap -> ` ldr {emit_reg reg_trap_ptr}, [sp], 16\n`; cfi_adjust_cfa_offset (-16); env.stack_offset <- env.stack_offset - 16 | Lraise k -> begin match k with | Lambda.Raise_regular -> ` bl {emit_symbol "caml_raise_exn"}\n`; `{record_frame env Reg.Set.empty (Dbg_raise i.dbg)}\n` | Lambda.Raise_reraise -> ` bl {emit_symbol "caml_reraise_exn"}\n`; `{record_frame env Reg.Set.empty (Dbg_raise i.dbg)}\n` | Lambda.Raise_notrace -> ` mov sp, {emit_reg reg_trap_ptr}\n`; ` ldp {emit_reg reg_trap_ptr}, {emit_reg reg_tmp1}, [sp], 16\n`; ` br {emit_reg reg_tmp1}\n` end (* Emission of an instruction sequence *) (* for debugging instr_size errors *) let emit_instr_debug env i = let lbl = new_label () in `{emit_label lbl}:\n`; emit_instr env i; let sz = Size.instr_size env.f i.desc * 4 in ` .ifne (. - {emit_label lbl}) - {emit_int sz}\n`; ` .error \"Emit.instr_size: instruction length mismatch\"\n`; ` .endif\n` let rec emit_all env lbl_start acc i = let debug = Config.with_codegen_invariants in match i.desc with | Lend -> if debug then begin (* acc measures in units of 32-bit instructions *) let sz = acc * 4 in ` .ifne (. - {emit_label lbl_start}) - {emit_int sz}\n`; ` .error \"Emit.instr_size: instruction length mismatch\"\n`; ` .endif\n`; end else () | _ -> if debug then emit_instr_debug env i else emit_instr env i; emit_all env lbl_start (acc + Size.instr_size env.f i.desc) i.next let emit_all env i = let lbl = new_label () in `{emit_label lbl}:\n`; emit_all env lbl 0 i (* Emission of a function declaration *) let fundecl fundecl = let env = mk_env fundecl in emit_named_text_section fundecl.fun_name; ` .align 3\n`; ` .globl {emit_symbol fundecl.fun_name}\n`; emit_type_directive fundecl.fun_name "%function"; (* Dynamic stack checking *) let stack_threshold_size = Config.stack_threshold * 8 in (* bytes *) let max_frame_size = frame_size env + fundecl.fun_extra_stack_used in let handle_overflow, stack_check_size = if fundecl.fun_contains_nontail_calls || max_frame_size >= stack_threshold_size then begin let overflow = new_label () in `{emit_label overflow}:\n`; (* Pass the desired frame size on the stack, since all of the argument-passing registers may be in use. *) let s = (Config.stack_threshold + max_frame_size / 8) in ` mov {emit_reg reg_tmp1}, #{emit_int s}\n`; ` stp {emit_reg reg_tmp1}, x30, [sp, #-16]!\n`; ` bl {emit_symbol "caml_call_realloc_stack"}\n`; ` ldp {emit_reg reg_tmp1}, x30, [sp], #16\n`; (* fall through function entry point *) Some overflow, 5 end else None, 0 in `{emit_symbol fundecl.fun_name}:\n`; emit_debug_info fundecl.fun_dbg; cfi_startproc(); begin match handle_overflow with | None -> () | Some overflow -> let threshold_offset = Domainstate.stack_ctx_words * 8 + stack_threshold_size in let f = max_frame_size + threshold_offset in let offset = Domainstate.(idx_of_field Domain_current_stack) * 8 in ` ldr {emit_reg reg_tmp1}, [{emit_reg reg_domain_state_ptr}, #{emit_int offset}]\n`; emit_addimm reg_tmp1 reg_tmp1 f; ` cmp sp, {emit_reg reg_tmp1}\n`; ` bcc {emit_label overflow}\n` end; let num_call_gc, num_check_bound = num_call_gc_and_check_bound_points env in let max_out_of_line_code_offset = stack_check_size + max_out_of_line_code_offset ~num_call_gc ~num_check_bound in BR.relax fundecl ~max_out_of_line_code_offset; emit_all env fundecl.fun_body; List.iter emit_call_gc env.call_gc_sites; List.iter emit_call_bound_error env.bound_error_sites; assert (List.length env.call_gc_sites = num_call_gc); assert (List.length env.bound_error_sites = num_check_bound); cfi_endproc(); emit_type_directive fundecl.fun_name "%function"; emit_size_directive fundecl.fun_name; emit_literals env (* Emission of data *) let emit_item = function | Cglobal_symbol s -> ` .globl {emit_symbol s}\n`; | Cdefine_symbol s -> if !Clflags.dlcode then begin (* GOT relocations against non-global symbols don't seem to work properly: GOT entries are not created for the symbols and the relocations evaluate to random other GOT entries. For the moment force all symbols to be global. *) ` .globl {emit_symbol s}\n`; end; `{emit_symbol s}:\n` | Cint8 n -> ` .byte {emit_int n}\n` | Cint16 n -> ` .short {emit_int n}\n` | Cint32 n -> ` .long {emit_nativeint n}\n` | Cint n -> ` .quad {emit_nativeint n}\n` | Csingle f -> emit_float32_directive ".long" (Int32.bits_of_float f) | Cdouble f -> emit_float64_directive ".quad" (Int64.bits_of_float f) | Csymbol_address s -> ` .quad {emit_symbol s}\n` | Cstring s -> emit_string_directive " .ascii " s | Cskip n -> if n > 0 then ` .space {emit_int n}\n` | Calign n -> ` .align {emit_int(Misc.log2 n)}\n` let data l = ` .data\n`; ` .align 3\n`; List.iter emit_item l (* Beginning / end of an assembly file *) let begin_assembly() = reset_debug_info(); ` .file \"\"\n`; (* PR#7037 *) let lbl_begin = Compilenv.make_symbol (Some "data_begin") in ` .data\n`; ` .globl {emit_symbol lbl_begin}\n`; `{emit_symbol lbl_begin}:\n`; let lbl_begin = Compilenv.make_symbol (Some "code_begin") in emit_named_text_section lbl_begin; ` .globl {emit_symbol lbl_begin}\n`; `{emit_symbol lbl_begin}:\n`; (* we need to pad here to avoid collision for the unwind test between the code_begin symbol and the first function. (See also #4690) Alignment is needed to avoid linker warnings for shared_startup__code_{begin,end} (e.g. tests/lib-dynlink-pr4839). *) if macosx then begin ` nop\n`; ` .align 3\n` end; () let end_assembly () = let lbl_end = Compilenv.make_symbol (Some "code_end") in emit_named_text_section lbl_end; ` .globl {emit_symbol lbl_end}\n`; `{emit_symbol lbl_end}:\n`; let lbl_end = Compilenv.make_symbol (Some "data_end") in ` .data\n`; ` .quad 0\n`; (* PR#6329 *) ` .globl {emit_symbol lbl_end}\n`; `{emit_symbol lbl_end}:\n`; ` .quad 0\n`; ` .align 3\n`; (* #7887 *) let lbl = Compilenv.make_symbol (Some "frametable") in ` .globl {emit_symbol lbl}\n`; `{emit_symbol lbl}:\n`; emit_frames { efa_code_label = (fun lbl -> emit_label_type lbl "%function"; ` .quad {emit_label lbl}\n`); efa_data_label = (fun lbl -> emit_label_type lbl "%object"; ` .quad {emit_label lbl}\n`); efa_8 = (fun n -> ` .byte {emit_int n}\n`); efa_16 = (fun n -> ` .short {emit_int n}\n`); efa_32 = (fun n -> ` .long {emit_int32 n}\n`); efa_word = (fun n -> ` .quad {emit_int n}\n`); efa_align = (fun n -> ` .align {emit_int(Misc.log2 n)}\n`); efa_label_rel = (fun lbl ofs -> ` .long {emit_label lbl} - . + {emit_int32 ofs}\n`); efa_def_label = (fun lbl -> `{emit_label lbl}:\n`); efa_string = (fun s -> emit_string_directive " .asciz " s) }; emit_type_directive lbl "%object"; emit_size_directive lbl; emit_nonexecstack_note ()