How to get environment variables without syscalls or linking with libc in x64 assembly

Working with command-line arguments or environment variables always comes up when writing CLI tools in x64 assembly. Reading the command-line arguments is fairly trivial and well documented, but how do we get the environment variables? Here is some x64 assembly code that skips past argc and the argv[x] entries and prints each environment variable on a separate line.

# ---------- DATA ----------

# Assemble-time constants used by the print_str helper.
.set STDOUT, 1                  # file descriptor of standard output
.set __NR_write, 1              # x86-64 Linux syscall number for write(2)

# Read-only string data belongs in .rodata, not in the executable .text
# section (which is where it would land by default, since no section has
# been selected yet). The .text directive that precedes _start switches
# back to the code section.
     .section .rodata
newline:
     .asciz "\n"

# ---------- CODE ----------  
     .globl _start
     .text

# x86-64 Linux syscall number for exit(2).
.set __NR_exit, 60

# Process entry point (Linux x86-64, AT&T syntax, no libc).
# Prints every environment variable on its own line, then exits with
# status 0.
#
# When the process starts, the OS has already placed the command line
# arguments and the environment on the stack, and %rsp points at the top
# of it:
#
#   (%rsp)                 argc
#   8(%rsp)                argv[0]
#   ...
#   (8*argc)(%rsp)         argv[argc-1]
#   (8*argc + 8)(%rsp)     NULL          (terminates argv)
#   (8*argc + 16)(%rsp)    envp[0]
#   ...
#                          NULL          (terminates envp)
#
# Register roles:
#   %rdi  argc first, afterwards the string pointer handed to print_str
#   %rbx  cursor into the array of envp pointers. %rbx is callee-saved in
#         the System V ABI, so the cursor does not depend on which
#         scratch registers print_str happens to preserve.
_start:
    movq      (%rsp), %rdi            # %rdi = argc

    # To reach envp[0] we have to step over three things:
    #   - argc itself                 (8 bytes)
    #   - the argc argv[] pointers    (8 * argc bytes)
    #   - the NULL that ends argv     (8 bytes)
    # A single leaq computes that address without touching memory.
    leaq      16(%rsp, %rdi, 8), %rbx
    #          |    |     |  |
    #         DISP  |     |  |
    #              BASE   |  |
    #                  INDEX |
    #                       SCALE
    #
    # address = BASE + INDEX * SCALE + DISP = %rsp + argc * 8 + 16
    #
    # Now %rbx is pointing to envp[0]

1:
    # %rbx points at a slot that holds the address of one "NAME=value"
    # string. Load that address; a NULL slot marks the end of envp.
    # Otherwise print the string followed by a newline and advance the
    # cursor by 8 bytes, the size of one pointer on a 64 bit system.
    movq      (%rbx), %rdi
    testq     %rdi, %rdi              # Have we reached the end of envp, NULL?
    jz        1f
    call      print_str
    # RIP-relative addressing keeps the code position independent, so it
    # also links as a PIE (an absolute `leaq newline, %rdi` does not).
    leaq      newline(%rip), %rdi
    call      print_str
    addq      $8, %rbx                # Move to the next envp pointer
    jmp       1b

1:
    # exit(0). Writing a 32 bit register zero-extends into the full 64 bit
    # register, so the shorter encodings are sufficient here.
    movl      $__NR_exit, %eax
    xorl      %edi, %edi
    syscall

I have documented the code with comments to explain how we get the environment variables. Note that the code uses GNU assembler numeric local labels: 1f (forward) refers to the nearest label 1 after the current instruction, and 1b (backward) refers to the nearest label 1 before the current instruction.

Lastly, we have the print_str helper function that will print a string that %rdi is pointing to. We use this to print the environment variable strings.


# Print a NUL-terminated string to STDOUT = 1
#
# ABI:   Linux x86-64, raw syscall, no libc
# Input:
#   %rdi holds the address of the NUL-terminated string
# Output:
#   none; the result of the write is discarded
# Clobbers:
#   flags only. Every register used here (%rax, %rcx, %rdx, %rsi, %rdi
#   and %r11) is saved on entry and restored before returning, so the
#   caller may reuse %rdi after the call.
#
# We need to find the length of the string first and then print using
# syscall __NR_write (sys_write). The write is repeated while it is short,
# so the whole string is emitted even when the kernel accepts only part of
# it in one call.
print_str:
    push      %rax
    push      %rcx                    # clobbered by the syscall instruction
    push      %rdx
    push      %rsi
    push      %rdi
    push      %r11                    # clobbered by the syscall instruction

    movq      %rdi, %rsi              # %rsi = start of the bytes to write
    xorl      %edx, %edx              # %rdx = length counted so far
.L_strlen:
    cmpb      $0, (%rsi, %rdx)        # terminating NUL reached?
    je        .L_write
    incq      %rdx
    jmp       .L_strlen

.L_write:
    # At this point %rdx holds the length of the NUL-terminated string
    testq     %rdx, %rdx
    jz        .L_print_done           # empty string: nothing to write
    movl      $STDOUT, %edi           # the kernel leaves %rdi untouched
.L_write_more:
    # Invariant: %rsi = next unwritten byte, %rdx = bytes remaining (> 0)
    movl      $__NR_write, %eax
    syscall                           # %rax = bytes written or -errno
    testq     %rax, %rax
    jle       .L_print_done           # error or no progress: give up
    addq      %rax, %rsi              # step past what was written
    subq      %rax, %rdx
    jnz       .L_write_more           # short write: send the remainder

.L_print_done:
    pop       %r11
    pop       %rdi
    pop       %rsi
    pop       %rdx
    pop       %rcx
    pop       %rax
    ret