What is a union in C?

#include <stdio.h>
#include <stddef.h>
#include <assert.h>

// Example `union` in C
union int_or_char {
  int i;
  char c;
};

// Compare it to this to the similar `struct`
struct int_and_char {
  int i;
  char c;
};

int main(void) {
  // The struct makes room for both fields:
  assert(sizeof(struct int_and_char) >= sizeof(int) + sizeof(char));

  // But the union has a weaker constraint. It only makes room for each field individually:
  assert(sizeof(union int_or_char) >= sizeof(int));
  assert(sizeof(union int_or_char) >= sizeof(char));

  // Let's see the real sizes my compiler chooses.
  printf("sizeof(int) = %zu\n", sizeof(int));
  printf("sizeof(char) = %zu\n", sizeof(char));
  printf("sizeof(struct int_and_char) = %zu\n", sizeof(struct int_and_char));
  printf("sizeof(union int_or_char) = %zu\n", sizeof(union int_or_char));

  // This prints:
  //
  //   sizeof(int) = 4
  //   sizeof(char) = 1
  //   sizeof(struct int_and_char) = 8
  //   sizeof(union int_or_char) = 4
  //
  // The struct must be at least 4+1=5 bytes. My compiler chooses to pad the struct to 8 bytes.
  // The union must be at least max(4,1)=4 bytes. My compiler does no padding.

  // Now let's make some!
  struct int_and_char iac;
  union int_or_char ioc;

  // We already know how structs work:
  iac.i = 42;
  iac.c = 'c';
  printf("iac.i = %d; iac.c = '%c'\n", iac.i, iac.c);
  // Prints: iac.i = 42; iac.c = 'c'

  // Now let's try the same with the union:
  ioc.i = 42;
  ioc.c = 'c';
  printf("ioc.i = %d; ioc.c = '%c'\n", ioc.i, ioc.c);

  // This prints: ioc.i = 99; ioc.c = 'c'

  // Wait, where did 99 come from?! We need to understand how fields correspond
  // to byte offsets. Struct fields have a fixed byte offset. We can print out
  // the offsets:

  printf("Offset of iac.i = %ld\n", (void*)&iac.i - (void*)&iac);
  printf("Offset of iac.c = %ld\n", (void*)&iac.c - (void*)&iac);
  // Prints:
  //   Offset of iac.i = 0
  //   Offset of iac.c = 4

  // Union fields also have a fixed byte offset. Let's print those, too:
  printf("Offset of ioc.i = %ld\n", (void*)&ioc.i - (void*)&ioc);
  printf("Offset of ioc.c = %ld\n", (void*)&ioc.c - (void*)&ioc);
  // Prints:
  //   Offset of ioc.i = 0
  //   Offset of ioc.c = 0

  // Both offsets are 0! This explains the weird result above: by doing `ioc.c = 'c'`,
  // we wrote over part of our integer! We can see this in more detail by printing the bytes:

  unsigned char * bytes = (unsigned char *) &ioc;
  printf("Bytes of ioc: %d %d %d %d\n", bytes[0], bytes[1], bytes[2], bytes[3]);
  // Prints:
  //   Bytes of ioc: 99 0 0 0

  // By doing `ioc.c = 'c'`, we set the first byte to `99` (the ASCII code for 'c').
  // The integer `i` is then interpreted as `99` (since my x86 machine uses an
  // LSB representation).

  // By the way, there's a nicer way of getting field offsets without declaring
  // a value of that type: the `offsetof` macro, from stddef.h.
  printf("offsetof(struct int_and_char, c) = %zu\n", offsetof(struct int_and_char, c));
  // Prints: offsetof(struct int_and_char, c) = 4

  // You may wonder whether union field offsets are guaranteed to be 0. The
  // answer is YES:
  //
  //     "... A pointer to a union object, suitably converted, points to each of
  //     its members (or if a member is a bit-field, then to the unit in which
  //     it resides), and vice versa."
  //
  //                  — ANSI/ISO 9899:1990 (the ANSI C standard) Section 6.5.2.1

  return 0;
}
Tagged #c, #programming, #data-structures, #semantics.
👋 I'm Jim, a full-stack product engineer. Want to build an amazing product and a profitable business? Read more about me or Get in touch!

More by Jim

This page copyright James Fisher 2017. Content is not associated with my employer. Found an error? Edit this page.