Re: powerpc __tls_get_addr call optimization

View: New views
1 Messages — Rating Filter:   Alert me  

Re: powerpc __tls_get_addr call optimization

by Alan Modra :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Ping http://sourceware.org/ml/libc-alpha/2009-03/msg00053.html

Updated diff against current sources follows, with one additional
file to define __tls_get_addr_opt.  If there is a preferred method to
define an alias for __tls_get_addr, please let me know.

2009-03-30  Alan Modra  <amodra@...>

        * elf/dl-open.c (dl_open_worker): Delay running _dl_update_slotinfo
        until all loaded modules have called _dl_add_to_slotinfo, and then
        only call it once.
        * elf/elf.h (R_PPC_TLSGD, R_PPC_TLSLD, DT_PPC_TLSOPT): Define.
        (R_PPC64_TLSGD, R_PPC64_TLSLD, DT_PPC64_TLSOPT): Define.
        (DT_PPC_NUM, DT_PPC64_NUM): Increment.
        * sysdeps/powerpc/powerpc32/dl-machine.h (elf_machine_rela): Optimize
        TLS descriptors.
        * sysdeps/powerpc/powerpc64/dl-machine.h (elf_machine_rela): Likewise.
        * sysdeps/powerpc/dl-tls.c: New file.

diff --git a/elf/dl-open.c b/elf/dl-open.c
index e920c77..0038567 100644
--- a/elf/dl-open.c
+++ b/elf/dl-open.c
@@ -374,7 +374,7 @@ dl_open_worker (void *a)
 
   /* If the file is not loaded now as a dependency, add the search
      list of the newly loaded object to the scope.  */
-  bool any_tls = false;
+  struct link_map *any_tls = NULL;
   for (unsigned int i = 0; i < new->l_searchlist.r_nlist; ++i)
     {
       struct link_map *imap = new->l_searchlist.r_list[i];
@@ -466,27 +466,37 @@ dl_open_worker (void *a)
       if (! RTLD_SINGLE_THREAD_P && imap->l_tls_modid > DTV_SURPLUS)
  _dl_signal_error (0, "dlopen", NULL, N_("\
 cannot load any more object with static TLS"));
+    }
 
-      imap->l_need_tls_init = 0;
+  any_tls = imap;
+ }
+    }
+
+  if (any_tls != NULL)
+    {
 #ifdef SHARED
-      /* Update the slot information data for at least the
- generation of the DSO we are allocating data for.  */
-      _dl_update_slotinfo (imap->l_tls_modid);
+      /* Update the slot information data for at least the
+ generation of the DSO we are allocating data for.  */
+      _dl_update_slotinfo (any_tls->l_tls_modid);
 #endif
 
+      for (unsigned int i = 0; i < new->l_searchlist.r_nlist; ++i)
+ {
+  struct link_map *imap = new->l_searchlist.r_list[i];
+
+  if (!imap->l_init_called && imap->l_need_tls_init)
+    {
+      imap->l_need_tls_init = 0;
       GL(dl_init_static_tls) (imap);
       assert (imap->l_need_tls_init == 0);
     }
-
-  /* We have to bump the generation counter.  */
-  any_tls = true;
  }
-    }
 
-  /* Bump the generation number if necessary.  */
-  if (any_tls && __builtin_expect (++GL(dl_tls_generation) == 0, 0))
-    _dl_fatal_printf (N_("\
+      /* Bump the generation number.  */
+      if (__builtin_expect (++GL(dl_tls_generation) == 0, 0))
+ _dl_fatal_printf (N_("\
 TLS generation counter wrapped!  Please report this."));
+    }
 
   /* Run the initializer functions of new objects.  */
   _dl_init (new, args->argc, args->argv, args->env);
diff --git a/elf/elf.h b/elf/elf.h
index c772ff4..e3bce36 100644
--- a/elf/elf.h
+++ b/elf/elf.h
@@ -2040,6 +2040,8 @@ typedef Elf32_Addr Elf32_Conflict;
 #define R_PPC_GOT_DTPREL16_LO 92 /* half16* (sym+add)@got@dtprel@l */
 #define R_PPC_GOT_DTPREL16_HI 93 /* half16* (sym+add)@got@dtprel@h */
 #define R_PPC_GOT_DTPREL16_HA 94 /* half16* (sym+add)@got@dtprel@ha */
+#define R_PPC_TLSGD 95 /* none (sym+add)@tlsgd */
+#define R_PPC_TLSLD 96 /* none (sym+add)@tlsld */
 
 /* The remaining relocs are from the Embedded ELF ABI, and are not
    in the SVR4 ELF ABI.  */
@@ -2083,7 +2085,8 @@ typedef Elf32_Addr Elf32_Conflict;
 
 /* PowerPC specific values for the Dyn d_tag field.  */
 #define DT_PPC_GOT (DT_LOPROC + 0)
-#define DT_PPC_NUM 1
+#define DT_PPC_TLSOPT (DT_LOPROC + 1)
+#define DT_PPC_NUM 2
 
 /* PowerPC64 relocations defined by the ABIs */
 #define R_PPC64_NONE R_PPC_NONE
@@ -2196,6 +2199,8 @@ typedef Elf32_Addr Elf32_Conflict;
 #define R_PPC64_DTPREL16_HIGHERA 104 /* half16 (sym+add)@dtprel@highera */
 #define R_PPC64_DTPREL16_HIGHEST 105 /* half16 (sym+add)@dtprel@highest */
 #define R_PPC64_DTPREL16_HIGHESTA 106 /* half16 (sym+add)@dtprel@highesta */
+#define R_PPC64_TLSGD 107 /* none (sym+add)@tlsgd */
+#define R_PPC64_TLSLD 108 /* none (sym+add)@tlsld */
 
 /* GNU extension to support local ifunc.  */
 #define R_PPC64_JMP_IREL 247
@@ -2209,7 +2214,8 @@ typedef Elf32_Addr Elf32_Conflict;
 #define DT_PPC64_GLINK  (DT_LOPROC + 0)
 #define DT_PPC64_OPD (DT_LOPROC + 1)
 #define DT_PPC64_OPDSZ (DT_LOPROC + 2)
-#define DT_PPC64_NUM    3
+#define DT_PPC64_TLSOPT (DT_LOPROC + 3)
+#define DT_PPC64_NUM    4
 
 
 /* ARM specific declarations */
diff --git a/sysdeps/powerpc/dl-tls.c b/sysdeps/powerpc/dl-tls.c
new file mode 100644
index 0000000..f61e91c
--- /dev/null
+++ b/sysdeps/powerpc/dl-tls.c
@@ -0,0 +1,24 @@
+/* Thread-local storage handling in the ELF dynamic linker.  PowerPC version.
+   Copyright (C) 2009 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include "elf/dl-tls.c"
+
+#ifdef SHARED
+strong_alias(__tls_get_addr, __tls_get_addr_opt)
+#endif
diff --git a/sysdeps/powerpc/powerpc32/dl-machine.h b/sysdeps/powerpc/powerpc32/dl-machine.h
index 6f8d0f5..df5d377 100644
--- a/sysdeps/powerpc/powerpc32/dl-machine.h
+++ b/sysdeps/powerpc/powerpc32/dl-machine.h
@@ -343,6 +343,31 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
 # endif
 
     case R_PPC_DTPMOD32:
+      if (map->l_info[DT_PPC(TLSOPT)] != 0)
+ {
+  if (!NOT_BOOTSTRAP)
+    {
+      reloc_addr[0] = 0;
+      reloc_addr[1] = (sym_map->l_tls_offset - TLS_TP_OFFSET
+       + TLS_DTV_OFFSET);
+      break;
+    }
+  else if (sym_map != NULL)
+    {
+# ifndef SHARED
+      CHECK_STATIC_TLS (map, sym_map);
+# else
+      if (TRY_STATIC_TLS (map, sym_map))
+# endif
+ {
+  reloc_addr[0] = 0;
+  /* Set up for local dynamic.  */
+  reloc_addr[1] = (sym_map->l_tls_offset - TLS_TP_OFFSET
+   + TLS_DTV_OFFSET);
+  break;
+ }
+    }
+ }
       if (!NOT_BOOTSTRAP)
  /* During startup the dynamic linker is always index 1.  */
  *reloc_addr = 1;
@@ -352,6 +377,26 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
  *reloc_addr = sym_map->l_tls_modid;
       break;
     case R_PPC_DTPREL32:
+      if (map->l_info[DT_PPC(TLSOPT)] != 0)
+ {
+  if (!NOT_BOOTSTRAP)
+    {
+      *reloc_addr = TLS_TPREL_VALUE (sym_map, sym, reloc);
+      break;
+    }
+  else if (sym_map != NULL)
+    {
+# ifndef SHARED
+      CHECK_STATIC_TLS (map, sym_map);
+# else
+      if (TRY_STATIC_TLS (map, sym_map))
+# endif
+ {
+  *reloc_addr = TLS_TPREL_VALUE (sym_map, sym, reloc);
+  break;
+ }
+    }
+ }
       /* During relocation all TLS symbols are defined and used.
  Therefore the offset is already correct.  */
       if (NOT_BOOTSTRAP && sym_map != NULL)
diff --git a/sysdeps/powerpc/powerpc64/dl-machine.h b/sysdeps/powerpc/powerpc64/dl-machine.h
index 8a720ae..401cd15 100644
--- a/sysdeps/powerpc/powerpc64/dl-machine.h
+++ b/sysdeps/powerpc/powerpc64/dl-machine.h
@@ -611,6 +611,31 @@ elf_machine_rela (struct link_map *map,
 
 #if !defined RTLD_BOOTSTRAP || USE___THREAD
     case R_PPC64_DTPMOD64:
+      if (map->l_info[DT_PPC64(TLSOPT)] != 0)
+ {
+# ifdef RTLD_BOOTSTRAP
+  reloc_addr[0] = 0;
+  reloc_addr[1] = (sym_map->l_tls_offset - TLS_TP_OFFSET
+   + TLS_DTV_OFFSET);
+  return;
+# else
+  if (sym_map != NULL)
+    {
+#  ifndef SHARED
+      CHECK_STATIC_TLS (map, sym_map);
+#  else
+      if (TRY_STATIC_TLS (map, sym_map))
+#  endif
+ {
+  reloc_addr[0] = 0;
+  /* Set up for local dynamic.  */
+  reloc_addr[1] = (sym_map->l_tls_offset - TLS_TP_OFFSET
+   + TLS_DTV_OFFSET);
+  return;
+ }
+    }
+# endif
+ }
 # ifdef RTLD_BOOTSTRAP
       /* During startup the dynamic linker is always index 1.  */
       *reloc_addr = 1;
@@ -623,6 +648,26 @@ elf_machine_rela (struct link_map *map,
       return;
 
     case R_PPC64_DTPREL64:
+      if (map->l_info[DT_PPC64(TLSOPT)] != 0)
+ {
+# ifdef RTLD_BOOTSTRAP
+  *reloc_addr = TLS_TPREL_VALUE (sym_map, sym, reloc);
+  return;
+# else
+  if (sym_map != NULL)
+    {
+#  ifndef SHARED
+      CHECK_STATIC_TLS (map, sym_map);
+#  else
+      if (TRY_STATIC_TLS (map, sym_map))
+#  endif
+ {
+  *reloc_addr = TLS_TPREL_VALUE (sym_map, sym, reloc);
+  return;
+ }
+    }
+# endif
+ }
       /* During relocation all TLS symbols are defined and used.
          Therefore the offset is already correct.  */
 # ifndef RTLD_BOOTSTRAP

--
Alan Modra
Australia Development Lab, IBM