This is the mail archive of the gdb-patches@sourceware.org mailing list for the GDB project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH v4] C++ify gdb/common/environ.c


On Friday, June 16 2017, Pedro Alves wrote:

> On 06/14/2017 08:22 PM, Sergio Durigan Junior wrote:
>
>> diff --git a/gdb/Makefile.in b/gdb/Makefile.in
>> index 5e5fcaa..133db1a 100644
>> --- a/gdb/Makefile.in
>> +++ b/gdb/Makefile.in
>> @@ -530,14 +530,16 @@ SUBDIR_UNITTESTS_SRCS = \
>>  	unittests/offset-type-selftests.c \
>>  	unittests/optional-selftests.c \
>>  	unittests/ptid-selftests.c \
>> -	unittests/scoped_restore-selftests.c
>> +	unittests/scoped_restore-selftests.c \
>> +	unittests/environ-selftests.c
>
> Please keep the list sorted.
>
> (I'm guilty of missing that before too.)

Done.

>>  
>>  SUBDIR_UNITTESTS_OBS = \
>>  	function-view-selftests.o \
>>  	offset-type-selftests.o \
>>  	optional-selftests.o \
>>  	ptid-selftests.o \
>> -	scoped_restore-selftests.o
>> +	scoped_restore-selftests.o \
>> +	environ-selftests.o
>
> Ditto.

Done.

>> diff --git a/gdb/common/environ.c b/gdb/common/environ.c
>> index 3145d01..657f2e0 100644
>> --- a/gdb/common/environ.c
>> +++ b/gdb/common/environ.c
>> @@ -18,165 +18,102 @@
>>  #include "common-defs.h"
>>  #include "environ.h"
>>  #include <algorithm>
>> -
>> +#include <utility>
>>  
>> -/* Return a new environment object.  */
>> +/* See common/environ.h.  */
>>  
>> -struct gdb_environ *
>> -make_environ (void)
>> +gdb_environ::gdb_environ ()
>>  {
>> -  struct gdb_environ *e;
>> +}
>>  
>> -  e = XNEW (struct gdb_environ);
>> +/* See common/environ.h.  */
>>  
>> -  e->allocated = 10;
>> -  e->vector = (char **) xmalloc ((e->allocated + 1) * sizeof (char *));
>> -  e->vector[0] = 0;
>> -  return e;
>> +gdb_environ::~gdb_environ ()
>> +{
>> +  clear ();
>>  }
>>  
>> -/* Free an environment and all the strings in it.  */
>> +/* See common/environ.h.  */
>>  
>> -void
>> -free_environ (struct gdb_environ *e)
>> +gdb_environ::gdb_environ (gdb_environ &&e)
>> +  : m_environ_vector (std::move (e.m_environ_vector))
>>  {
>> -  char **vector = e->vector;
>> +}
>>  
>> -  while (*vector)
>> -    xfree (*vector++);
>> +/* See common/environ.h.  */
>>  
>> -  xfree (e->vector);
>> -  xfree (e);
>> +gdb_environ &
>> +gdb_environ::operator= (gdb_environ &&e)
>> +{
>> +  clear ();
>> +  m_environ_vector = std::move (e.m_environ_vector);
>> +  return *this;
>>  }
>>  
>> -/* Copy the environment given to this process into E.
>> -   Also copies all the strings in it, so we can be sure
>> -   that all strings in these environments are safe to free.  */
>> +/* See common/environ.h.  */
>>  
>>  void
>> -init_environ (struct gdb_environ *e)
>> +gdb_environ::clear ()
>>  {
>> -  extern char **environ;
>> -  int i;
>> -
>> -  if (environ == NULL)
>> -    return;
>> -
>> -  for (i = 0; environ[i]; i++) /*EMPTY */ ;
>> +  for (char *v : m_environ_vector)
>> +    xfree (v);
>> +  m_environ_vector.clear ();
>> +}
>>  
>> -  if (e->allocated < i)
>> -    {
>> -      e->allocated = std::max (i, e->allocated + 10);
>> -      e->vector = (char **) xrealloc ((char *) e->vector,
>> -				      (e->allocated + 1) * sizeof (char *));
>> -    }
>> +/* See common/environ.h.  */
>>  
>> -  memcpy (e->vector, environ, (i + 1) * sizeof (char *));
>> +const char *
>> +gdb_environ::get (const std::string &var) const
>> +{
>
> Does this need to be a std::string instead of "const char *"?
> Callers always pass a string literal down, so this is going to
> force a deep string dup for no good reason, AFAICS.

It doesn't.  Changed to const char *.

>
>> +  size_t len = var.size ();
>> +  const char *var_str = var.c_str ();
>>  
>> -  while (--i >= 0)
>> -    {
>> -      int len = strlen (e->vector[i]);
>> -      char *newobj = (char *) xmalloc (len + 1);
>> +  for (char *el : m_environ_vector)
>> +    if (el != NULL && strncmp (el, var_str, len) == 0 && el[len] == '=')
>> +      return &el[len + 1];
>>  
>> -      memcpy (newobj, e->vector[i], len + 1);
>> -      e->vector[i] = newobj;
>> -    }
>> +  return NULL;
>>  }
>
>> -char *
>> -get_in_environ (const struct gdb_environ *e, const char *var)
>> +void
>> +gdb_environ::set (const std::string &var, const std::string &value)
>
> Ditto.

Likewise.

>>  {
>> -  int len = strlen (var);
>> -  char **vector = e->vector;
>> -  char *s;
>> -
>> -  for (; (s = *vector) != NULL; vector++)
>> -    if (strncmp (s, var, len) == 0 && s[len] == '=')
>> -      return &s[len + 1];
>> +  /* We have to unset the variable in the vector if it exists.  */
>> +  unset (var);
>>  
>> -  return 0;
>> +  /* Insert the element before the last one, which is always NULL.  */
>> +  m_environ_vector.insert (m_environ_vector.end () - 1,
>> +			   concat (var.c_str (), "=",
>> +				   value.c_str (), NULL));
>>  }
>>  
>> -/* Store the value in E of VAR as VALUE.  */
>> +/* See common/environ.h.  */
>>  
>>  void
>> -set_in_environ (struct gdb_environ *e, const char *var, const char *value)
>> +gdb_environ::unset (const std::string &var)
>
> Ditto.

Likewise.

>
>> -
>> -  return;
>> +  std::string match = var + '=';
>> +  const char *match_str = match.c_str ();
>> +
>> +  /* We iterate until '.cend () - 1' because the last element is
>> +     always NULL.  */
>> +  for (std::vector<char *>::const_iterator el = m_environ_vector.cbegin ();
>> +       el != m_environ_vector.cend () - 1;
>> +       ++el)
>> +    if (startswith (*el, match_str))
>
> In gdb_environ::set you used:

I assume you meant gdb_environ::get, right?

>
>     strncmp (el, var_str, len) == 0 && el[len] == '='
>
> It'd be better if places used the same matching code.  Maybe even put
> this in a separate helper function.  The gdb_environ::set version
> looks better to me for avoiding temporary heap-allocated strings.

Right, done.

>
>> -/* Remove the setting for variable VAR from environment E.  */
>> +/* See common/environ.h.  */
>>  
>> -void
>> -unset_in_environ (struct gdb_environ *e, const char *var)
>> +char **
>> +gdb_environ::get_char_vector () const
>
> So far, getters in gdb's classes don't have a "get_" prefix.
> (except "get()" or course, but that's really a getter in
> the same sense.)  Can we drop it here?  Like:

Yeah, sure.  Simon made this observation in a previous review about the
other methods, but I thought it'd make sense to keep the "get_" prefix
for this specific one.

>
>  char **gdb_environ::char_vector () const
>
> Though I'd rename it like this instead:
>
>  char ** gdb_environ::envp () const
>
> Because that's what the env vector is traditionally called, e.g.,
> from "man 2 execve":
>
>      int execve(const char *filename, char *const argv[],
>                   char *const envp[]);
>
>      int main(int argc, char *argv[], char *envp[])
>
> Likewise I'd use that name for local variables where
> we call gdb_environ::get_char_vector, just to follow
> traditional terminology throughout.

OK, fair enough.  I'll rename it to envp then.  There's just one place
where we assign the return of gdb_environ::get_char_vector to a local
variable (in infcmd.c), so I renamed it accordingly.

>> +/* Class that represents the environment variables as seen by the
>> +   inferior.  */
>> +
>> +class gdb_environ
>> +{
>> +public:
>> +  /* Regular constructor and destructor.  */
>> +  gdb_environ ();
>> +  ~gdb_environ ();
>> +
>> +  /* Move constructor.  */
>> +  gdb_environ (gdb_environ &&e);
>> +
>> +  /* Move assignment.  */
>> +  gdb_environ &operator= (gdb_environ &&e);
>> +
>> +  /* Create a gdb_environ object using the host's environment
>> +     variables.  */
>> +  static gdb_environ from_host_environ ()
>>    {
>
> Nit: I find it a bit odd that the ctors/dtors are short but 
> defined out of line, while this function is defined inline.
> If I was looking at controlling what the compiler could inline,
> then I'd do it the other way around -- small ctor/dtor in
> the header, and this larger function out of line in the .c file.

Question: if I define a method inside the class, does this implicitly
tell the compiler that I want to inline it, as oppose to defining the
method outside?

I'll follow your advice and define the short ctors inside the class, and
move the definition of from_host_environ to the C file.

>> -    /* Number of usable slots allocated in VECTOR.
>> -       VECTOR always has one slot not counted here,
>> -       to hold the terminating zero.  */
>> -    int allocated;
>> -    /* A vector of slots, ALLOCATED + 1 of them.
>> -       The first few slots contain strings "VAR=VALUE"
>> -       and the next one contains zero.
>> -       Then come some unused slots.  */
>> -    char **vector;
>> -  };
>> +    extern char **environ;
>> +    gdb_environ e;
>> +
>> +    if (environ == NULL)
>> +      return e;
>> +
>> +    for (int i = 0; environ[i] != NULL; ++i)
>> +      e.m_environ_vector.push_back (xstrdup (environ[i]));
>> +
>> +    /* The last element of the vector is always going to be NULL.  */
>> +    e.m_environ_vector.push_back (NULL);
>>  
>> -extern struct gdb_environ *make_environ (void);
>> +    return e;
>> +  }
>
>>    run_target->to_create_inferior (run_target, exec_file,
>>  				  std::string (get_inferior_args ()),
>> -				  environ_vector (current_inferior ()->environment),
>> +				  current_inferior ()
>> +				  ->environment.get_char_vector (),
>>  				  from_tty);
>> @@ -270,7 +270,6 @@ mi_cmd_inferior_tty_show (const char *command, char **argv, int argc)
>>  void 
>>  _initialize_mi_cmd_env (void)
>>  {
>> -  struct gdb_environ *environment;
>>    const char *env;
>>  
>>    /* We want original execution path to reset to, if desired later.
>> @@ -278,13 +277,10 @@ _initialize_mi_cmd_env (void)
>>       current_inferior ()->environment.  Also, there's no obvious
>>       place where this code can be moved such that it surely run
>>       before any code possibly mangles original PATH.  */
>> -  environment = make_environ ();
>> -  init_environ (environment);
>> -  env = get_in_environ (environment, path_var_name);
>> +  env = getenv (path_var_name);
>>  
>>    /* Can be null if path is not set.  */
>>    if (!env)
>>      env = "";
>>    orig_path = xstrdup (env);
>> -  free_environ (environment);
>>  }
>
> Please split this change to a separate patch.  Don't we need to
> update the comment just above?

Sure, I'll split and send a separate patch.  And yeah, the comment needs
updating, thanks for noticing that.

>
>> diff --git a/gdb/unittests/environ-selftests.c b/gdb/unittests/environ-selftests.c
>> new file mode 100644
>> index 0000000..948eacf
>> --- /dev/null
>> +++ b/gdb/unittests/environ-selftests.c
>> @@ -0,0 +1,79 @@
>> +/* Self tests for gdb_environ for GDB, the GNU debugger.
>> +
>> +   Copyright (C) 2017 Free Software Foundation, Inc.
>> +
>> +   This file is part of GDB.
>> +
>> +   This program is free software; you can redistribute it and/or modify
>> +   it under the terms of the GNU General Public License as published by
>> +   the Free Software Foundation; either version 3 of the License, or
>> +   (at your option) any later version.
>> +
>> +   This program is distributed in the hope that it will be useful,
>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> +   GNU General Public License for more details.
>> +
>> +   You should have received a copy of the GNU General Public License
>> +   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
>> +
>> +#include "defs.h"
>> +#include "selftest.h"
>> +#include "common/environ.h"
>> +
>> +namespace selftests {
>> +namespace environ {
>
> "environ" as an identifier will be problematic.  Please
> pick some other name here.
>
> On some hosts "environ" is a define.  E.g., on my Fedora's
> mingw-w64 install I see:
>
>  /usr/x86_64-w64-mingw32/sys-root/mingw/include/stdlib.h:624:#define environ _environ
>
> and gnulib has too:
>
>  $ srcgrep -rn environ gnulib/import/ | grep define
>  gnulib/import/extra/snippet/warn-on-use.h:61:   # define environ (*rpl_environ ())
>  gnulib/import/unistd.in.h:411:#   define environ (*_NSGetEnviron ())
>  gnulib/import/unistd.in.h:432:#  define environ (*rpl_environ ())
>
> I don't think "namespace (*_NSGetEnviron ())" would compile.  :-)

Hah, right :-).  I'll pick "namespace gdb_environ" then.

>
>> +
>> +static void
>> +run_tests ()
>> +{
>> +  if (setenv ("GDB_SELFTEST_ENVIRON", "1", 1) != 0)
>> +    error ("Could not set environment variable for testing.");
>> +
>> +  gdb_environ env;
>> +
>
> Please add a test that that checks that get_char_vector() on an
> empty gdb_environ works as expected.  I.e., something like:
>
>    gdb_environ env;
>    SELF_CHECK (env.get_char_vector()[0] == NULL);

Hm, right, makes sense.  I'll add that.

> AFAICS from:
>
>  gdb_environ::gdb_environ ()
>  {
>  }
>
>  char **
>  gdb_environ::get_char_vector () const
>  {
>    return const_cast<char **> (&m_environ_vector[0]);
>  }
>
> we end up with a bogus envp, because it points at
> m_environ_vector.end(), which is not a valid pointer
> to dereference.  Even ignoring that, it does not point
> to NULL.  So if we pass such a pointer to execve or some syscall
> that accepts an envp, then it'll try iterating the vector until
> it finds a NULL entry, and of course do the wrong thing,
> maybe crash if you're lucky.
>
> Note we can get this situation from here too:
>
>   static gdb_environ from_host_environ ()
>   {
>     extern char **environ;
>     gdb_environ e;
>
>     if (environ == NULL)
>       return e;
>
>
> The old code did not suffer from this because it always
> allocated gdb_environ::vector:
>
>  struct gdb_environ *
>  make_environ (void)
>  {
>    struct gdb_environ *e;
>
>    e = XNEW (struct gdb_environ);
>
>    e->allocated = 10;
>    e->vector = (char **) xmalloc ((e->allocated + 1) * sizeof (char *));
>    e->vector[0] = 0;
>    return e;
>  }
>
> So we either always add a NULL to the vector, or we
> change gdb_environ::get_char_vector instead, like:
>
>  char **
>  gdb_environ::get_char_vector () const
>  {
>    if (m_environ_vector.empty ())
>      {
>        static const char *const empty_envp[1] = { NULL };
>        return const_cast<char **> (empty_envp);
>      }
>    return const_cast<char **> (&m_environ_vector[0]);
>  }
>
> This is OK because execve etc. are garanteed to never change
> the envp they're passed.

Oh, good catch.  I prefer to just initialize the vector with a NULL
value in the ctor; will do that now.

>> +  SELF_CHECK (env.get ("PWD") == NULL);
>> +
>> +  env = gdb_environ::from_host_environ ();
>> +
>> +  SELF_CHECK (strcmp (env.get ("GDB_SELFTEST_ENVIRON"), "1") == 0);
>> +
>> +  env.set ("GDB_SELFTEST_ENVIRON", "test");
>> +  SELF_CHECK (strcmp (env.get ("GDB_SELFTEST_ENVIRON"), "test") == 0);
>> +
>> +  env.unset ("GDB_SELFTEST_ENVIRON");
>> +  SELF_CHECK (env.get ("GDB_SELFTEST_ENVIRON") == NULL);
>> +
>> +  env.set ("GDB_SELFTEST_ENVIRON", "1");
>> +  SELF_CHECK (strcmp (env.get ("GDB_SELFTEST_ENVIRON"), "1") == 0);
>> +
>> +  env.clear ();
>> +  SELF_CHECK (env.get ("GDB_SELFTEST_ENVIRON") == NULL);
>
> Like above, after env.clear() check that this works:
>
>    SELF_CHECK (env.get_char_vector()[0] == NULL);

Will do.

>> +
>> +  if (setenv ("GDB_SELFTEST_ENVIRON", "1", 1) != 0)
>> +    error ("Could not set environment variable for testing.");
>
> This setenv looks like a stale copy from the one at the top?
> I'm not seeing why it's necessary here.

It is indeed, thanks.  Removed.

>> +
>> +  env = gdb_environ::from_host_environ ();
>> +  char **penv = env.get_char_vector ();
>> +  bool found_var = false, found_twice = false;
>> +
>> +  for (size_t i = 0; penv[i] != NULL; ++i)
>> +    if (strcmp (penv[i], "GDB_SELFTEST_ENVIRON=1") == 0)
>> +      {
>> +	if (found_var)
>> +	  found_twice = true;
>> +	found_var = true;
>> +      }
>> +  SELF_CHECK (found_var == true);
>> +  SELF_CHECK (found_twice == false);
>
> Why not simply a count:
>
>   int found_count = 0;
>   for (size_t i = 0; penv[i] != NULL; ++i)
>     if (strcmp (penv[i], "GDB_SELFTEST_ENVIRON=1") == 0)
>       found_count++;
>   SELF_CHECK (found_count == 1);

Good idea, thanks.

> I think that no test actually explicitly sets more than one
> var in the vector.  I think you should exercise that.
> Also check that removing some other var doesn't remove the
> first.  Etc.  E.g., set var 1, set var 2, unset var 1,
> check that  var 2 is still there.

Will do.

Thanks for the review.  I'll send v5 soon.

-- 
Sergio
GPG key ID: 237A 54B1 0287 28BF 00EF  31F4 D0EB 7628 65FC 5E36
Please send encrypted e-mail if possible
http://sergiodj.net/


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]