ldgen: additional documentations re. internal workings

2024-10-05 20:47:46 -04:00 · 2021-03-05 17:49:10 +08:00 · 2021-03-05 17:49:10 +08:00 · 6088af1193
commit 6088af1193
parent acd2385d0e
7 changed files with 248 additions and 29 deletions
--- a/tools/ldgen/README.md
+++ b/tools/ldgen/README.md
@ -0,0 +1,42 @@
+## Linker Script Generator
+
+Contains code that implements linker script generation, `ldgen`. For more information about the feature,
+see `docs/en/api-guides/linker-script-generation.rst`.
+
+### Source Files
+
+The following are the source files in the directory:
+
+- `ldgen.py` - Python executable that gets called during build.
+- `entity.py` - contains classes related to entities (library, object, symbol or combination of the above) with mappable input sections.
+- `fragments.py` - contains classes for parsing the different types of fragments in linker fragment files.
+- `generation.py` - contains the bulk of the logic used to process fragments into output commands.
+- `sdkconfig.py` - used for evaluating conditionals in fragment files.
+- `linker_script.py` - augments the input linker script template with output commands from generation process to produce the output linker script.
+- `output_commands.py` - contains classes that represent the output commands in the output linker script.
+- `ldgen_common.py` - contains miscellaneous utilities/definitions that can be used in the files mentioned above.
+
+### Tests
+
+Unit tests are in the `test` directory. These tests are run as part of CI in the job `test_ldgen_on_host`.
+
+There is also a test app for `ldgen` in `tools/test_apps/build_system/ldgen_test`.
+
+### Build System
+
+Linker script generation is a part of the build process. The build scripts `tools/cmake/ldgen.cmake`
+and `make/ldgen.mk` contain the build-system-side implementation for CMake and Make, respectively.
+
+### Basic Flow
+
+The build system invokes `ldgen.py`, passing some information from the build.
+
+The linker fragment files are parsed by `fragments.py`, evaluating conditional expressions
+with `sdkconfig.py`. 
+
+From the parsed fragments, `generation.py` generates output commands defined in `output_commands.py`,
+with some help from `entity.py`.
+
+`linker_script.py` writes the output linker script, replacing markers with output commands generated.
+
+More details about the implementation are in the respective source files.
--- a/tools/ldgen/entity.py
+++ b/tools/ldgen/entity.py
@ -27,8 +27,13 @@ from pyparsing import (Group, Literal, OneOrMore, ParseException, SkipTo, Suppre
@total_ordering
 class Entity():
    """
-    Definition of an entity which can be placed or excluded
-    from placement.
+    An entity refers to a library, object, or symbol whose input
+    sections can be placed or excluded from placement.
+
+    An important property of an entity is its specificity - the granularity
+    of the entity to be placed. Specificity increases in the following
+    order: library, object, symbol. An entity with no specificity refers
+    to all entities.
    """

    ALL = '*'
@ -105,8 +110,10 @@ class Entity():

 class EntityDB():
    """
-    Encapsulates an output of objdump. Contains information about the static library sections
-    and names
+    Collection of entities extracted from libraries known in the build.
+    Allows retrieving a list of archives, a list of object files in an archive
+    or a list of symbols in an archive; as well as allows for checking if an
+    entity exists in the collection.
    """

    __info = collections.namedtuple('__info', 'filename content')
--- a/tools/ldgen/fragments.py
+++ b/tools/ldgen/fragments.py
@ -26,11 +26,11 @@ from pyparsing import (Combine, Forward, Group, Keyword, Literal, OneOrMore, Opt
 from sdkconfig import SDKConfig


-
 class FragmentFile():
    """
-    Fragment file internal representation. Parses and stores instances of the fragment definitions
-    contained within the file.
+    Processes a fragment file and stores all parsed fragments. For
+    more information on how this class interacts with classes for the different fragment types,
+    see description of Fragment.
    """

    def __init__(self, fragment_file, sdkconfig):
@ -185,11 +185,34 @@ class FragmentFile():


 class Fragment():
+    """
+    Base class for a fragment that can be parsed from a fragment file. All fragments
+    share the common grammar:
+
+    [type:name]
+    key1:value1
+    key2:value2
+    ...
+
+    Supporting a new fragment type means deriving a concrete class which specifies
+    key-value pairs that the fragment supports and what to do with the parsed key-value pairs.
+
+    The new fragment must also be appended to FRAGMENT_TYPES, specifying the
+    keyword for the type and the derived class.
+
+    The key of the key-value pair is a simple keyword string. Other parameters
+    that describe the key-value pair are specified in Fragment.KeyValue:
+        1. grammar - pyparsing grammar to parse the value of key-value pair
+        2. min - the minimum number of values in the key entry, None means no minimum
+        3. max - the maximum number of values in the key entry, None means no maximum
+        4. required - if the key-value pair is required in the fragment
+
+    Setting min=max=1 means that the key has a single value.
+
+    FragmentFile provides conditional expression evaluation, enforcing
+    the parameters for Fragment.KeyValue.
+    """
    __metaclass__ = abc.ABCMeta
-    """
-    Encapsulates a fragment as defined in the generator syntax. Sets values common to all fragment and performs processing
-    such as checking the validity of the fragment name and getting the entry values.
-    """

    KeyValue = namedtuple('KeyValue', 'grammar min max required')

@ -206,6 +229,15 @@ class Fragment():


 class Sections(Fragment):
+    """
+    Fragment which contains a list of input sections.
+
+    [sections:<name>]
+    entries:
+        .section1
+        .section2
+        ...
+    """

    # Unless quoted, symbol names start with a letter, underscore, or point
    # and may include any letters, underscores, digits, points, and hyphens.
@ -248,7 +280,14 @@ class Sections(Fragment):

 class Scheme(Fragment):
    """
-    Encapsulates a scheme fragment, which defines what target input sections are placed under.
+    Fragment which defines where the input sections defined in a Sections fragment
+    are going to end up, the target. The targets are markers in a linker script template
+    (see LinkerScript in linker_script.py).
+
+    [scheme:<name>]
+    entries:
+        sections1 -> target1
+        ...
    """

    grammars = {
@ -268,7 +307,23 @@ class Scheme(Fragment):

 class Mapping(Fragment):
    """
-    Encapsulates a mapping fragment, which defines what targets the input sections of mappable entties are placed under.
+    Fragment which attaches a scheme to entities (see Entity in entity.py), specifying where the input
+    sections of the entity will end up.
+
+    [mapping:<name>]
+    archive: lib1.a
+    entries:
+        obj1:symbol1 (scheme1); section1 -> target1 KEEP SURROUND(sym1) ...
+        obj2 (scheme2)
+        ...
+
+    Ultimately, an `entity (scheme)` entry generates an
+    input section description (see https://sourceware.org/binutils/docs/ld/Input-Section.html)
+    in the output linker script. It is possible to attach 'flags' to the
+    `entity (scheme)` to generate different output commands or to
+    emit additional keywords in the generated input section description. The
+    input section description, as well as other output commands, is defined in
+    output_commands.py.
    """

    class Flag():
@ -276,7 +331,6 @@ class Mapping(Fragment):
                    Optional(Suppress(',') + Suppress('post').setParseAction(lambda: True).setResultsName('post')))

    class Surround(Flag):
-
        def __init__(self, symbol):
            self.symbol = symbol
            self.pre = True
@ -286,7 +340,7 @@ class Mapping(Fragment):
        def get_grammar():
            # SURROUND(symbol)
            #
-            # __symbol_start, __symbol_end is generated before and after
+            # '__symbol_start', '__symbol_end' are generated before and after
            # the corresponding input section description, respectively.
            grammar = (Keyword('SURROUND').suppress() +
                       Suppress('(') +
@ -309,7 +363,11 @@ class Mapping(Fragment):

        @staticmethod
        def get_grammar():
-            # ALIGN(alignment, [, pre, post])
+            # ALIGN(alignment [, pre, post]).
+            #
+            # Generates alignment command before and/or after the corresponding
+            # input section description, depending on whether pre, post or
+            # both are specified.
            grammar = (Keyword('ALIGN').suppress() +
                       Suppress('(') +
                       Word(nums).setResultsName('alignment') +
@ -344,6 +402,9 @@ class Mapping(Fragment):

        @staticmethod
        def get_grammar():
+            # KEEP()
+            #
+            # Surrounds input section description with KEEP command.
            grammar = Keyword('KEEP()').setParseAction(Mapping.Keep)
            return grammar

@ -362,7 +423,12 @@ class Mapping(Fragment):

        @staticmethod
        def get_grammar():
-            # SORT(sort_by_first [, sort_by_second])
+            # SORT([sort_by_first, sort_by_second])
+            #
+            # where sort_by_first, sort_by_second = {name, alignment, init_priority}
+            #
+            # Emits SORT_BY_NAME, SORT_BY_ALIGNMENT or SORT_BY_INIT_PRIORITY
+            # depending on arguments. Nested sort follows linker script rules.
            keywords = Keyword('name') | Keyword('alignment') | Keyword('init_priority')
            grammar = (Keyword('SORT').suppress() + Suppress('(') +
                       keywords.setResultsName('first') +
@ -458,7 +524,9 @@ class Mapping(Fragment):

 class DeprecatedMapping():
    """
-    Encapsulates a mapping fragment, which defines what targets the input sections of mappable entties are placed under.
+    Mapping fragment with old grammar in versions older than ESP-IDF v4.0. Does not conform to
+    requirements of the Fragment class and thus is limited when it comes to conditional expression
+    evaluation.
    """

    # Name of the default condition entry
--- a/tools/ldgen/generation.py
+++ b/tools/ldgen/generation.py
@ -26,6 +26,26 @@ from output_commands import AlignAtAddress, InputSectionDesc, SymbolAtAddress


 class Placement():
+    """
+    A Placement is an assignment of an entity's input sections to a target
+    in the output linker script - a precursor to the input section description.
+
+    A placement can be excluded from another placement. These are represented
+    as contents of EXCLUDE_FILE in the input section description. Since the linker uses the
+    first matching rule, these exclusions make sure that accidental matching
+    of entities with higher specificity does not occur.
+
+    The placement which a placement is excluded from is referred to as the
+    'basis' placement. It operates on the same input section of the entity on
+    one of the parents (or parent's parent and so forth), but might have
+    a different target (see is_significant() for the criteria).
+
+    A placement is explicit if it was derived from an actual entry in one of
+    the mapping fragments. Just as intermediate entity nodes are created in some cases,
+    intermediate placements are created particularly for symbol placements.
+    The reason is that EXCLUDE_FILE does not work on symbols (see ObjectNode
+    for details).
+    """

    def __init__(self, node, sections, target, flags, explicit, force=False, dryrun=False):
        self.node = node
@ -43,9 +63,7 @@ class Placement():
        # fragment entry.
        self.explicit = explicit

-        # Find basis placement. A basis placement is a placement
-        # on the parent (or parent's parent and so on and so forth)
-        # that operates on the same section as this one.
+        # Find basis placement.
        parent = node.parent
        candidate = None
        while parent:
@ -91,6 +109,31 @@ class Placement():


 class EntityNode():
+    """
+    Node in entity tree. An EntityNode
+    is created from an Entity (see entity.py).
+
+    The entity tree has a maximum depth of 3. Nodes at different
+    depths are derived from this class for special behavior (see
+    RootNode, ArchiveNode, ObjectNode, SymbolNode) depending
+    on entity specificity.
+
+    Nodes for entities are inserted at the appropriate depth, creating
+    intermediate nodes along the path if necessary. For example, a node
+    for entity `lib1.a:obj1:sym1` needs to be inserted. If the node for `lib1.a:obj1`
+    does not exist, then it needs to be created.
+
+    A node contains a dictionary of placements (see Placement).
+    The keys to this dictionary are contents of sections fragments,
+    representing the input sections of an entity. For example,
+    a node for entity `lib1.a` might have a placement entry for its `.text` input section
+    in this dictionary. The placement will contain details about the
+    target, the flags, etc.
+
+    Generation of output commands to be written to the output linker script
+    requires traversal of the tree, each node collecting the output commands
+    from its children, so on and so forth.
+    """

    def __init__(self, parent, name):
        self.children = []
@ -212,14 +255,32 @@ class EntityNode():


 class SymbolNode(EntityNode):
-
+    """
+    Entities at depth=3. Represents entities with archive, object
+    and symbol specified.
+    """
    def __init__(self, parent, name):
        EntityNode.__init__(self, parent, name)
        self.entity = Entity(self.parent.parent.name, self.parent.name)


 class ObjectNode(EntityNode):
+    """
+    Entities at depth=2. Represents entities with archive
+    and object specified.

+    Creating a placement on a child node (SymbolNode) has a different behavior, since
+    exclusions using EXCLUDE_FILE for symbols do not work.
+
+    The sections of this entity have to be 'expanded'. That is, we must
+    look into the actual input sections of this entity and remove
+    the ones corresponding to the symbol. The remaining sections of an expanded
+    object entity will be listed one-by-one in the corresponding
+    input section description.
+
+    An intermediate placement on this node is created, if one does not exist,
+    and is the one excluded from its basis placement.
+    """
    def __init__(self, parent, name):
        EntityNode.__init__(self, parent, name)
        self.child_t = SymbolNode
@ -281,7 +342,9 @@ class ObjectNode(EntityNode):


 class ArchiveNode(EntityNode):
-
+    """
+    Entities at depth=1. Represents entities with archive specified.
+    """
    def __init__(self, parent, name):
        EntityNode.__init__(self, parent, name)
        self.child_t = ObjectNode
@ -289,6 +352,10 @@ class ArchiveNode(EntityNode):


 class RootNode(EntityNode):
+    """
+    Single entity at depth=0. Represents entities with no specific members
+    specified.
+    """
    def __init__(self):
        EntityNode.__init__(self, None, Entity.ALL)
        self.child_t = ArchiveNode
@ -297,7 +364,9 @@ class RootNode(EntityNode):

 class Generation:
    """
-    Implements generation of placement based on collected sections, scheme and mapping fragment.
+    Processes all fragments parsed from fragment files included in the build.
+    Generates output commands (see output_commands.py) that LinkerScript (see linker_script.py) can
+    write to the output linker script.
    """

    # Processed mapping, scheme and section entries
--- a/tools/ldgen/linker_script.py
+++ b/tools/ldgen/linker_script.py
@ -24,8 +24,11 @@ from pyparsing import ParseException, Suppress, White

 class LinkerScript:
    """
-    Encapsulates a linker script template file. Finds marker syntax and handles replacement to generate the
-    final output.
+    Processes a linker script template, which contains markers with grammar:
+
+    [<target>]
+
+    The <target> is where output commands (see output_commands.py) are placed.
    """

    Marker = collections.namedtuple('Marker', 'target indent rules')
--- a/tools/ldgen/output_commands.py
+++ b/tools/ldgen/output_commands.py
@ -16,8 +16,20 @@

 from entity import Entity

+# Contains classes for output section commands referred to in
+# https://www.acrc.bris.ac.uk/acrc/RedHat/rhel-ld-en-4/sections.html#OUTPUT-SECTION-DESCRIPTION.
+

 class AlignAtAddress():
+    """
+    Outputs assignment of builtin function ALIGN to current
+    position:
+
+    . = ALIGN(<alignment>)
+
+    Mapping fragment flag ALIGN causes this output section
+    command to be emitted.
+    """

    def __init__(self, alignment):
        self.alignment = alignment
@ -31,6 +43,16 @@ class AlignAtAddress():


 class SymbolAtAddress():
+    """
+    Outputs assignment of builtin function ABSOLUTE to a symbol
+    for current position:
+
+    <symbol> = ABSOLUTE(.)
+
+    Mapping fragment flag SURROUND causes this
+    output section command to be emitted before and after
+    an InputSectionDesc.
+    """

    def __init__(self, symbol):
        self.symbol = symbol
@ -44,6 +66,14 @@ class SymbolAtAddress():


 class InputSectionDesc():
+    """
+    Outputs an input section description as described in
+    https://www.acrc.bris.ac.uk/acrc/RedHat/rhel-ld-en-4/sections.html#INPUT-SECTION.
+
+    These commands are emitted from mapping fragment entries, specifically attaching
+    a scheme onto an entity. Mapping fragment flags KEEP, SORT will also affect
+    the emitted input section description.
+    """

    def __init__(self, entity, sections, exclusions=None, keep=False, sort=None):
        assert(entity.specificity != Entity.Specificity.SYMBOL)
--- a/tools/ldgen/sdkconfig.py
+++ b/tools/ldgen/sdkconfig.py
@ -21,8 +21,8 @@ from pyparsing import (Combine, Group, Literal, Optional, Word, alphanums, hexnu

 class SDKConfig:
    """
-    Encapsulates an sdkconfig file. Defines grammar of a configuration entry, and enables
-    evaluation of logical expressions involving those entries.
+    Evaluates conditional expressions based on the build's sdkconfig and Kconfig files.
+    This also defines the grammar of conditional expressions.
    """

    # A configuration entry is in the form CONFIG=VALUE. Definitions of components of that grammar