aboutsummaryrefslogtreecommitdiff
path: root/utils/check_copyright.py
blob: b6dc933eee7cb0145657405c4168c83232e572b5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
#!/usr/bin/env python
# coding=utf-8
# Copyright (c) 2016 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Checks for copyright notices in all the files that need them under the
current directory.  Optionally insert them.  When inserting, replaces
an MIT or Khronos free use license with Apache 2.
"""

import argparse
import fileinput
import fnmatch
import inspect
import os
import re
import sys

# List of designated copyright owners.
AUTHORS = ['The Khronos Group Inc.',
           'LunarG Inc.',
           'Google Inc.',
           'Google LLC',
           'Pierre Moreau',
           'Samsung Inc',
           'André Perez Maselco',
           'Vasyl Teliman',
           'Advanced Micro Devices, Inc.',
           'Stefano Milizia',
           'Alastair F. Donaldson',
           'Mostafa Ashraf',
           'Shiyu Liu',
           'ZHOU He']
# Year written into copyright lines that this script inserts.
CURRENT_YEAR = '2021'

# Year and year-range spellings accepted in an existing copyright line.
YEARS = '(2014-2016|2015-2016|2015-2020|2016|2016-2017|2017|2017-2019|2018|2019|2020|2021|2022)'
# Matches "Copyright (c) <years> <author>".  Author names are escaped so that
# punctuation in them (such as the '.' in 'Inc.') is matched literally rather
# than as a regex metacharacter.
COPYRIGHT_RE = re.compile(
        r'Copyright \(c\) {} ({})'.format(
            YEARS, '|'.join(re.escape(author) for author in AUTHORS)))

# First and last lines of an MIT / Khronos free use license notice.
MIT_BEGIN_RE = re.compile('Permission is hereby granted, '
                          'free of charge, to any person obtaining a')
MIT_END_RE = re.compile(r'MATERIALS OR THE USE OR OTHER DEALINGS IN '
                        r'THE MATERIALS\.')
# First and last lines of an Apache 2 license notice.
APACHE2_BEGIN_RE = re.compile(r'Licensed under the Apache License, '
                              r'Version 2\.0 \(the "License"\);')
APACHE2_END_RE = re.compile(r'limitations under the License\.')

LICENSED = """Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License."""
# Number of lines between the first and the last line of LICENSED, i.e. the
# offset to add to the first line's number to get the last line's number.
LICENSED_LEN = LICENSED.count('\n')


def find(top, filename_glob, skip_glob_dir_list, skip_glob_files_list):
    """Returns files in the tree rooted at top matching filename_glob but not
    in directories matching skip_glob_dir_list nor files matching
    skip_glob_files_list.

    Args:
        top: Directory at which the walk starts.
        filename_glob: Glob that a file's base name must match.
        skip_glob_dir_list: Globs matched against directory base names;
            matching directories are not descended into.
        skip_glob_files_list: Globs matched against the whole path of a
            candidate file, as produced by joining the walked directory and
            the file name.  A plain path without wildcards matches only
            itself.

    Returns:
        A list of paths, each being the walked directory joined with the
        file name, in os.walk order.
    """

    file_list = []
    for path, dirs, files in os.walk(top):
        # Prune in place so that os.walk does not descend into skipped
        # directories.
        dirs[:] = [d for d in dirs
                   if not any(fnmatch.fnmatch(d, glob)
                              for glob in skip_glob_dir_list)]
        for filename in fnmatch.filter(files, filename_glob):
            full_file = os.path.join(path, filename)
            # fnmatch normalizes both operands with os.path.normcase, so a
            # pattern written with '/' is still honored where the platform
            # uses a different separator.
            if not any(fnmatch.fnmatch(full_file, glob)
                       for glob in skip_glob_files_list):
                file_list.append(full_file)
    return file_list


def filtered_descendants(glob):
    """Lists files below the current directory whose names match glob.

    Vendored and generated locations are left out: directories named
    third_party, external, CompilerIdCXX, or matching build* / out*, as well
    as the file ./utils/clang-format-diff.py.
    """
    ignored_dirs = ['third_party', 'external', 'CompilerIdCXX',
                    'build*', 'out*']
    ignored_files = ['./utils/clang-format-diff.py']
    return find('.', glob, ignored_dirs, ignored_files)


def skip(line):
    """Tells whether line is blank, or is a shebang after leading
    whitespace."""
    body = line.lstrip()
    if not body:
        # Nothing but whitespace (including a bare newline).
        return True
    return body.startswith('#!')


def comment(text, prefix):
    """Turns text into a block of comment lines.

    The text is split on newline characters.  Every resulting line gets
    prefix and one space put in front of it, after which trailing whitespace
    is removed, so an empty input line becomes just the prefix.  The lines
    are joined back together with newlines.
    """
    return '\n'.join(f'{prefix} {row}'.rstrip() for row in text.split('\n'))


def insert_copyright(author, glob, comment_prefix):
    """Finds all glob-matching files under the current directory and inserts the
    copyright message, and license notice.  An MIT license or Khronos free
    use license (modified MIT) is replaced with an Apache 2 license.

    The copyright message goes into the first non-whitespace, non-shebang line
    in a file.  The license notice follows it.  Both are prefixed on each line
    by comment_prefix and a space.

    Files are rewritten in place.  A file that already has a recognized
    copyright line before any other non-blank, non-shebang line keeps that
    line as is.

    Args:
        author: Copyright owner to name in inserted copyright lines.
        glob: File name glob selecting the files to process.
        comment_prefix: Line-comment marker for the selected files.
    """

    copyright_text = comment(
        'Copyright (c) {} {}'.format(CURRENT_YEAR, author),
        comment_prefix) + '\n\n'
    licensed = comment(LICENSED, comment_prefix) + '\n\n'
    for filename in filtered_descendants(glob):
        # Parsing states are:
        #   0 Initial: Have not seen a copyright declaration.
        #   1 Seen a copyright line and no other interesting lines
        #   2 In the middle of an MIT or Khronos free use license
        #   9 Exited any of the above
        state = 0
        # With inplace set, fileinput redirects sys.stdout into the file being
        # rewritten.  The context manager guarantees the redirection is undone
        # and the file is closed even if processing raises.
        with fileinput.input(filename, inplace=True) as lines:
            for line in lines:
                emit = True
                if state == 0:
                    if COPYRIGHT_RE.search(line):
                        state = 1
                    elif skip(line):
                        pass
                    else:
                        # Didn't see a copyright. Inject copyright and
                        # license.
                        sys.stdout.write(copyright_text)
                        sys.stdout.write(licensed)
                        # Assume there isn't a previous license notice.
                        state = 1
                elif state == 1:
                    if MIT_BEGIN_RE.search(line):
                        state = 2
                        emit = False
                    elif APACHE2_BEGIN_RE.search(line):
                        # Assume an Apache license is preceded by a copyright
                        # notice.  So just emit it like the rest of the file.
                        state = 9
                elif state == 2:
                    # Replace the MIT license with Apache 2
                    emit = False
                    if MIT_END_RE.search(line):
                        state = 9
                        sys.stdout.write(licensed)
                if emit:
                    sys.stdout.write(line)


def alert_if_no_copyright(glob, comment_prefix):
    """Prints names of all files missing either a copyright or Apache 2 license.

    Finds all glob-matching files under the current directory and checks if they
    contain the copyright message and license notice.  Prints the names of all the
    files that don't meet both criteria.

    The license notice counts as present only when its last line is found
    exactly LICENSED_LEN lines after its first line.

    Args:
        glob: File name glob selecting the files to check.
        comment_prefix: Unused here; accepted so that the signature parallels
            insert_copyright.

    Returns the total number of file names printed.
    """
    printed_count = 0
    for filename in filtered_descendants(glob):
        has_copyright = False
        has_apache2 = False
        # Line number at which the last line of the license is expected; 0
        # means no first line has been seen yet (line numbers start at 1).
        apache_expected_end = 0
        with open(filename, encoding='utf-8') as contents:
            for line_num, line in enumerate(contents, start=1):
                if COPYRIGHT_RE.search(line):
                    has_copyright = True
                if APACHE2_BEGIN_RE.search(line):
                    apache_expected_end = line_num + LICENSED_LEN
                # Compare by value: identity comparison of ints is only
                # reliable for the small integers the interpreter caches.
                if (line_num == apache_expected_end
                        and APACHE2_END_RE.search(line)):
                    has_apache2 = True
        if not (has_copyright and has_apache2):
            message = filename
            if not has_copyright:
                message += ' has no copyright'
            if not has_apache2:
                message += ' has no Apache 2 license notice'
            print(message)
            printed_count += 1
    return printed_count


class ArgParser(argparse.ArgumentParser):
    """Command-line parser for this script.

    The parser description is the docstring of the module this class lives
    in.  The only option is --update, whose value is stored as `author`.
    """

    def __init__(self):
        module_doc = inspect.getdoc(sys.modules[__name__])
        super().__init__(description=module_doc)

        update_help = ('For files missing a copyright notice, insert one for '
                       'the given author, and add a license notice.  The '
                       'author must be in the AUTHORS list in the script.')
        self.add_argument('--update', dest='author', action='store',
                          help=update_help)


def main():
    """Entry point: checks, or with --update rewrites, the covered files.

    Without --update, every covered file type is checked and the process
    exits with a failing status if any file was reported.  With --update,
    the author must be one of AUTHORS (otherwise an error is printed and the
    process exits with status 1); notices are then inserted for every
    covered file type and the process exits with status 0.
    """
    # File name glob paired with the line-comment marker for that file type.
    glob_comment_pairs = [
        ('*.h', '//'),
        ('*.hpp', '//'),
        ('*.sh', '#'),
        ('*.py', '#'),
        ('*.cpp', '//'),
        ('CMakeLists.txt', '#'),
    ]
    args = ArgParser().parse_args()

    if not args.author:
        # Check-only mode: run every check before deciding the exit status.
        count = 0
        for glob, prefix in glob_comment_pairs:
            count += alert_if_no_copyright(glob, prefix)
        sys.exit(count > 0)
    else:
        if args.author not in AUTHORS:
            print('error: --update argument must be in the AUTHORS list in '
                  'check_copyright.py: {}'.format(AUTHORS))
            sys.exit(1)
        for glob, prefix in glob_comment_pairs:
            insert_copyright(args.author, glob, prefix)
        sys.exit(0)


# Run the checker only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()