pre-commit/pre_commit/xargs.py at master · morristech/pre-commit

History

130 lines (100 loc) · 3.64 KB

Raw

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

from __future__ import absolute_import

from __future__ import division

from __future__ import unicode_literals

import concurrent.futures

import contextlib

import math

import sys

import six

from pre_commit import parse_shebang

from pre_commit.util import cmd_output

# TODO: properly compute max_length value

def _get_platform_max_length():

# posix minimum

return 4 * 1024

def _command_length(*cmd):

full_cmd = ' '.join(cmd)

# win32 uses the amount of characters, more details at:

# https://github.com/pre-commit/pre-commit/pull/839

if sys.platform == 'win32':

# the python2.x apis require bytes, we encode as UTF-8

if six.PY2:

return len(full_cmd.encode('utf-8'))

else:

return len(full_cmd.encode('utf-16le')) // 2

else:

return len(full_cmd.encode(sys.getfilesystemencoding()))

class ArgumentTooLongError(RuntimeError):

pass

def partition(cmd, varargs, target_concurrency, _max_length=None):

_max_length = _max_length or _get_platform_max_length()

# Generally, we try to partition evenly into at least `target_concurrency`

# partitions, but we don't want a bunch of tiny partitions.

max_args = max(4, math.ceil(len(varargs) / target_concurrency))

cmd = tuple(cmd)

ret = []

ret_cmd = []

# Reversed so arguments are in order

varargs = list(reversed(varargs))

total_length = _command_length(*cmd)

while varargs:

arg = varargs.pop()

arg_length = _command_length(arg) + 1

if (

total_length + arg_length <= _max_length and

len(ret_cmd) < max_args

ret_cmd.append(arg)

total_length += arg_length

elif not ret_cmd:

raise ArgumentTooLongError(arg)

else:

# We've exceeded the length, yield a command

ret.append(cmd + tuple(ret_cmd))

ret_cmd = []

total_length = _command_length(*cmd)

varargs.append(arg)

ret.append(cmd + tuple(ret_cmd))

return tuple(ret)

@contextlib.contextmanager

def _thread_mapper(maxsize):

if maxsize == 1:

yield map

else:

with concurrent.futures.ThreadPoolExecutor(maxsize) as ex:

yield ex.map

def xargs(cmd, varargs, **kwargs):

"""A simplified implementation of xargs.

negate: Make nonzero successful and zero a failure

target_concurrency: Target number of partitions to run concurrently

"""

negate = kwargs.pop('negate', False)

target_concurrency = kwargs.pop('target_concurrency', 1)

retcode = 0

stdout = b''

stderr = b''

try:

parse_shebang.normexe(cmd[0])

except parse_shebang.ExecutableNotFoundError as e:

return e.to_output()

partitions = partition(cmd, varargs, target_concurrency, **kwargs)

def run_cmd_partition(run_cmd):

return cmd_output(*run_cmd, encoding=None, retcode=None)

threads = min(len(partitions), target_concurrency)

with _thread_mapper(threads) as thread_map:

results = thread_map(run_cmd_partition, partitions)

for proc_retcode, proc_out, proc_err in results:

# This is *slightly* too clever so I'll explain it.

# First the xor boolean table:

# T | F |

# +-------+

# T | F | T |

# --+-------+

# F | T | F |

# --+-------+

# When negate is True, it has the effect of flipping the return

# code. Otherwise, the returncode is unchanged.

retcode |= bool(proc_retcode) ^ negate

stdout += proc_out

stderr += proc_err

return retcode, stdout, stderr

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

xargs.py

Latest commit

History

xargs.py

File metadata and controls