split.js 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110
  1. // Split chunks line-wise for generators passed to the `std*` options
  2. export const getSplitLinesGenerator = (binary, preserveNewlines, skipped, state) => binary || skipped
  3. ? undefined
  4. : initializeSplitLines(preserveNewlines, state);
  5. // Same but for synchronous methods
  6. export const splitLinesSync = (chunk, preserveNewlines, objectMode) => objectMode
  7. ? chunk.flatMap(item => splitLinesItemSync(item, preserveNewlines))
  8. : splitLinesItemSync(chunk, preserveNewlines);
  9. const splitLinesItemSync = (chunk, preserveNewlines) => {
  10. const {transform, final} = initializeSplitLines(preserveNewlines, {});
  11. return [...transform(chunk), ...final()];
  12. };
  13. const initializeSplitLines = (preserveNewlines, state) => {
  14. state.previousChunks = '';
  15. return {
  16. transform: splitGenerator.bind(undefined, state, preserveNewlines),
  17. final: linesFinal.bind(undefined, state),
  18. };
  19. };
  20. // This imperative logic is much faster than using `String.split()` and uses very low memory.
  21. const splitGenerator = function * (state, preserveNewlines, chunk) {
  22. if (typeof chunk !== 'string') {
  23. yield chunk;
  24. return;
  25. }
  26. let {previousChunks} = state;
  27. let start = -1;
  28. for (let end = 0; end < chunk.length; end += 1) {
  29. if (chunk[end] === '\n') {
  30. const newlineLength = getNewlineLength(chunk, end, preserveNewlines, state);
  31. let line = chunk.slice(start + 1, end + 1 - newlineLength);
  32. if (previousChunks.length > 0) {
  33. line = concatString(previousChunks, line);
  34. previousChunks = '';
  35. }
  36. yield line;
  37. start = end;
  38. }
  39. }
  40. if (start !== chunk.length - 1) {
  41. previousChunks = concatString(previousChunks, chunk.slice(start + 1));
  42. }
  43. state.previousChunks = previousChunks;
  44. };
  45. const getNewlineLength = (chunk, end, preserveNewlines, state) => {
  46. if (preserveNewlines) {
  47. return 0;
  48. }
  49. state.isWindowsNewline = end !== 0 && chunk[end - 1] === '\r';
  50. return state.isWindowsNewline ? 2 : 1;
  51. };
  52. const linesFinal = function * ({previousChunks}) {
  53. if (previousChunks.length > 0) {
  54. yield previousChunks;
  55. }
  56. };
  57. // Unless `preserveNewlines: true` is used, we strip the newline of each line.
  58. // This re-adds them after the user `transform` code has run.
  59. export const getAppendNewlineGenerator = ({binary, preserveNewlines, readableObjectMode, state}) => binary || preserveNewlines || readableObjectMode
  60. ? undefined
  61. : {transform: appendNewlineGenerator.bind(undefined, state)};
  62. const appendNewlineGenerator = function * ({isWindowsNewline = false}, chunk) {
  63. const {unixNewline, windowsNewline, LF, concatBytes} = typeof chunk === 'string' ? linesStringInfo : linesUint8ArrayInfo;
  64. if (chunk.at(-1) === LF) {
  65. yield chunk;
  66. return;
  67. }
  68. const newline = isWindowsNewline ? windowsNewline : unixNewline;
  69. yield concatBytes(chunk, newline);
  70. };
  71. const concatString = (firstChunk, secondChunk) => `${firstChunk}${secondChunk}`;
  72. const linesStringInfo = {
  73. windowsNewline: '\r\n',
  74. unixNewline: '\n',
  75. LF: '\n',
  76. concatBytes: concatString,
  77. };
  78. const concatUint8Array = (firstChunk, secondChunk) => {
  79. const chunk = new Uint8Array(firstChunk.length + secondChunk.length);
  80. chunk.set(firstChunk, 0);
  81. chunk.set(secondChunk, firstChunk.length);
  82. return chunk;
  83. };
  84. const linesUint8ArrayInfo = {
  85. windowsNewline: new Uint8Array([0x0D, 0x0A]),
  86. unixNewline: new Uint8Array([0x0A]),
  87. LF: 0x0A,
  88. concatBytes: concatUint8Array,
  89. };