Working with Large JSON Files - A Practical Guide
2026-01-30 · Tags: JSON, Large Files, Streaming, Performance
Large JSON files pose unique challenges. This guide covers strategies for processing files that exceed available memory or browser limits.
Understanding the Problem
Browsers have memory limits (typically 1-4GB per tab). Large JSON files can:
- Cause "Out of Memory" errors
- Freeze the UI during parsing
- Increase GC pressure and lag
File Size Guidelines
| File Size | Approach |
|---|---|
| < 1MB | Standard JSON.parse |
| 1-10MB | Standard with progress indicator |
| 10-100MB | Streaming or chunked processing |
| > 100MB | Server-side processing or Web Workers (see the sketch below) |
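For files at the upper end of that table, moving JSON.parse into a Web Worker keeps the main thread responsive. A minimal sketch, assuming the file names and the /large.json URL are placeholders for your own setup:
// worker.js - the heavy parse happens off the main thread
self.onmessage = (event) => {
  const data = JSON.parse(event.data);
  // Post back a small summary instead of the full object, so the
  // structured-clone cost of the whole dataset is not paid twice
  self.postMessage({ count: Array.isArray(data) ? data.length : 1 });
};

// main.js
const worker = new Worker('worker.js');
worker.onmessage = (event) => console.log('Parsed records:', event.data.count);

fetch('/large.json')
  .then((res) => res.text())
  .then((text) => worker.postMessage(text));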
Streaming JSON Parsing
Using ndjson Format
JSON Lines (ndjson) stores one JSON object per line:
{"id":1,"name":"Item 1"}
{"id":2,"name":"Item 2"}
{"id":3,"name":"Item 3"}
Process it line by line with Node's readline module:
const readline = require('readline');
const fs = require('fs');

const file = fs.createReadStream('data.ndjson');
const rl = readline.createInterface({ input: file, crlfDelay: Infinity });

rl.on('line', (line) => {
  if (line.trim()) {
    const obj = JSON.parse(line); // each line is a complete JSON document
    processObject(obj);
  }
});

rl.on('close', () => {
  console.log('Finished processing');
});
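The same idea works in the browser by streaming an ndjson response with fetch. A sketch, where the URL and handleRecord are placeholders:
async function streamNdjson(url, handleRecord) {
  const res = await fetch(url);
  const reader = res.body.getReader();
  const decoder = new TextDecoder();
  let buffer = '';

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    buffer += decoder.decode(value, { stream: true });
    const lines = buffer.split('\n');
    buffer = lines.pop(); // keep the partial trailing line
    for (const line of lines) {
      if (line.trim()) handleRecord(JSON.parse(line));
    }
  }
  if (buffer.trim()) handleRecord(JSON.parse(buffer)); // last record, if any
}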
Chunked JSON Array Processing
For large JSON arrays where each element sits on its own line (for example, pretty-printed exports):
const fs = require('fs');

async function processArrayInChunks(filePath, chunkSize = 1000) {
  const file = fs.createReadStream(filePath, { encoding: 'utf8' });
  let buffer = '';
  let items = [];

  for await (const chunk of file) {
    buffer += chunk;
    const lines = buffer.split('\n');
    buffer = lines.pop(); // keep the last, possibly incomplete, line

    for (const rawLine of lines) {
      // Strip array brackets and the trailing comma after each element
      const line = rawLine.trim().replace(/,$/, '');
      if (!line || line === '[' || line === ']') continue;
      try {
        items.push(JSON.parse(line));
        if (items.length >= chunkSize) {
          await processChunk(items);
          items = [];
        }
      } catch (e) {
        // Skip lines that are not complete JSON values
      }
    }
  }

  // Flush whatever is left after the stream ends
  if (items.length > 0) {
    await processChunk(items);
  }
}
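A placeholder processChunk might simply log or batch-write each slice; any async handler works here, and the file name below is just an example:
// Hypothetical batch handler: replace the body with your own logic
async function processChunk(items) {
  console.log(`Processing ${items.length} records`);
  // e.g. insert the batch into a database or forward it to an API
}

processArrayInChunks('large-array.json', 500)
  .then(() => console.log('Done'))
  .catch((err) => console.error(err));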
Browser-Based Streaming
File API with Slicing
async function processLargeJSON(file) {
  const chunkSize = 1024 * 1024; // 1 MB slices
  const chunks = Math.ceil(file.size / chunkSize);
  // One TextDecoder keeps multi-byte characters intact across slice boundaries
  const decoder = new TextDecoder();

  for (let i = 0; i < chunks; i++) {
    const start = i * chunkSize;
    const end = Math.min(start + chunkSize, file.size);
    const isLast = i === chunks - 1;
    const bytes = await file.slice(start, end).arrayBuffer();
    const text = decoder.decode(bytes, { stream: !isLast });
    processChunk(text, i === 0, isLast); // flags mark the first and last chunk
  }
}
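Wiring this up to a file picker is straightforward; the element id here is just a placeholder:
// <input type="file" id="json-file"> somewhere in the page
document.getElementById('json-file').addEventListener('change', (event) => {
  const file = event.target.files[0];
  if (file) processLargeJSON(file);
});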
Hand-Rolling a Streaming Parser
To emit objects as soon as they are complete, a character-level parser tracks string and nesting state while scanning each chunk:
// Skeleton only: track string/escape state and brace depth so that
// complete top-level objects can be detected across chunk boundaries.
function createStreamingParser(onObject) {
  let buffer = '';
  let depth = 0;       // current {} nesting level
  let inString = false;
  let escape = false;

  return {
    write(chunk) {
      buffer += chunk;
      // Walk the buffer character by character: toggle inString on unescaped
      // quotes, increment depth on '{', decrement on '}'. Each time depth
      // returns to zero, JSON.parse the completed slice and pass it to onObject.
    },
    end() {
      // Parse any complete object still left in the buffer.
    }
  };
}
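In practice a maintained streaming parser is safer than a hand-rolled one. A sketch using the stream-json package, assuming you can add the npm dependency; processObject and the file name are placeholders:
const fs = require('fs');
const { parser } = require('stream-json');
const { streamArray } = require('stream-json/streamers/StreamArray');

// Emits one { key, value } pair per top-level array element,
// without ever holding the whole file in memory
fs.createReadStream('large-array.json')
  .pipe(parser())
  .pipe(streamArray())
  .on('data', ({ value }) => processObject(value))
  .on('end', () => console.log('Finished'));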
Server-Side Processing
Node.js Streams
const { createReadStream, createWriteStream } = require('fs');
const { Transform } = require('stream');
const split2 = require('split2'); // npm package that splits a stream into lines

// Reads a one-element-per-line JSON array, transforms each element,
// and writes the result back out as a valid JSON array.
class JsonArrayStream extends Transform {
  constructor() {
    super({ writableObjectMode: true });
    this.first = true;
  }
  _transform(line, encoding, callback) {
    const trimmed = line.trim().replace(/,$/, ''); // drop trailing commas
    if (trimmed && trimmed !== '[' && trimmed !== ']') {
      try {
        const obj = JSON.parse(trimmed);
        this.push(this.first ? '[\n' : ',\n');
        this.first = false;
        this.push(JSON.stringify(transformObj(obj)));
      } catch (e) {
        // Skip lines that are not complete JSON values
      }
    }
    callback();
  }
  _flush(callback) {
    this.push(this.first ? '[]\n' : '\n]\n'); // close the output array
    callback();
  }
}

createReadStream('input.json', { encoding: 'utf8' })
  .pipe(split2())
  .pipe(new JsonArrayStream())
  .pipe(createWriteStream('output.json'));
Memory-Mapped Files
For very large files on the server, memory mapping lets the operating system page data in on demand instead of copying the whole file into the JavaScript heap. Node has no built-in mmap support, so this relies on a third-party native addon (the exact API varies by package):
const fs = require('fs');
const mm = require('mmap-js'); // third-party native addon; check its docs for the exact API

const fd = fs.openSync('huge.json', 'r');
const size = fs.fstatSync(fd).size;
const map = mm.map(fd, size); // exposes the file contents as a buffer-like view
fs.closeSync(fd);
// Process the mapped view without reading the whole file into memory at once
Progressive Rendering
For UI display, render progressively:
async function renderLargeDataset(container, data, batchSize = 100) {
  const items = Array.isArray(data) ? data : data.items;
  const total = items.length;
  let rendered = 0;

  function renderBatch() {
    const batch = items.slice(rendered, rendered + batchSize);
    const html = batch.map((item) => renderItem(item)).join('');
    container.insertAdjacentHTML('beforeend', html);
    rendered += batch.length;
    if (rendered < total) {
      // Yield to the browser between batches so the page stays responsive
      requestAnimationFrame(renderBatch);
    }
  }
  renderBatch();
}
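One possible way to wire it up; the container selector and endpoint are placeholders, and renderItem is assumed to return an HTML string for one record:
const container = document.querySelector('#results');
fetch('/api/records.json')
  .then((res) => res.json())
  .then((data) => renderLargeDataset(container, data, 200));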
Virtual Scrolling
For displaying large lists, use virtual scrolling so only the visible rows exist in the DOM:
import { FixedSizeList as List } from 'react-window';
function VirtualList({ items }) {
const Row = ({ index, style }) => (
<div style={style}>
{renderItem(items[index])}
</div>
);
return (
<List
height={600}
itemCount={items.length}
itemSize={50}
width="100%"
>
{Row}
</List>
);
}
Compression and Storage
Use Efficient Formats
For storage and transfer, consider one of the following (a quick sizing check follows the list):
- gzip: ~70% size reduction for text
- brotli: ~80% size reduction
- MessagePack: Binary, ~50% smaller than JSON
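To check what these formats buy you for your own payloads, Node's built-in zlib covers gzip and brotli; largeObject below stands in for your data, and exact ratios will vary:
const zlib = require('zlib');

const json = JSON.stringify(largeObject); // largeObject is a placeholder for your data
const gzipped = zlib.gzipSync(json);
const brotli = zlib.brotliCompressSync(json);

console.log('raw:   ', Buffer.byteLength(json), 'bytes');
console.log('gzip:  ', gzipped.length, 'bytes');
console.log('brotli:', brotli.length, 'bytes');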
Chunked Storage
Store large datasets in chunks:
data/
  part-001.json   (1000 records each)
  part-002.json
  part-003.json
  manifest.json   (index of chunks)
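A minimal loader for this layout might fetch the manifest first and then pull parts one at a time; the manifest schema shown is an assumption, not a standard:
// Assumed manifest.json shape: { "parts": ["part-001.json", "part-002.json", ...] }
async function loadChunkedDataset(baseUrl, onRecords) {
  const manifest = await (await fetch(`${baseUrl}/manifest.json`)).json();
  for (const part of manifest.parts) {
    const records = await (await fetch(`${baseUrl}/${part}`)).json();
    await onRecords(records); // handle each part before fetching the next
  }
}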
Summary
Handle large JSON files by: streaming when possible, processing in chunks, using Web Workers to avoid UI blocking, and considering server-side processing for very large datasets.