JSON Lines and Streaming JSON - Efficient Line-Based Data Processing
2026-01-30 · JSON · JSON Lines · Streaming · Data Processing
JSON Lines (also known as NDJSON, newline-delimited JSON) is a simple, efficient format for streaming JSON data. It is ideal for logs, data pipelines, and large-scale processing.
What is JSON Lines?
JSON Lines stores one JSON object per line:
{"id":1,"name":"Item 1","value":100}
{"id":2,"name":"Item 2","value":200}
{"id":3,"name":"Item 3","value":300}
Each line is a complete, valid JSON value, so files can be read line by line and parsed with any standard JSON parser; no special libraries are required.
Benefits of JSON Lines
- Streamable: Process line by line without loading the entire file
- Appendable: Easily add new records without rewriting or re-parsing the file
- Simple: No complex libraries required (see the sketch after this list)
- Compatible: Works with any JSON parser
- Unix-Friendly: Pipes and filters work naturally
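To illustrate how little machinery the format needs, here is a minimal sketch: splitting a string on newlines and handing each non-empty line to JSON.parse is all it takes (the sample data below is made up for illustration).
const text = '{"id":1,"value":100}\n{"id":2,"value":200}\n';
const records = text
  .split('\n')                    // one JSON document per line
  .filter(line => line.trim())    // skip blank lines, including the trailing newline
  .map(line => JSON.parse(line));
console.log(records.length); // 2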
Common Use Cases
Log Files
{"timestamp":"2026-01-30T10:00:00Z","level":"INFO","message":"User logged in","userId":123}
{"timestamp":"2026-01-30T10:00:01Z","level":"DEBUG","message":"Session created","sessionId":"abc123"}
{"timestamp":"2026-01-30T10:00:02Z","level":"ERROR","message":"Database connection failed","error":"ECONNREFUSED"}
Data Pipelines
{"action":"create","table":"users","data":{"name":"John","email":"john@example.com"}}
{"action":"update","table":"users","data":{"id":1,"name":"John Doe"}}
{"action":"delete","table":"users","data":{"id":2}}
Large Dataset Export
{"id":1,"product":"Widget A","price":19.99,"category":"Tools"}
{"id":2,"product":"Widget B","price":29.99,"category":"Tools"}
{"id":3,"product":"Gadget C","price":49.99,"category":"Electronics"}
Reading JSON Lines Files
Node.js
const fs = require('fs');
const readline = require('readline');

async function processJsonLines(filePath) {
  const fileStream = fs.createReadStream(filePath);
  const rl = readline.createInterface({
    input: fileStream,
    crlfDelay: Infinity // treat \r\n as a single line break
  });
  let lineNumber = 0;
  for await (const line of rl) {
    lineNumber++;
    if (line.trim()) {
      try {
        const obj = JSON.parse(line);
        await processObject(obj, lineNumber); // processObject: your per-record handler
      } catch (error) {
        console.error(`Line ${lineNumber}: Parse error`, error.message);
      }
    }
  }
}
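For context, a hypothetical processObject handler and a call to the function above might look like this (items.jsonl is a placeholder path):
async function processObject(obj, lineNumber) {
  // Replace with real work: write to a database, aggregate, etc.
  console.log(`#${lineNumber}`, obj.id, obj.name);
}

processJsonLines('items.jsonl').catch(console.error);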
Python
import json

def process_json_lines(filename):
    with open(filename, 'r') as f:
        for line_num, line in enumerate(f, 1):
            line = line.strip()
            if line:
                try:
                    obj = json.loads(line)
                    process_object(obj, line_num)
                except json.JSONDecodeError as e:
                    print(f"Line {line_num}: Parse error - {e}")
Browser
async function processJsonLines(file) {
  const stream = file.stream();
  const reader = stream.getReader();
  const decoder = new TextDecoder();
  let buffer = '';
  while (true) {
    const { done, value } = await reader.read();
    if (done) {
      if (buffer.trim()) {
        processLine(buffer.trim());
      }
      break;
    }
    buffer += decoder.decode(value, { stream: true });
    const lines = buffer.split('\n');
    buffer = lines.pop();
    for (const line of lines) {
      if (line.trim()) {
        processLine(line.trim());
      }
    }
  }
}

function processLine(line) {
  const obj = JSON.parse(line);
  console.log(obj);
}
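To tie this to a page, a sketch of wiring it up to a file picker (the file-input id is an assumption, not part of any standard markup):
// Assumes <input type="file" id="file-input"> exists in the page
document.getElementById('file-input').addEventListener('change', (event) => {
  const file = event.target.files[0];
  if (file) {
    processJsonLines(file).catch(console.error);
  }
});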
Writing JSON Lines
Node.js
const fs = require('fs');
const writeStream = fs.createWriteStream('output.jsonl');

function writeJsonLine(obj) {
  const line = JSON.stringify(obj) + '\n';
  writeStream.write(line);
}

// Write records one per line
writeJsonLine({ id: 1, name: 'Item 1' });
writeJsonLine({ id: 2, name: 'Item 2' });

// Close the stream when finished
writeStream.end();
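Because each record is a self-contained line, appending to an existing file needs no re-reading or re-parsing. A minimal sketch using fs.promises.appendFile (output.jsonl as above):
const fsp = require('fs').promises;

async function appendJsonLine(obj) {
  // Appending one line leaves all existing records untouched
  await fsp.appendFile('output.jsonl', JSON.stringify(obj) + '\n');
}

appendJsonLine({ id: 3, name: 'Item 3' }).catch(console.error);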
Streaming from API
async function* fetchJsonLines(url) {
  const response = await fetch(url);
  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let buffer = '';
  while (true) {
    const { done, value } = await reader.read();
    if (done) {
      if (buffer.trim()) yield JSON.parse(buffer);
      break;
    }
    buffer += decoder.decode(value, { stream: true });
    const lines = buffer.split('\n');
    buffer = lines.pop();
    for (const line of lines) {
      if (line.trim()) {
        yield JSON.parse(line);
      }
    }
  }
}

// Usage
for await (const obj of fetchJsonLines('/api/stream')) {
  console.log(obj);
}
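On the other side of that request, a server can produce the stream simply by writing one JSON document per line as data becomes available. A minimal sketch using Node's built-in http module; the port, interval, and payload are placeholders:
const http = require('http');

http.createServer((req, res) => {
  res.writeHead(200, { 'Content-Type': 'application/x-ndjson' });
  let i = 0;
  const timer = setInterval(() => {
    // Each write is one complete JSON document followed by a newline
    res.write(JSON.stringify({ id: ++i, ts: Date.now() }) + '\n');
    if (i >= 5) {
      clearInterval(timer);
      res.end();
    }
  }, 1000);
}).listen(3000);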
Command Line Tools
Using jq
# Filter lines (keep only objects with price > 50; -c keeps one object per line)
jq -c 'select(.price > 50)' data.jsonl > filtered.jsonl
# Transform
jq -c '{id: .id, total: .price * .quantity}' data.jsonl > totals.jsonl
# Aggregate (-s slurps all lines into one array)
jq -s 'map(.price) | add' data.jsonl
# Count
jq -s 'length' data.jsonl
Using grep
# Find specific records
grep '"status":"active"' data.jsonl > active.jsonl
# Find by field value
grep '"userId":123' data.jsonl
Processing with Unix Tools
Pipeline Example
# Extract user IDs from logs
cat logs.jsonl | grep '"action":"login"' | jq '.userId' | sort | uniq -c
Parallel Processing
# Split into 10,000-line chunks and process them concurrently with GNU parallel
mkdir -p processed
split -l 10000 data.jsonl chunk_
parallel -j4 'jq -c ".price *= 1.1" {} > processed/{}' ::: chunk_*
Memory Efficiency
JSON Lines is memory-efficient because:
- One object parsed at a time
- No entire file in memory
- Streams can be unbounded (for example, a live API feed)
// Memory usage comparison
// Traditional JSON: the entire 1 GB file plus its parsed tree sit in memory
const largeData = JSON.parse(fs.readFileSync('1gb.json'));

// JSON Lines: only the current record is in memory at any time
// (rl is a readline interface over the file, as in the reading example above)
for await (const line of rl) {
  const obj = JSON.parse(line); // one object at a time, regardless of file size
}
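A rough way to observe this, assuming a large sample.jsonl file is on hand, is to watch process.memoryUsage() while streaming; the heap stays roughly flat no matter how big the file is:
const fs = require('fs');
const readline = require('readline');

async function measureStreaming(filePath) {
  const rl = readline.createInterface({
    input: fs.createReadStream(filePath),
    crlfDelay: Infinity
  });
  let count = 0;
  for await (const line of rl) {
    if (line.trim()) {
      JSON.parse(line); // parse and discard; nothing accumulates
      count++;
    }
  }
  console.log(`${count} records, heap ~${Math.round(process.memoryUsage().heapUsed / 1e6)} MB`);
}

measureStreaming('sample.jsonl').catch(console.error);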
Error Handling
const fs = require('fs');
const readline = require('readline');

async function processWithErrorRecovery(filePath, onError) {
  const fileStream = fs.createReadStream(filePath);
  const rl = readline.createInterface({
    input: fileStream,
    crlfDelay: Infinity
  });
  let lineNumber = 0;
  let errors = [];
  for await (const line of rl) {
    lineNumber++;
    try {
      const obj = JSON.parse(line);
      await processObject(obj); // processObject: your per-record handler
    } catch (error) {
      const errorInfo = { lineNumber, line: line.slice(0, 100), error: error.message };
      errors.push(errorInfo);
      await onError(errorInfo); // onError: caller-supplied error callback
    }
  }
  return { processed: lineNumber - errors.length, errors };
}
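A hypothetical caller might collect failures in a separate "dead letter" file for later inspection (errors.jsonl and data.jsonl are assumed paths):
const errorLog = fs.createWriteStream('errors.jsonl', { flags: 'a' });

(async () => {
  const result = await processWithErrorRecovery('data.jsonl', async (errorInfo) => {
    // Each error report is itself written as a JSON Lines record
    errorLog.write(JSON.stringify(errorInfo) + '\n');
  });
  console.log(`Processed ${result.processed} records, ${result.errors.length} failures`);
  errorLog.end();
})();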
Summary
JSON Lines is ideal for:
- Log files
- Data streaming
- Large dataset processing
- Unix pipeline workflows
- Real-time data feeds
Use JSON Lines when you need simple, streamable JSON without the overhead of parsing entire files.