Querying Data
Master common query patterns and optimization techniques for Machbase. Learn how to write efficient queries for time-series data analysis.
Query Basics
Simple SELECT
-- Get all recent data
SELECT * FROM sensors DURATION 1 HOUR;
-- With specific columns
SELECT sensor_id, value, _arrival_time
FROM sensors DURATION 1 HOUR;
-- With conditions
SELECT * FROM sensors
WHERE sensor_id = 'sensor01'
DURATION 1 HOUR;
Time-Based Queries
-- Last 10 minutes
SELECT * FROM logs DURATION 10 MINUTE;
-- Last hour
SELECT * FROM logs DURATION 1 HOUR;
-- Last day
SELECT * FROM logs DURATION 1 DAY;
-- 30 minutes starting 2 hours ago
SELECT * FROM logs DURATION 30 MINUTE BEFORE 2 HOUR;
-- Specific time range
SELECT * FROM logs
WHERE _arrival_time BETWEEN '2025-10-10 00:00:00' AND '2025-10-10 23:59:59';
Common Patterns
Pattern 1: Recent Data Monitoring
-- Last 5 minutes of errors
SELECT * FROM app_logs
WHERE level = 'ERROR'
DURATION 5 MINUTE;
-- Latest sensor readings
SELECT sensor_id, value, _arrival_time
FROM sensors
DURATION 10 MINUTE
ORDER BY _arrival_time DESC;
Pattern 2: Aggregations
-- Count by hour
SELECT
TO_CHAR(_arrival_time, 'YYYY-MM-DD HH24:00:00') as hour,
COUNT(*) as count
FROM logs
DURATION 24 HOUR
GROUP BY TO_CHAR(_arrival_time, 'YYYY-MM-DD HH24:00:00');
-- Average by sensor
SELECT
sensor_id,
AVG(value) as avg_value,
MIN(value) as min_value,
MAX(value) as max_value
FROM sensors
DURATION 1 DAY
GROUP BY sensor_id;
Pattern 3: Text Search
-- Search for keyword
SELECT * FROM logs
WHERE message SEARCH 'timeout'
DURATION 1 HOUR;
-- Multiple keywords (OR)
SELECT * FROM logs
WHERE message SEARCH 'error'
OR message SEARCH 'failed'
DURATION 1 HOUR;
-- Case-insensitive search
SELECT * FROM logs
WHERE LOWER(message) LIKE '%error%'
DURATION 1 HOUR;
Pattern 4: JOIN Operations
-- Enrich sensor data with device info
SELECT
s.sensor_id,
s.value,
s._arrival_time,
d.device_name,
d.location
FROM sensors s
JOIN devices d ON s.sensor_id = d.device_id
DURATION 1 HOUR;
-- Multiple joins
SELECT
s.*,
d.device_name,
f.facility_name,
f.city
FROM sensors s
JOIN devices d ON s.sensor_id = d.device_id
JOIN facilities f ON d.facility = f.facility_code
DURATION 1 HOUR;
Pattern 5: Rollup Queries (Tag Tables)
-- Query hourly rollup
SELECT
sensor_id,
time,
min_temperature,
max_temperature,
avg_temperature,
count
FROM sensors
WHERE rollup = hour
DURATION 7 DAY;
-- Minute-level rollup
SELECT * FROM sensors
WHERE rollup = min
DURATION 24 HOUR;
-- Second-level rollup
SELECT * FROM sensors
WHERE rollup = sec
DURATION 1 HOUR;
Query Optimization
1. Always Use Time Filters
Bad (scans all data):
SELECT * FROM sensors WHERE sensor_id = 'sensor01';
Good (scans relevant partition):
SELECT * FROM sensors
WHERE sensor_id = 'sensor01'
DURATION 1 HOUR;
2. Use LIMIT for Large Results
-- Limit results
SELECT * FROM logs DURATION 1 DAY LIMIT 1000;
-- Top N results
SELECT * FROM sensors
ORDER BY value DESC
LIMIT 10;
3. Query Rollup, Not Raw Data
Slow (processes millions of rows):
SELECT sensor_id, AVG(value)
FROM sensors
DURATION 30 DAY
GROUP BY sensor_id;
Fast (queries pre-aggregated data):
SELECT sensor_id, AVG(avg_value)
FROM sensors
WHERE rollup = hour
DURATION 30 DAY
GROUP BY sensor_id;
4. Use Indexes
-- Create index on frequently queried column
CREATE INDEX idx_level ON logs(level);
-- Now this query is fast
SELECT * FROM logs
WHERE level = 'ERROR'
DURATION 1 HOUR;
5. Avoid SELECT *
-- Bad (reads all columns)
SELECT * FROM sensors;
-- Good (reads only needed columns)
SELECT sensor_id, value FROM sensors DURATION 1 HOUR;
Advanced Queries
Subqueries
-- Find sensors above average
SELECT sensor_id, value
FROM sensors
WHERE value > (
SELECT AVG(value) FROM sensors DURATION 1 HOUR
)
DURATION 1 HOUR;
Common Table Expressions (CTE)
-- Calculate hourly averages
WITH hourly_avg AS (
SELECT
sensor_id,
TO_CHAR(_arrival_time, 'HH24') as hour,
AVG(value) as avg_value
FROM sensors
DURATION 24 HOUR
GROUP BY sensor_id, TO_CHAR(_arrival_time, 'HH24')
)
SELECT * FROM hourly_avg WHERE avg_value > 25.0;
Window Functions
-- Running average
SELECT
sensor_id,
value,
AVG(value) OVER (
PARTITION BY sensor_id
ORDER BY _arrival_time
ROWS BETWEEN 9 PRECEDING AND CURRENT ROW
) as moving_avg
FROM sensors
DURATION 1 HOUR;
Time Functions
Date/Time Formatting
-- Format timestamp
SELECT
TO_CHAR(_arrival_time, 'YYYY-MM-DD HH24:MI:SS') as formatted_time,
sensor_id,
value
FROM sensors DURATION 1 HOUR;
-- Extract parts
SELECT
TO_CHAR(_arrival_time, 'YYYY') as year,
TO_CHAR(_arrival_time, 'MM') as month,
TO_CHAR(_arrival_time, 'DD') as day,
TO_CHAR(_arrival_time, 'HH24') as hour
FROM logs DURATION 1 DAY;
Time Calculations
-- Current time
SELECT SYSDATE;
SELECT NOW;
-- Time arithmetic
SELECT SYSDATE - INTERVAL '1' HOUR;
SELECT NOW + INTERVAL '30' MINUTE;
-- Date conversion
SELECT TO_DATE('2025-10-10', 'YYYY-MM-DD');
SELECT TO_TIMESTAMP('2025-10-10 14:30:00', 'YYYY-MM-DD HH24:MI:SS');
Analytical Queries
Statistical Analysis
-- Comprehensive statistics
SELECT
sensor_id,
COUNT(*) as count,
AVG(value) as mean,
STDDEV(value) as stddev,
MIN(value) as min,
MAX(value) as max,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY value) as median
FROM sensors
DURATION 1 DAY
GROUP BY sensor_id;
Trend Analysis
-- Hourly trend
SELECT
TO_CHAR(_arrival_time, 'YYYY-MM-DD HH24:00:00') as hour,
AVG(value) as avg_value,
LAG(AVG(value)) OVER (ORDER BY TO_CHAR(_arrival_time, 'YYYY-MM-DD HH24:00:00')) as prev_hour,
AVG(value) - LAG(AVG(value)) OVER (ORDER BY TO_CHAR(_arrival_time, 'YYYY-MM-DD HH24:00:00')) as change
FROM sensors
DURATION 24 HOUR
GROUP BY TO_CHAR(_arrival_time, 'YYYY-MM-DD HH24:00:00')
ORDER BY hour;
Anomaly Detection
-- Find values outside 2 standard deviations
WITH stats AS (
SELECT
AVG(value) as mean,
STDDEV(value) as stddev
FROM sensors DURATION 1 DAY
)
SELECT s.*, st.mean, st.stddev
FROM sensors s, stats st
WHERE s.value < st.mean - 2 * st.stddev
OR s.value > st.mean + 2 * st.stddev
DURATION 1 DAY;
Query Performance Monitoring
Check Active Queries
-- View running queries
SHOW STATEMENTS;
Query Execution Plan
-- Explain query plan
EXPLAIN SELECT * FROM sensors DURATION 1 HOUR;
Performance Tips
- Use time filters - Always include DURATION or time WHERE clause
- Query rollup - Use pre-aggregated data for Tag tables
- Create indexes - Index frequently queried columns on Log/Lookup tables
- Limit results - Use LIMIT to restrict result set size
- Select specific columns - Avoid SELECT *
- Batch operations - Process large datasets in chunks
Common Query Patterns
Dashboard Queries
-- Real-time status board
SELECT
device_id,
status,
last_value,
last_updated
FROM device_status
ORDER BY last_updated DESC
LIMIT 20;
-- Error summary
SELECT
level,
COUNT(*) as count,
MAX(_arrival_time) as last_occurrence
FROM logs
DURATION 1 HOUR
GROUP BY level;
Reporting Queries
-- Daily summary report
SELECT
TO_CHAR(_arrival_time, 'YYYY-MM-DD') as date,
COUNT(*) as total_records,
COUNT(DISTINCT sensor_id) as unique_sensors,
AVG(value) as avg_value
FROM sensors
DURATION 30 DAY
GROUP BY TO_CHAR(_arrival_time, 'YYYY-MM-DD')
ORDER BY date;
Alert Queries
-- Critical errors in last 5 minutes
SELECT * FROM logs
WHERE level = 'ERROR'
AND message SEARCH 'critical'
DURATION 5 MINUTE;
-- Sensors exceeding threshold
SELECT sensor_id, value, _arrival_time
FROM sensors
WHERE value > 30.0
DURATION 10 MINUTE;
Best Practices
- Always filter by time - Use DURATION or time-based WHERE clause
- Test queries on small time ranges first - Verify before running on large datasets
- Use LIMIT - Prevent accidentally returning millions of rows
- Query rollup for analytics - Much faster than aggregating raw data
- Create indexes - For frequently queried columns on Log/Lookup tables
- Monitor performance - Use SHOW STATEMENTS to track slow queries
- Use appropriate table types - Tag for sensors, Log for events, etc.
Troubleshooting
Query too slow:
- Add time filter (DURATION)
- Use LIMIT clause
- Query rollup instead of raw data
- Create index on filter columns
Out of memory:
- Reduce time range
- Use LIMIT
- Select fewer columns
- Increase server memory (MAX_QPX_MEM)
Connection timeout:
- Increase query timeout
- Break into smaller queries
- Optimize query (add indexes, use rollup)
Next Steps
- User Management: User Management - Control query permissions
- Backup: Backup & Recovery - Protect your data
- Core Concepts: Indexing - Deep dive into performance
Master these query patterns and unlock the full power of Machbase analytics!
Last updated on