commit 32ef7401e3c17af06300fd0e1c80aca212b7223e Author: lewismac Date: Sun Oct 5 13:10:12 2025 +0100 Initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e227ca3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,59 @@ +# File created using '.gitignore Generator' for Visual Studio Code: https://bit.ly/vscode-gig +# Created by https://www.toptal.com/developers/gitignore/api/visualstudiocode,macos +# Edit at https://www.toptal.com/developers/gitignore?templates=visualstudiocode,macos + +### macOS ### +# General +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +### macOS Patch ### +# iCloud generated files +*.icloud + +### VisualStudioCode ### +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json +!.vscode/*.code-snippets + +# Local History for Visual Studio Code +.history/ + +# Built Visual Studio Code Extensions +*.vsix + +### VisualStudioCode Patch ### +# Ignore all local history of files +.history +.ionide + +# End of https://www.toptal.com/developers/gitignore/api/visualstudiocode,macos + +# Custom rules (everything added below won't be overriden by 'Generate .gitignore File' if you use 'Update' option) + diff --git a/config.conf b/config.conf new file mode 100644 index 0000000..6e7942c --- /dev/null +++ b/config.conf @@ -0,0 +1,158 @@ +{ + "trading_pairs": [ + { + "symbol": "BTCUSDT", + "enabled": true, + "priority": 1, + "record_from_date": "2020-01-01T00:00:00Z" + }, + { + "symbol": "ETHUSDT", + "enabled": true, + "priority": 1, + "record_from_date": "2020-01-01T00:00:00Z" + }, + { + "symbol": "BNBUSDT", + "enabled": true, + "priority": 2, + "record_from_date": "2020-01-01T00:00:00Z" + }, + { + "symbol": "XRPUSDT", + "enabled": true, + "priority": 3, + "record_from_date": "2020-01-01T00:00:00Z" + }, + { + "symbol": "SOLUSDT", + "enabled": true, + "priority": 2, + "record_from_date": "2020-01-01T00:00:00Z" + }, + { + "symbol": "HBARUSDT", + "enabled": true, + "priority": 1, + "record_from_date": "2020-01-01T00:00:00Z" + }, + { + "symbol": "HBARBTC", + "enabled": true, + "priority": 1, + "record_from_date": "2020-01-01T00:00:00Z" + } + ], + "technical_indicators": { + "enabled": ["sma", "ema", "rsi", "macd", "bb", "atr"], + "periods": { + "sma": [20, 50, 200], + "ema": [12, 26], + "rsi": [14], + "macd": { + "fast": 12, + "slow": 26, + "signal": 9 + }, + "bb": { + "period": 20, + "std": 2 + }, + "atr": [14], + "stoch": { + "k_period": 14, + "d_period": 3 + }, + "adx": [14] + }, + "calculation_intervals": ["1m", "5m", "15m", "1h", "4h", "1d"] + }, + "collection": { + "bulk_chunk_size": 1000, + "websocket_reconnect_delay": 5, + "tick_batch_size": 100, + "candle_intervals": ["1m", "5m", "15m", "1h", "4h", "1d"], + "max_retries": 3, + "retry_delay": 1, + "rate_limit_requests_per_minute": 2000, + "concurrent_symbol_limit": 10, + "default_record_from_date": "2020-01-01T00:00:00Z" + }, + "gap_filling": { + "enable_auto_gap_filling": true, + "auto_fill_schedule_hours": 24, + "max_gap_size_candles": 1000, + "min_gap_size_candles": 2, + "enable_intelligent_averaging": true, + "averaging_lookback_candles": 10, + "max_consecutive_empty_candles": 5, + 
"intervals_to_monitor": ["1m", "5m", "15m", "1h", "4h", "1d"] + }, + "database": { + "batch_insert_size": 1000, + "compression_after_days": 7, + "retention_policy_days": 365, + "vacuum_analyze_interval_hours": 24, + "connection_pool": { + "min_size": 10, + "max_size": 50, + "command_timeout": 60 + }, + "partitioning": { + "chunk_time_interval": "1 day", + "compress_chunk_time_interval": "7 days" + } + }, + "gap_filling": { + "enable_auto_gap_filling": true, + "auto_fill_schedule_hours": 24, + "intervals_to_monitor": ["1m", "5m", "15m", "1h", "4h", "1d"], + "max_gap_size_candles": 1000, + "max_consecutive_empty_candles": 5, + "averaging_lookback_candles": 10, + "enable_intelligent_averaging": true, + "max_fill_attempts": 3 + }, + "ui": { + "refresh_interval_seconds": 5, + "max_chart_points": 1000, + "default_timeframe": "1d", + "theme": "dark", + "enable_realtime_updates": true + }, + "monitoring": { + "enable_performance_metrics": true, + "log_slow_queries": true, + "slow_query_threshold_ms": 1000, + "enable_health_checks": true, + "health_check_interval_seconds": 30 + }, + "alerts": { + "enable_price_alerts": false, + "enable_volume_alerts": false, + "enable_system_alerts": true, + "price_change_threshold_percent": 5.0, + "volume_change_threshold_percent": 50.0 + }, + "data_quality": { + "enable_data_validation": true, + "max_price_deviation_percent": 10.0, + "min_volume_threshold": 0.001, + "enable_outlier_detection": true, + "outlier_detection_window": 100 + }, + "features": { + "enable_candle_generation_from_ticks": true, + "enable_technical_indicator_alerts": false, + "enable_market_analysis": true, + "enable_backtesting": false, + "enable_paper_trading": false + }, + "system": { + "max_memory_usage_mb": 8192, + "max_cpu_usage_percent": 80, + "enable_auto_scaling": false, + "enable_caching": true, + "cache_ttl_seconds": 300 + } +} \ No newline at end of file diff --git a/db.py b/db.py new file mode 100644 index 0000000..520657e --- /dev/null +++ b/db.py @@ -0,0 +1,1677 @@ +#!/usr/bin/env python3 +""" +db.py - TimescaleDB Database Operations and Schema Management + +Database operations, connection pooling, and schema management for crypto trading data +""" + +import asyncio +import logging +import os +from datetime import datetime, timedelta, timezone, date, time as dt_time +from typing import Dict, List, Optional, Any, Tuple, Iterable + +import asyncpg +from contextlib import asynccontextmanager +from dotenv import load_dotenv + +# Load environment variables +load_dotenv('variables.env') + + +def _utc_now() -> datetime: + return datetime.now(timezone.utc) + + +def _ensure_dt_aware_utc(dt: datetime) -> datetime: + if isinstance(dt, dt_time): + dt = datetime.combine(_utc_now().date(), dt) + if dt.tzinfo is None: + return dt.replace(tzinfo=timezone.utc) + return dt.astimezone(timezone.utc) + + +def _safe_upper(s: Optional[str]) -> Optional[str]: + return s.upper() if isinstance(s, str) else s + + +class DatabaseManager: + """Manages TimescaleDB operations with connection pooling""" + + def __init__(self): + self.pool: Optional[asyncpg.Pool] = None + self.logger = logging.getLogger(__name__) + self._connection_semaphore: Optional[asyncio.Semaphore] = None + + # Database connection parameters + self.db_config = { + 'host': os.getenv('DB_HOST', 'localhost'), + 'port': int(os.getenv('DB_PORT', 5432)), + 'database': os.getenv('DB_NAME', 'crypto_trading'), + 'user': os.getenv('DB_USER', 'postgres'), + 'password': os.getenv('DB_PASSWORD', 'password'), + 'min_size': 
int(os.getenv('DB_POOL_MIN_SIZE', 10)), + 'max_size': int(os.getenv('DB_POOL_MAX_SIZE', 50)), + 'command_timeout': int(os.getenv('DB_COMMAND_TIMEOUT', 60)), + } + + async def initialize(self): + """Initialize database connection pool and create tables""" + try: + self.logger.info("Initializing database connection pool") + self.pool = await asyncpg.create_pool( + host=self.db_config['host'], + port=self.db_config['port'], + database=self.db_config['database'], + user=self.db_config['user'], + password=self.db_config['password'], + min_size=self.db_config['min_size'], + max_size=self.db_config['max_size'], + command_timeout=self.db_config['command_timeout'], + ) + + # Initialize semaphore to prevent connection exhaustion (20% headroom) + max_concurrent = max(1, int(self.db_config['max_size'] * 0.8)) + self._connection_semaphore = asyncio.Semaphore(max_concurrent) + self.logger.info(f"Database connection pool created successfully (max concurrent: {max_concurrent})") + + # Create tables and hypertables + await self.create_schema() + self.logger.info("Database initialization complete") + except Exception as e: + self.logger.error(f"Database initialization failed: {e}", exc_info=True) + raise + + @asynccontextmanager + async def acquire_with_semaphore(self): + """Acquire connection with semaphore to prevent pool exhaustion""" + if self._connection_semaphore is None or self.pool is None: + raise RuntimeError("DatabaseManager not initialized") + async with self._connection_semaphore: + async with self.pool.acquire() as conn: + yield conn + + async def create_schema(self): + """Create database schema with TimescaleDB hypertables""" + if not self.pool: + raise RuntimeError("Pool not initialized") + + async with self.pool.acquire() as conn: + try: + # Try to enable TimescaleDB extension + try: + await conn.execute("CREATE EXTENSION IF NOT EXISTS timescaledb CASCADE;") + timescale_ok = True + except Exception as e: + self.logger.warning(f"TimescaleDB extension not available or failed to enable: {e}") + timescale_ok = False + + # Create tables + await conn.execute(""" + CREATE TABLE IF NOT EXISTS crypto_ticks ( + time TIMESTAMPTZ NOT NULL, + symbol TEXT NOT NULL, + exchange TEXT NOT NULL DEFAULT 'binance', + price DECIMAL(20,8) NOT NULL, + quantity DECIMAL(20,8) NOT NULL, + trade_id BIGINT, + is_buyer_maker BOOLEAN, + PRIMARY KEY (time, symbol, trade_id) + ); + """) + + await conn.execute(""" + CREATE TABLE IF NOT EXISTS crypto_ohlcv ( + time TIMESTAMPTZ NOT NULL, + symbol TEXT NOT NULL, + exchange TEXT NOT NULL DEFAULT 'binance', + interval TEXT NOT NULL, + open_price DECIMAL(20,8) NOT NULL, + high_price DECIMAL(20,8) NOT NULL, + low_price DECIMAL(20,8) NOT NULL, + close_price DECIMAL(20,8) NOT NULL, + volume DECIMAL(20,8) NOT NULL, + quote_volume DECIMAL(20,8), + trade_count INTEGER, + PRIMARY KEY (time, symbol, interval) + ); + """) + + await conn.execute(""" + CREATE TABLE IF NOT EXISTS technical_indicators ( + time TIMESTAMPTZ NOT NULL, + symbol TEXT NOT NULL, + exchange TEXT NOT NULL DEFAULT 'binance', + interval TEXT NOT NULL, + indicator_name TEXT NOT NULL, + indicator_value DECIMAL(20,8), + metadata JSONB, + PRIMARY KEY (time, symbol, interval, indicator_name) + ); + """) + + # Create hypertables + if timescale_ok: + try: + await conn.execute("SELECT create_hypertable('crypto_ticks', 'time', if_not_exists => TRUE);") + except asyncpg.PostgresError as e: + self.logger.debug(f"crypto_ticks hypertable setup note: {e}") + + try: + await conn.execute("SELECT create_hypertable('crypto_ohlcv', 
'time', if_not_exists => TRUE);") + except asyncpg.PostgresError as e: + self.logger.debug(f"crypto_ohlcv hypertable setup note: {e}") + + try: + await conn.execute("SELECT create_hypertable('technical_indicators', 'time', if_not_exists => TRUE);") + except asyncpg.PostgresError as e: + self.logger.debug(f"technical_indicators hypertable setup note: {e}") + + # Create indexes for better query performance + await self.create_indexes(conn) + + # Setup compression policies when possible + if timescale_ok: + await self.setup_compression_policies(conn) + else: + self.logger.info("Skipping compression policies because TimescaleDB extension is unavailable") + + self.logger.info("Database schema created successfully") + except Exception as e: + self.logger.error(f"Error creating database schema: {e}", exc_info=True) + raise + + async def create_indexes(self, conn: asyncpg.Connection): + """Create indexes for better query performance""" + index_sqls = [ + # Ticks indexes + "CREATE INDEX IF NOT EXISTS idx_crypto_ticks_symbol_time ON crypto_ticks (symbol, time DESC);", + "CREATE INDEX IF NOT EXISTS idx_crypto_ticks_time_symbol ON crypto_ticks (time DESC, symbol);", + # OHLCV indexes + "CREATE INDEX IF NOT EXISTS idx_crypto_ohlcv_symbol_interval_time ON crypto_ohlcv (symbol, interval, time DESC);", + "CREATE INDEX IF NOT EXISTS idx_crypto_ohlcv_time_symbol ON crypto_ohlcv (time DESC, symbol);", + # Indicators indexes + "CREATE INDEX IF NOT EXISTS idx_technical_indicators_symbol_indicator_time ON technical_indicators (symbol, indicator_name, time DESC);", + "CREATE INDEX IF NOT EXISTS idx_technical_indicators_time_symbol ON technical_indicators (time DESC, symbol);", + ] + for sql in index_sqls: + try: + await conn.execute(sql) + except Exception as e: + self.logger.warning(f"Index creation warning: {e}") + + async def setup_compression_policies(self, conn: asyncpg.Connection): + """Setup TimescaleDB compression policies""" + try: + compression_alters = [ + """ + ALTER TABLE crypto_ticks SET ( + timescaledb.compress, + timescaledb.compress_segmentby = 'symbol,exchange', + timescaledb.compress_orderby = 'time DESC' + ); + """, + """ + ALTER TABLE crypto_ohlcv SET ( + timescaledb.compress, + timescaledb.compress_segmentby = 'symbol,exchange,interval', + timescaledb.compress_orderby = 'time DESC' + ); + """, + """ + ALTER TABLE technical_indicators SET ( + timescaledb.compress, + timescaledb.compress_segmentby = 'symbol,exchange,interval,indicator_name', + timescaledb.compress_orderby = 'time DESC' + ); + """, + ] + for q in compression_alters: + try: + await conn.execute(q) + except Exception as e: + self.logger.warning(f"Compression setup warning: {e}") + + policies = [ + "SELECT add_compression_policy('crypto_ticks', INTERVAL '7 days');", + "SELECT add_compression_policy('crypto_ohlcv', INTERVAL '7 days');", + "SELECT add_compression_policy('technical_indicators', INTERVAL '7 days');", + ] + for q in policies: + try: + await conn.execute(q) + except Exception as e: + self.logger.warning(f"Compression policy warning: {e}") + except Exception as e: + self.logger.warning(f"Compression setup failed: {e}") + + # -------------------------- + # Insertion methods + # -------------------------- + + async def insert_tick_single(self, tick_data: Dict[str, Any]): + """Insert single tick record""" + if not self.pool: + raise RuntimeError("Pool not initialized") + + tick_time = _ensure_dt_aware_utc(tick_data['time']) + symbol = _safe_upper(tick_data['symbol']) + exchange = tick_data.get('exchange', 'binance') + + 
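+        # Expected tick_data shape (values illustrative only):
+        #   {'time': datetime(..., tzinfo=timezone.utc), 'symbol': 'BTCUSDT',
+        #    'price': 42000.5, 'quantity': 0.01, 'trade_id': 123456789,
+        #    'is_buyer_maker': False, 'exchange': 'binance'}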
async with self.pool.acquire() as conn: + await conn.execute( + """ + INSERT INTO crypto_ticks + (time, symbol, exchange, price, quantity, trade_id, is_buyer_maker) + VALUES ($1, $2, $3, $4, $5, $6, $7) + ON CONFLICT (time, symbol, trade_id) DO NOTHING; + """, + tick_time, symbol, exchange, + tick_data['price'], tick_data['quantity'], tick_data['trade_id'], + tick_data.get('is_buyer_maker', None), + ) + + async def insert_ticks_batch(self, ticks_data: List[Dict[str, Any]]): + """Insert multiple tick records in batch""" + if not ticks_data or not self.pool: + return + + records: List[Tuple[Any, ...]] = [] + for t in ticks_data: + tick_time = _ensure_dt_aware_utc(t['time']) + symbol = _safe_upper(t['symbol']) + exchange = t.get('exchange', 'binance') + records.append(( + tick_time, symbol, exchange, t['price'], t['quantity'], + t['trade_id'], t.get('is_buyer_maker', None) + )) + + async with self.pool.acquire() as conn: + await conn.executemany( + """ + INSERT INTO crypto_ticks + (time, symbol, exchange, price, quantity, trade_id, is_buyer_maker) + VALUES ($1, $2, $3, $4, $5, $6, $7) + ON CONFLICT (time, symbol, trade_id) DO NOTHING; + """, + records + ) + self.logger.debug(f"Inserted {len(records)} tick records") + + async def insert_ohlcv_single(self, ohlcv_data: Dict[str, Any]): + """Insert single OHLCV record""" + if not self.pool: + raise RuntimeError("Pool not initialized") + + ts = _ensure_dt_aware_utc(ohlcv_data['time']) + symbol = _safe_upper(ohlcv_data['symbol']) + exchange = ohlcv_data.get('exchange', 'binance') + interval = ohlcv_data['interval'] + + async with self.pool.acquire() as conn: + await conn.execute( + """ + INSERT INTO crypto_ohlcv + (time, symbol, exchange, interval, open_price, high_price, + low_price, close_price, volume, quote_volume, trade_count) + VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11) + ON CONFLICT (time, symbol, interval) DO UPDATE SET + open_price = EXCLUDED.open_price, + high_price = EXCLUDED.high_price, + low_price = EXCLUDED.low_price, + close_price = EXCLUDED.close_price, + volume = EXCLUDED.volume, + quote_volume = EXCLUDED.quote_volume, + trade_count = EXCLUDED.trade_count; + """, + ts, symbol, exchange, interval, + ohlcv_data['open_price'], ohlcv_data['high_price'], + ohlcv_data['low_price'], ohlcv_data['close_price'], + ohlcv_data['volume'], ohlcv_data.get('quote_volume'), + ohlcv_data.get('trade_count'), + ) + + async def insert_ohlcv_batch(self, ohlcv_data: List[Dict[str, Any]]): + """Insert multiple OHLCV records in batch""" + if not ohlcv_data or not self.pool: + return + + records: List[Tuple[Any, ...]] = [] + for c in ohlcv_data: + ts = _ensure_dt_aware_utc(c['time']) + symbol = _safe_upper(c['symbol']) + exchange = c.get('exchange', 'binance') + interval = c['interval'] + records.append(( + ts, symbol, exchange, interval, + c['open_price'], c['high_price'], c['low_price'], c['close_price'], + c['volume'], c.get('quote_volume'), c.get('trade_count') + )) + + async with self.pool.acquire() as conn: + await conn.executemany( + """ + INSERT INTO crypto_ohlcv + (time, symbol, exchange, interval, open_price, high_price, + low_price, close_price, volume, quote_volume, trade_count) + VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11) + ON CONFLICT (time, symbol, interval) DO UPDATE SET + open_price = EXCLUDED.open_price, + high_price = EXCLUDED.high_price, + low_price = EXCLUDED.low_price, + close_price = EXCLUDED.close_price, + volume = EXCLUDED.volume, + quote_volume = EXCLUDED.quote_volume, + trade_count = EXCLUDED.trade_count; + """, + records + ) + 
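+        # executemany() runs the INSERT once per record set; for very large
+        # backfills, asyncpg's copy_records_to_table() is usually faster, though
+        # it cannot express the ON CONFLICT upsert used here.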
self.logger.debug(f"Inserted {len(records)} OHLCV records") + + async def insert_indicators_batch(self, symbol: str, interval: str, indicators_data: List[Dict[str, Any]]): + """Insert technical indicators batch""" + if not indicators_data or not self.pool: + return + + symbol_u = _safe_upper(symbol) + records: List[Tuple[Any, ...]] = [] + for ind in indicators_data: + ts = _ensure_dt_aware_utc(ind['time']) + records.append(( + ts, symbol_u, 'binance', interval, + ind['indicator_name'], ind.get('indicator_value'), + ind.get('metadata'), + )) + + async with self.pool.acquire() as conn: + await conn.executemany( + """ + INSERT INTO technical_indicators + (time, symbol, exchange, interval, indicator_name, indicator_value, metadata) + VALUES ($1,$2,$3,$4,$5,$6,$7) + ON CONFLICT (time, symbol, interval, indicator_name) DO UPDATE SET + indicator_value = EXCLUDED.indicator_value, + metadata = EXCLUDED.metadata; + """, + records + ) + self.logger.debug(f"Inserted {len(records)} indicator records for {symbol_u} {interval}") + + # -------------------------- + # Retrieval methods + # -------------------------- + + async def get_tick_data(self, symbol: str, start_time: datetime, end_time: datetime) -> List[Dict[str, Any]]: + """Get tick data for a symbol within time range""" + if not self.pool: + return [] + symbol_u = _safe_upper(symbol) + start_t = _ensure_dt_aware_utc(start_time) + end_t = _ensure_dt_aware_utc(end_time) + async with self.pool.acquire() as conn: + rows = await conn.fetch( + """ + SELECT time, symbol, exchange, price, quantity, trade_id, is_buyer_maker + FROM crypto_ticks + WHERE symbol = $1 AND time >= $2 AND time <= $3 + ORDER BY time ASC; + """, + symbol_u, start_t, end_t + ) + return [dict(row) for row in rows] + + async def get_ohlcv_data(self, symbol: str, interval: str, limit: int = 1000) -> List[Dict[str, Any]]: + """Get OHLCV data for a symbol and interval (returns newest first)""" + if not self.pool: + return [] + symbol_u = _safe_upper(symbol) + async with self.pool.acquire() as conn: + rows = await conn.fetch( + """ + SELECT time, symbol, exchange, interval, open_price, high_price, low_price, + close_price, volume, quote_volume, trade_count + FROM crypto_ohlcv + WHERE symbol = $1 AND interval = $2 + ORDER BY time DESC + LIMIT $3; + """, + symbol_u, interval, limit + ) + return [dict(row) for row in rows] + + async def get_recent_candles(self, symbol: str, interval: str, limit: int = 1000) -> List[Dict[str, Any]]: + """ + Get recent candles for a symbol and interval (alias for chart display) + Returns data in ASCENDING order with JS-friendly field names + """ + if not self.pool: + return [] + symbol_u = _safe_upper(symbol) + async with self.pool.acquire() as conn: + rows = await conn.fetch( + """ + SELECT time, symbol, exchange, interval, open_price, high_price, low_price, + close_price, volume, quote_volume, trade_count + FROM crypto_ohlcv + WHERE symbol = $1 AND interval = $2 + ORDER BY time ASC + LIMIT $3; + """, + symbol_u, interval, limit + ) + + result: List[Dict[str, Any]] = [] + for row in rows: + t: Optional[datetime] = row['time'] + result.append({ + 'timestamp': t.isoformat() if t else None, + 'symbol': row['symbol'], + 'exchange': row['exchange'], + 'interval': row['interval'], + 'open': float(row['open_price']) if row['open_price'] is not None else None, + 'high': float(row['high_price']) if row['high_price'] is not None else None, + 'low': float(row['low_price']) if row['low_price'] is not None else None, + 'close': float(row['close_price']) if 
row['close_price'] is not None else None, + 'volume': float(row['volume']) if row['volume'] is not None else None, + 'quote_volume': float(row['quote_volume']) if row['quote_volume'] is not None else None, + 'trade_count': int(row['trade_count']) if row['trade_count'] is not None else None, + }) + self.logger.info(f"Retrieved {len(result)} candles for {symbol_u} {interval}") + return result + + async def get_latest_ohlcv(self, symbol: str, interval: str = '1d') -> Optional[Dict[str, Any]]: + """Get latest OHLCV record for a symbol""" + if not self.pool: + return None + symbol_u = _safe_upper(symbol) + async with self.pool.acquire() as conn: + row = await conn.fetchrow( + """ + SELECT time, symbol, exchange, interval, open_price, high_price, low_price, + close_price, volume, quote_volume, trade_count + FROM crypto_ohlcv + WHERE symbol = $1 AND interval = $2 + ORDER BY time DESC + LIMIT 1; + """, + symbol_u, interval + ) + return dict(row) if row else None + + async def get_indicators_data(self, symbol: str, interval: str, indicator_name: str, + limit: int = 1000) -> List[Dict[str, Any]]: + """Get technical indicator data""" + if not self.pool: + return [] + symbol_u = _safe_upper(symbol) + async with self.pool.acquire() as conn: + rows = await conn.fetch( + """ + SELECT time, symbol, exchange, interval, indicator_name, indicator_value, metadata + FROM technical_indicators + WHERE symbol = $1 AND interval = $2 AND indicator_name = $3 + ORDER BY time DESC + LIMIT $4; + """, + symbol_u, interval, indicator_name, limit + ) + return [dict(row) for row in rows] + + async def get_available_symbols(self) -> List[str]: + """Get list of all available symbols in the database""" + if not self.pool: + return [] + async with self.pool.acquire() as conn: + rows = await conn.fetch( + """ + SELECT DISTINCT symbol FROM crypto_ohlcv ORDER BY symbol ASC; + """ + ) + return [row['symbol'] for row in rows] + + async def get_current_price_and_trends(self, symbol: str) -> Optional[Dict[str, Any]]: + """ + Get current price and percentage changes for multiple timeframes + Returns price trends for 15m, 1h, 1d, 1w + """ + if not self.pool: + return None + symbol_u = _safe_upper(symbol) + now = _utc_now() + time_15m_ago = now - timedelta(minutes=15) + time_1h_ago = now - timedelta(hours=1) + time_1d_ago = now - timedelta(days=1) + time_1w_ago = now - timedelta(weeks=1) + async with self.pool.acquire() as conn: + try: + current = await conn.fetchrow( + """ + SELECT close_price, time + FROM crypto_ohlcv + WHERE symbol = $1 AND interval = '1h' + ORDER BY time DESC + LIMIT 1; + """, + symbol_u + ) + if not current: + return None + current_price = float(current['close_price']) + current_time = current['time'] + + price_15m = await conn.fetchrow( + """ + SELECT close_price FROM crypto_ohlcv + WHERE symbol = $1 AND interval = '1m' AND time <= $2 + ORDER BY time DESC LIMIT 1; + """, + symbol_u, time_15m_ago + ) + price_1h = await conn.fetchrow( + """ + SELECT close_price FROM crypto_ohlcv + WHERE symbol = $1 AND interval = '1h' AND time <= $2 + ORDER BY time DESC LIMIT 1; + """, + symbol_u, time_1h_ago + ) + price_1d = await conn.fetchrow( + """ + SELECT close_price FROM crypto_ohlcv + WHERE symbol = $1 AND interval = '1h' AND time <= $2 + ORDER BY time DESC LIMIT 1; + """, + symbol_u, time_1d_ago + ) + price_1w = await conn.fetchrow( + """ + SELECT close_price FROM crypto_ohlcv + WHERE symbol = $1 AND interval = '1d' AND time <= $2 + ORDER BY time DESC LIMIT 1; + """, + symbol_u, time_1w_ago + ) + + def calc_change(old): + 
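+                    # Percentage change of the current price vs. an older close;
+                    # returns 0.0 when no baseline candle is available.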
if old and float(old) > 0: + return round(((current_price - float(old)) / float(old)) * 100, 2) + return 0.0 + + return { + 'symbol': symbol_u, + 'current_price': current_price, + 'last_update': current_time.isoformat(), + 'trends': { + '15m': calc_change(price_15m['close_price'] if price_15m else None), + '1h': calc_change(price_1h['close_price'] if price_1h else None), + '1d': calc_change(price_1d['close_price'] if price_1d else None), + '1w': calc_change(price_1w['close_price'] if price_1w else None), + }, + } + except Exception as e: + self.logger.error(f"Error getting price trends for {symbol_u}: {e}", exc_info=True) + return None + + # -------------------------- + # Gap and coverage utilities + # -------------------------- + + def _interval_to_seconds(self, interval: str) -> int: + """Convert interval string to seconds""" + try: + if interval.endswith('m'): + return int(interval[:-1]) * 60 + if interval.endswith('h'): + return int(interval[:-1]) * 3600 + if interval.endswith('d'): + return int(interval[:-1]) * 86400 + if interval.endswith('w'): + return int(interval[:-1]) * 604800 + except Exception: + pass + # Default to 1 minute + return 60 + + def _calculate_expected_records(self, start: datetime, end: datetime, interval: str) -> int: + """Calculate expected number of records for a time period""" + start_u = _ensure_dt_aware_utc(start) + end_u = _ensure_dt_aware_utc(end) + if end_u < start_u: + return 0 + total_seconds = (end_u - start_u).total_seconds() + interval_seconds = max(1, self._interval_to_seconds(interval)) + return int(total_seconds // interval_seconds) + 1 + + async def get_data_coverage_summary(self, symbol: str, interval: str) -> Dict[str, Any]: + """Get data coverage summary for a symbol and interval""" + if not self.pool: + return { + 'symbol': symbol, 'interval': interval, + 'first_record': None, 'last_record': None, + 'total_records': 0, 'expected_records': 0, + 'missing_records': 0, 'coverage_percent': 0.0, + } + symbol_u = _safe_upper(symbol) + async with self.pool.acquire() as conn: + result = await conn.fetchrow( + """ + SELECT MIN(time) as first_record, + MAX(time) as last_record, + COUNT(*) as total_records + FROM crypto_ohlcv + WHERE symbol = $1 AND interval = $2; + """, + symbol_u, interval + ) + + if not result or not result['first_record']: + return { + 'symbol': symbol_u, 'interval': interval, + 'first_record': None, 'last_record': None, + 'total_records': 0, 'expected_records': 0, + 'missing_records': 0, 'coverage_percent': 0.0, + } + + first_record: datetime = result['first_record'] + last_record: datetime = result['last_record'] + total_records: int = result['total_records'] + expected_records = self._calculate_expected_records(first_record, last_record, interval) + missing_records = max(0, expected_records - total_records) + coverage_percent = (total_records / expected_records * 100) if expected_records > 0 else 0.0 + + return { + 'symbol': symbol_u, + 'interval': interval, + 'first_record': first_record.isoformat(), + 'last_record': last_record.isoformat(), + 'total_records': total_records, + 'expected_records': expected_records, + 'missing_records': missing_records, + 'coverage_percent': round(coverage_percent, 2), + } + + async def get_existing_time_ranges(self, symbol: str, interval: str, + start_date: datetime, end_date: datetime) -> List[Dict[str, Any]]: + """ + Get existing time ranges for a symbol and interval within a date range. + Returns list of existing data blocks to help identify gaps. 
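+        Example return shape: [{'start': <datetime>, 'end': <datetime>}, ...]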
+ """ + if not self.pool: + return [] + symbol_u = _safe_upper(symbol) + start_u = _ensure_dt_aware_utc(start_date) + end_u = _ensure_dt_aware_utc(end_date) + interval_seconds = self._interval_to_seconds(interval) + + async with self.pool.acquire() as conn: + rows = await conn.fetch( + """ + WITH ordered AS ( + SELECT time, + LAG(time) OVER (ORDER BY time) AS prev_time + FROM crypto_ohlcv + WHERE symbol = $1 AND interval = $2 + AND time >= $3 AND time <= $4 + ORDER BY time + ), + markers AS ( + SELECT time, + CASE + WHEN prev_time IS NULL OR EXTRACT(EPOCH FROM (time - prev_time)) > $5 + THEN 1 ELSE 0 + END AS is_new_block + FROM ordered + ), + blocks AS ( + SELECT time, + SUM(is_new_block) OVER (ORDER BY time) AS block_id + FROM markers + ) + SELECT MIN(time) AS block_start, MAX(time) AS block_end + FROM blocks + GROUP BY block_id + ORDER BY block_start; + """, + symbol_u, interval, start_u, end_u, interval_seconds + 1 + ) + + ranges: List[Dict[str, Any]] = [] + for row in rows: + ranges.append({'start': row['block_start'], 'end': row['block_end']}) + return ranges + + async def get_missing_time_ranges(self, symbol: str, interval: str, + start_date: datetime, end_date: datetime) -> List[Dict[str, Any]]: + """Calculate missing time ranges that need to be downloaded.""" + start_u = _ensure_dt_aware_utc(start_date) + end_u = _ensure_dt_aware_utc(end_date) + if end_u <= start_u: + return [] + + existing_blocks = await self.get_existing_time_ranges(symbol, interval, start_u, end_u) + if not existing_blocks: + return [{'start': start_u, 'end': end_u}] + + missing: List[Dict[str, Any]] = [] + current = start_u + for block in existing_blocks: + block_start: datetime = _ensure_dt_aware_utc(block['start']) + block_end: datetime = _ensure_dt_aware_utc(block['end']) + if current < block_start: + missing.append({'start': current, 'end': block_start}) + current = max(current, block_end + timedelta(microseconds=1)) + if current < end_u: + missing.append({'start': current, 'end': end_u}) + return missing + + async def check_data_exists_for_range(self, symbol: str, interval: str, + start_date: datetime, end_date: datetime) -> Dict[str, Any]: + """ + Quick check if data exists for a specific range. 
+ Returns: + Dict with 'exists', 'count', 'expected_count', 'coverage_percent', 'is_complete' + """ + if not self.pool: + return {'exists': False, 'count': 0, 'expected_count': 0, 'coverage_percent': 0.0, 'is_complete': False} + + symbol_u = _safe_upper(symbol) + start_u = _ensure_dt_aware_utc(start_date) + end_u = _ensure_dt_aware_utc(end_date) + async with self.pool.acquire() as conn: + result = await conn.fetchrow( + """ + SELECT COUNT(*) as count + FROM crypto_ohlcv + WHERE symbol = $1 AND interval = $2 + AND time >= $3 AND time <= $4; + """, + symbol_u, interval, start_u, end_u + ) + count = int(result['count']) if result else 0 + expected_count = self._calculate_expected_records(start_u, end_u, interval) + coverage_percent = (count / expected_count * 100) if expected_count > 0 else 0.0 + return { + 'exists': count > 0, + 'count': count, + 'expected_count': expected_count, + 'coverage_percent': round(coverage_percent, 2), + 'is_complete': coverage_percent >= 99.0, + } + + async def find_data_gaps(self, symbol: str, interval: str, min_gap_size: int = 2) -> List[Dict[str, Any]]: + """Find gaps in data (missing consecutive candles)""" + if not self.pool: + return [] + symbol_u = _safe_upper(symbol) + interval_seconds = self._interval_to_seconds(interval) + threshold = interval_seconds * max(1, min_gap_size) + + async with self.pool.acquire() as conn: + rows = await conn.fetch( + """ + WITH s AS ( + SELECT time, LAG(time) OVER (ORDER BY time) AS prev_time + FROM crypto_ohlcv + WHERE symbol = $1 AND interval = $2 + ORDER BY time + ) + SELECT + prev_time AS gap_start, + time AS gap_end, + EXTRACT(EPOCH FROM (time - prev_time)) / $3 AS missing_candles + FROM s + WHERE prev_time IS NOT NULL + AND EXTRACT(EPOCH FROM (time - prev_time)) > $4 + ORDER BY prev_time; + """, + symbol_u, interval, interval_seconds, threshold + ) + + gaps: List[Dict[str, Any]] = [] + for row in rows: + start_dt: datetime = row['gap_start'] + end_dt: datetime = row['gap_end'] + missing = max(0, int(row['missing_candles']) - 1) + gaps.append({ + 'gap_start': start_dt.isoformat(), + 'gap_end': end_dt.isoformat(), + 'missing_candles': missing, + 'duration_hours': round((end_dt - start_dt).total_seconds() / 3600, 2), + }) + return gaps + + async def detect_gaps(self, symbol: str, interval: str) -> Dict[str, Any]: + """Detect data gaps for a symbol and interval""" + coverage = await self.get_data_coverage_summary(symbol, interval) + gaps = await self.find_data_gaps(symbol, interval, min_gap_size=2) + return {'coverage': coverage, 'gaps': gaps} + + async def get_data_coverage_by_day(self, symbol: str, interval: str, + start_date: datetime, end_date: datetime) -> Dict[str, Any]: + """Get daily coverage statistics for a symbol/interval""" + if not self.pool: + return {'daily_coverage': []} + symbol_u = _safe_upper(symbol) + start_u = _ensure_dt_aware_utc(start_date) + end_u = _ensure_dt_aware_utc(end_date) + try: + async with self.pool.acquire() as conn: + interval_seconds = self._interval_to_seconds(interval) + records_per_day = max(1, 86400 // max(1, interval_seconds)) + rows = await conn.fetch( + """ + SELECT DATE(time) as date, + COUNT(*) as actual_records, + $3 as expected_records, + ROUND((COUNT(*)::decimal / $3) * 100, 2) as coverage_percent + FROM crypto_ohlcv + WHERE symbol = $1 + AND interval = $2 + AND time >= $4 + AND time <= $5 + GROUP BY DATE(time) + ORDER BY DATE(time) ASC; + """, + symbol_u, interval, records_per_day, start_u, end_u + ) + daily: List[Dict[str, Any]] = [] + for row in rows: + coverage_pct = 
float(row['coverage_percent']) + status = 'complete' if coverage_pct >= 95 else ('partial' if coverage_pct >= 50 else 'empty') + # row['date'] is a date; serialize to ISO date + d: date = row['date'] + daily.append({ + 'date': d.isoformat(), + 'actual_records': int(row['actual_records']), + 'expected_records': int(row['expected_records']), + 'coverage_percent': coverage_pct, + 'status': status, + }) + return { + 'symbol': symbol_u, + 'interval': interval, + 'start_date': start_u.isoformat(), + 'end_date': end_u.isoformat(), + 'daily_coverage': daily, + } + except Exception as e: + self.logger.error(f"Error getting daily coverage: {e}", exc_info=True) + return {'daily_coverage': []} + + async def get_all_pairs_gap_status(self) -> List[Dict[str, Any]]: + """ + Get gap status for all trading pairs across all intervals + Returns comprehensive status for the monitoring UI + """ + from utils import load_config # local import to avoid circular imports + config = load_config() + intervals = config.get('collection', {}).get('candle_intervals', ['1m', '5m', '15m', '1h', '4h', '1d']) + results: List[Dict[str, Any]] = [] + + for pair in config.get('trading_pairs', []): + symbol = pair['symbol'] + record_from_date = pair.get('record_from_date') or \ + config.get('collection', {}).get('default_record_from_date', '2020-01-01T00:00:00Z') + for interval in intervals: + try: + gap_info = await self.detect_gaps(symbol, interval) + coverage = gap_info.get('coverage', {}) + gaps = gap_info.get('gaps', []) + + if len(gaps) == 0: + status = 'complete' + elif any(g['missing_candles'] > 100 for g in gaps): + status = 'filling' + elif coverage.get('total_records', 0) == 0: + status = 'empty' + else: + status = 'has_gaps' + + fillable_gaps = [g for g in gaps if g['missing_candles'] <= 100] + results.append({ + 'symbol': symbol, + 'interval': interval, + 'status': status, + 'total_gaps': len(gaps), + 'fillable_gaps': len(fillable_gaps), + 'coverage_percent': coverage.get('coverage_percent', 0), + 'start_date': record_from_date, + 'first_record': coverage.get('first_record'), + 'last_record': coverage.get('last_record'), + 'total_records': coverage.get('total_records', 0), + 'expected_records': coverage.get('expected_records', 0), + 'missing_records': coverage.get('missing_records', 0), + 'gaps': gaps, + }) + except Exception as e: + self.logger.error(f"Error getting status for {symbol} {interval}: {e}") + continue + return results + + async def fill_gaps_intelligently(self, symbol: str, interval: str, max_attempts: int = 3) -> Dict[str, Any]: + """ + Intelligently fill gaps with retry logic and averaging for unfillable gaps + """ + from binance.client import Client # local import + self.logger.info(f"Starting intelligent gap fill for {symbol} {interval}") + gap_info = await self.detect_gaps(symbol, interval) + gaps = gap_info.get('gaps', []) + if not gaps: + return {'status': 'success', 'message': 'No gaps found', 'gaps_filled': 0, 'averaged_candles': 0} + + api_key = os.getenv('BINANCE_API_KEY') + secret_key = os.getenv('BINANCE_SECRET_KEY') + client = Client(api_key, secret_key) if api_key and secret_key else Client() + + gaps_filled = 0 + averaged_candles = 0 + failed_gaps: List[Dict[str, Any]] = [] + + for gap in gaps: + gap_start = _ensure_dt_aware_utc(datetime.fromisoformat(gap['gap_start'])) + gap_end = _ensure_dt_aware_utc(datetime.fromisoformat(gap['gap_end'])) + missing_candles = gap['missing_candles'] + if missing_candles > 1000: + self.logger.info(f"Skipping large gap: {missing_candles} candles") + 
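+                # Gaps above 1,000 candles are intentionally skipped here; they are
+                # expected to be covered by the bulk historical download path instead.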
continue + + filled = False + for attempt in range(max_attempts): + try: + self.logger.info(f"Attempt {attempt + 1}/{max_attempts} to fill gap: {gap_start} to {gap_end}") + klines = client.get_historical_klines( + symbol=symbol, + interval=interval, + start_str=int(gap_start.timestamp() * 1000), + end_str=int(gap_end.timestamp() * 1000), + limit=1000 + ) + if klines and len(klines) > 0: + # Normalize to parse_kline_data shape + from utils import parse_kline_data + ohlcv_batch: List[Dict[str, Any]] = [] + for k in klines: + try: + ws_like = { + 'e': 'kline', + 'E': int(k[6]), + 's': _safe_upper(symbol), + 'k': { + 't': int(k[0]), + 'T': int(k[6]), + 's': _safe_upper(symbol), + 'i': interval, + 'o': str(k[1]), + 'h': str(k[2]), + 'l': str(k[3]), + 'c': str(k[4]), + 'v': str(k[5]), + 'q': str(k[7]), + 'n': int(k[8]), + 'x': True, + }, + } + parsed = parse_kline_data(ws_like) + ohlcv_batch.append(parsed) + except Exception as pe: + self.logger.error(f"Error parsing kline: {pe}") + continue + if ohlcv_batch: + await self.insert_ohlcv_batch(ohlcv_batch) + gaps_filled += 1 + filled = True + self.logger.info(f"Successfully filled gap with {len(ohlcv_batch)} records") + break + else: + self.logger.warning(f"No parsed data produced for gap {gap_start} to {gap_end}") + else: + self.logger.warning(f"No data returned from Binance for gap {gap_start} to {gap_end}") + except Exception as e: + self.logger.error(f"Attempt {attempt + 1} failed: {e}") + if attempt < max_attempts - 1: + await asyncio.sleep(2 ** attempt) + + if not filled and missing_candles <= 5: + try: + self.logger.info(f"Using intelligent averaging for small gap: {missing_candles} candles") + averaged = await self._fill_gap_with_averaging(symbol, interval, gap_start, gap_end, missing_candles) + averaged_candles += averaged + if averaged > 0: + gaps_filled += 1 + except Exception as e: + self.logger.error(f"Error averaging gap: {e}") + failed_gaps.append(gap) + elif not filled: + failed_gaps.append(gap) + + return { + 'status': 'success', + 'message': f'Filled {gaps_filled} gaps ({averaged_candles} via averaging)', + 'gaps_filled': gaps_filled, + 'averaged_candles': averaged_candles, + 'failed_gaps': len(failed_gaps), + 'total_gaps': len(gaps), + } + + async def get_prioritized_gaps(self, symbol: str, interval: str) -> List[Dict[str, Any]]: + """ + Get gaps prioritized by importance (recent gaps first, then by size) + This helps fill the most critical gaps first + """ + gaps = (await self.detect_gaps(symbol, interval)).get('gaps', []) + if not gaps: + return [] + now = _utc_now() + for g in gaps: + gap_end = datetime.fromisoformat(g['gap_end']) + gap_end = _ensure_dt_aware_utc(gap_end) + days_old = (now - gap_end).days + recency_score = max(0, 365 - days_old) / 365 * 100.0 + size_score = min(100.0, 100.0 / max(1, g['missing_candles'])) + fillable_bonus = 50.0 if g['missing_candles'] <= 100 else 0.0 + g['priority_score'] = recency_score + size_score + fillable_bonus + g['days_old'] = days_old + gaps.sort(key=lambda x: x['priority_score'], reverse=True) + return gaps + + async def _fill_gap_with_averaging(self, symbol: str, interval: str, + gap_start: datetime, gap_end: datetime, missing_candles: int) -> int: + """Fill a gap using intelligent averaging based on surrounding candles""" + if not self.pool: + return 0 + symbol_u = _safe_upper(symbol) + start_u = _ensure_dt_aware_utc(gap_start) + end_u = _ensure_dt_aware_utc(gap_end) + + async with self.pool.acquire() as conn: + try: + lookback = 10 + before = await conn.fetch( + """ + SELECT 
open_price, high_price, low_price, close_price, volume + FROM crypto_ohlcv + WHERE symbol = $1 AND interval = $2 AND time < $3 + ORDER BY time DESC LIMIT $4; + """, + symbol_u, interval, start_u, lookback + ) + after = await conn.fetch( + """ + SELECT open_price, high_price, low_price, close_price, volume + FROM crypto_ohlcv + WHERE symbol = $1 AND interval = $2 AND time > $3 + ORDER BY time ASC LIMIT $4; + """, + symbol_u, interval, end_u, lookback + ) + if not before or not after: + self.logger.warning("Not enough surrounding data for averaging") + return 0 + + avg_open = sum(float(c['open_price']) for c in before) / len(before) + avg_high = sum(float(c['high_price']) for c in before) / len(before) + avg_low = sum(float(c['low_price']) for c in before) / len(before) + avg_close = sum(float(c['close_price']) for c in before) / len(before) + avg_volume = sum(float(c['volume']) for c in before) / len(before) + + interval_seconds = self._interval_to_seconds(interval) + current_time = start_u + gen: List[Dict[str, Any]] = [] + + import random + while current_time < end_u: + variation = random.uniform(0.999, 1.001) + gen.append({ + "time": current_time, + "symbol": symbol_u, + "exchange": "binance", + "interval": interval, + "open_price": avg_open * variation, + "high_price": avg_high * variation * 1.001, + "low_price": avg_low * variation * 0.999, + "close_price": avg_close * variation, + "volume": avg_volume * variation, + "quote_volume": None, + "trade_count": None, + }) + current_time = current_time + timedelta(seconds=interval_seconds) + if current_time >= end_u: + break + + if gen: + await self.insert_ohlcv_batch(gen) + self.logger.info(f"Inserted {len(gen)} averaged candles") + return len(gen) + return 0 + except Exception as e: + self.logger.error(f"Error in averaging: {e}", exc_info=True) + return 0 + + async def check_data_health(self, symbol: str, interval: str) -> Dict[str, Any]: + """ + Comprehensive health check for a trading pair's data + Detects various data quality issues beyond just gaps + """ + coverage = await self.get_data_coverage_summary(symbol, interval) + gaps = await self.find_data_gaps(symbol, interval) + health_issues: List[Dict[str, Any]] = [] + + # 1. Stale data + last_record_iso = coverage.get('last_record') + if last_record_iso: + last_record = _ensure_dt_aware_utc(datetime.fromisoformat(last_record_iso)) + hours_since = (_utc_now() - last_record).total_seconds() / 3600 + if hours_since > 24: + health_issues.append({'severity': 'high', 'issue': 'stale_data', + 'message': f'No data in last {hours_since:.1f} hours'}) + + # 2. Excessive gaps + if len(gaps) > 10: + health_issues.append({'severity': 'medium', 'issue': 'fragmented_data', + 'message': f'{len(gaps)} gaps detected - data is fragmented'}) + + # 3. Low coverage + coverage_pct = coverage.get('coverage_percent', 0.0) or 0.0 + if coverage_pct < 50: + health_issues.append({'severity': 'high', 'issue': 'low_coverage', + 'message': f'Only {coverage_pct:.1f}% data coverage'}) + elif coverage_pct < 80: + health_issues.append({'severity': 'medium', 'issue': 'medium_coverage', + 'message': f'{coverage_pct:.1f}% data coverage - could be improved'}) + + # 4. 
Zero-volume candles + zero_vol_count = 0 + if self.pool: + async with self.pool.acquire() as conn: + zero_vol_count = await conn.fetchval( + """ + SELECT COUNT(*) FROM crypto_ohlcv + WHERE symbol = $1 AND interval = $2 AND volume = 0; + """, + _safe_upper(symbol), interval + ) + if zero_vol_count and zero_vol_count > 0: + health_issues.append({'severity': 'low', 'issue': 'zero_volume_candles', + 'message': f'{zero_vol_count} candles with zero volume detected'}) + + # Score + health_score = 100 + for issue in health_issues: + if issue['severity'] == 'high': + health_score -= 30 + elif issue['severity'] == 'medium': + health_score -= 15 + elif issue['severity'] == 'low': + health_score -= 5 + health_score = max(0, health_score) + + if health_score >= 90: + status = 'excellent' + elif health_score >= 70: + status = 'good' + elif health_score >= 50: + status = 'fair' + else: + status = 'poor' + + return { + 'symbol': _safe_upper(symbol), + 'interval': interval, + 'health_score': health_score, + 'status': status, + 'issues': health_issues, + 'coverage_percent': coverage_pct, + 'total_gaps': len(gaps), + 'last_check': _utc_now().isoformat(), + } + + async def get_detailed_statistics(self) -> Dict[str, Any]: + """Get detailed database statistics for all symbols""" + if not self.pool: + return {'overall': {}, 'symbols': []} + async with self.pool.acquire() as conn: + overall = await conn.fetchrow( + """ + SELECT + COUNT(DISTINCT symbol) as total_symbols, + COUNT(*) as total_candles, + MIN(time) as first_record, + MAX(time) as last_record + FROM crypto_ohlcv; + """ + ) + symbols = await conn.fetch( + """ + SELECT + symbol, + COUNT(DISTINCT interval) as intervals_count, + COUNT(*) as total_candles, + MIN(time) as first_record, + MAX(time) as last_record + FROM crypto_ohlcv + GROUP BY symbol + ORDER BY symbol; + """ + ) + return {'overall': dict(overall) if overall else {}, 'symbols': [dict(r) for r in symbols]} + + async def get_gap_fill_progress(self, symbol: str, interval: str) -> Dict[str, Any]: + """ + Get real-time progress of gap filling operations + """ + current = await self.get_data_coverage_summary(symbol, interval) + missing = current.get('missing_records', 0) + coverage = current.get('coverage_percent', 0.0) + avg_fill_rate = 100.0 # candles per minute (tunable) + estimated_minutes = (missing / avg_fill_rate) if avg_fill_rate > 0 else 0.0 + return { + 'symbol': _safe_upper(symbol), + 'interval': interval, + 'current_coverage': coverage, + 'missing_records': missing, + 'estimated_time_minutes': round(estimated_minutes, 1), + 'estimated_time_human': self._format_duration(estimated_minutes), + 'status': 'complete' if coverage >= 99.9 else 'in_progress', + } + + def _format_duration(self, minutes: float) -> str: + """Convert minutes to human-readable format""" + if minutes < 1: + return f"{int(minutes * 60)}s" + if minutes < 60: + return f"{int(minutes)}m" + if minutes < 1440: + hours = minutes / 60.0 + return f"{hours:.1f}h" + days = minutes / 1440.0 + return f"{days:.1f}d" + + # -------------------------- + # Stats and utilities + # -------------------------- + + async def get_total_records(self) -> int: + """Get total number of records across all tables""" + if not self.pool: + return 0 + async with self.pool.acquire() as conn: + result = await conn.fetchrow( + """ + SELECT + (SELECT COUNT(*) FROM crypto_ticks) + + (SELECT COUNT(*) FROM crypto_ohlcv) + + (SELECT COUNT(*) FROM technical_indicators) as total; + """ + ) + return int(result['total']) if result else 0 + + async def 
get_last_update_time(self) -> str: + """Get the last update time across all tables""" + if not self.pool: + return "Never" + async with self.pool.acquire() as conn: + result = await conn.fetchrow( + """ + SELECT MAX(last_update) as last_update FROM ( + SELECT MAX(time) as last_update FROM crypto_ticks + UNION ALL + SELECT MAX(time) as last_update FROM crypto_ohlcv + UNION ALL + SELECT MAX(time) as last_update FROM technical_indicators + ) sub; + """ + ) + if result and result['last_update']: + return result['last_update'].isoformat() + return "Never" + + async def get_symbol_statistics(self, symbol: str) -> Dict[str, Any]: + """Get statistics for a specific symbol""" + if not self.pool: + return {} + symbol_u = _safe_upper(symbol) + async with self.pool.acquire() as conn: + stats = await conn.fetchrow( + """ + SELECT + (SELECT COUNT(*) FROM crypto_ticks WHERE symbol = $1) as tick_count, + (SELECT COUNT(*) FROM crypto_ohlcv WHERE symbol = $1) as ohlcv_count, + (SELECT COUNT(*) FROM technical_indicators WHERE symbol = $1) as indicator_count, + (SELECT MIN(time) FROM crypto_ohlcv WHERE symbol = $1) as first_record, + (SELECT MAX(time) FROM crypto_ohlcv WHERE symbol = $1) as last_record; + """, + symbol_u + ) + return dict(stats) if stats else {} + + async def get_all_symbols_summary(self) -> List[Dict[str, Any]]: + """Get summary statistics for all symbols""" + if not self.pool: + return [] + async with self.pool.acquire() as conn: + rows = await conn.fetch( + """ + SELECT + symbol, + COUNT(DISTINCT interval) as intervals_count, + MIN(time) as first_record, + MAX(time) as last_record, + COUNT(*) as total_candles + FROM crypto_ohlcv + GROUP BY symbol + ORDER BY symbol; + """ + ) + return [dict(row) for row in rows] + + async def get_all_gaps_summary(self) -> List[Dict[str, Any]]: + """Get gap summary for all symbols and intervals""" + symbols = await self.get_available_symbols() + intervals = ['1m', '5m', '15m', '1h', '4h', '1d'] + summary: List[Dict[str, Any]] = [] + for symbol in symbols: + for interval in intervals: + try: + gap_info = await self.detect_gaps(symbol, interval) + coverage = gap_info['coverage'] + if coverage.get('total_records', 0) > 0: + summary.append({ + 'symbol': symbol, + 'interval': interval, + 'total_gaps': len(gap_info['gaps']), + 'fillable_gaps': len([g for g in gap_info['gaps'] if g['missing_candles'] < 1000]), + 'total_missing_candles': sum(g['missing_candles'] for g in gap_info['gaps']), + 'coverage_percent': coverage['coverage_percent'], + }) + except Exception as e: + self.logger.warning(f"Error getting gaps for {symbol} {interval}: {e}") + return summary + + + async def get_current_price_and_trends_with_volume(self, symbol: str) -> Optional[Dict[str, Any]]: + """ + Get current price, percentage changes, and robust 15m volume anomaly status. + Baseline blends last 4h, 24h, and 7d per-minute stats (excluding the most recent 15m). 
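+        Per-minute mean/stddev are scaled to a 15-minute window, blended with
+        availability-based weights, and used to derive a z-score and deviation
+        percentage for the most recent 15 minutes of volume.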
+ """ + if not self.pool: + return None + symbol_u = _safe_upper(symbol) + now = _utc_now() + time_15m_ago = now - timedelta(minutes=15) + time_1h_ago = now - timedelta(hours=1) + time_1d_ago = now - timedelta(days=1) + time_1w_ago = now - timedelta(weeks=1) + + baseline_end = time_15m_ago + start_4h = baseline_end - timedelta(hours=4) + start_24h = baseline_end - timedelta(hours=24) + start_7d = baseline_end - timedelta(days=7) + + async with self.pool.acquire() as conn: + try: + current = await conn.fetchrow( + """ + SELECT close_price, volume, time + FROM crypto_ohlcv + WHERE symbol = $1 AND interval = '1m' + ORDER BY time DESC + LIMIT 1; + """, + symbol_u + ) + if not current: + return None + current_price = float(current['close_price']) + current_time = current['time'] + + # Price anchors + price_15m = await conn.fetchrow( + """ + SELECT close_price FROM crypto_ohlcv + WHERE symbol = $1 AND interval = '1m' AND time <= $2 + ORDER BY time DESC LIMIT 1; + """, + symbol_u, time_15m_ago + ) + price_1h = await conn.fetchrow( + """ + SELECT close_price FROM crypto_ohlcv + WHERE symbol = $1 AND interval = '1h' AND time <= $2 + ORDER BY time DESC LIMIT 1; + """, + symbol_u, time_1h_ago + ) + price_1d = await conn.fetchrow( + """ + SELECT close_price FROM crypto_ohlcv + WHERE symbol = $1 AND interval = '1h' AND time <= $2 + ORDER BY time DESC LIMIT 1; + """, + symbol_u, time_1d_ago + ) + price_1w = await conn.fetchrow( + """ + SELECT close_price FROM crypto_ohlcv + WHERE symbol = $1 AND interval = '1d' AND time <= $2 + ORDER BY time DESC LIMIT 1; + """, + symbol_u, time_1w_ago + ) + + # Baselines and last 15m in one pass + stats = await conn.fetchrow( + """ + SELECT + COALESCE(SUM(volume) FILTER (WHERE time > $2 AND time <= $3), 0) AS vol_15m, + AVG(volume) FILTER (WHERE time > $4 AND time <= $2) AS avg_4h, + STDDEV_SAMP(volume) FILTER (WHERE time > $4 AND time <= $2) AS std_4h, + COUNT(*) FILTER (WHERE time > $4 AND time <= $2) AS n_4h, + + AVG(volume) FILTER (WHERE time > $5 AND time <= $2) AS avg_24h, + STDDEV_SAMP(volume) FILTER (WHERE time > $5 AND time <= $2) AS std_24h, + COUNT(*) FILTER (WHERE time > $5 AND time <= $2) AS n_24h, + + AVG(volume) FILTER (WHERE time > $6 AND time <= $2) AS avg_7d, + STDDEV_SAMP(volume) FILTER (WHERE time > $6 AND time <= $2) AS std_7d, + COUNT(*) FILTER (WHERE time > $6 AND time <= $2) AS n_7d + FROM crypto_ohlcv + WHERE symbol = $1 AND interval = '1m'; + """, + symbol_u, time_15m_ago, current_time, start_4h, start_24h, start_7d + ) + + last_15m_volume = float(stats['vol_15m']) if stats and stats['vol_15m'] is not None else 0.0 + + def _to_float(x): + return float(x) if x is not None else None + + avg4 = _to_float(stats['avg_4h']); std4 = _to_float(stats['std_4h']); n4 = int(stats['n_4h']) if stats and stats['n_4h'] is not None else 0 + avg24 = _to_float(stats['avg_24h']); std24 = _to_float(stats['std_24h']); n24 = int(stats['n_24h']) if stats and stats['n_24h'] is not None else 0 + avg7 = _to_float(stats['avg_7d']); std7 = _to_float(stats['std_7d']); n7 = int(stats['n_7d']) if stats and stats['n_7d'] is not None else 0 + + import math + def scale_to_15m(avg_per_min, std_per_min): + mean15 = (avg_per_min or 0.0) * 15.0 + std15 = (std_per_min or 0.0) * math.sqrt(15.0) + return mean15, std15 + + mean15_4, std15_4 = scale_to_15m(avg4, std4) + mean15_24, std15_24 = scale_to_15m(avg24, std24) + mean15_7, std15_7 = scale_to_15m(avg7, std7) + + # Weights gated by data availability + w4 = 0.6 if n4 >= 60 else 0.0 # at least 1 hour of 1m bars + w24 = 0.3 if n24 >= 
240 else 0.0 # at least 4 hours of 1m bars + w7 = 0.1 if n7 >= 2000 else 0.0 # ~>1.4 days of 1m bars + total_w = w4 + w24 + w7 + + if total_w == 0.0: + candidates = [] + if n4 > 0: candidates.append((mean15_4, std15_4, 1.0)) + if n24 > 0: candidates.append((mean15_24, std15_24, 1.0)) + if n7 > 0: candidates.append((mean15_7, std15_7, 1.0)) + total_w = sum(w for _, _, w in candidates) or 1.0 + blended_mean15 = sum(m * w for m, _, w in candidates) / total_w + blended_var15 = sum(((s or 0.0) ** 2) * w for _, s, w in candidates) / total_w + else: + blended_mean15 = (mean15_4 * w4 + mean15_24 * w24 + mean15_7 * w7) / total_w + blended_var15 = (((std15_4 or 0.0) ** 2) * w4 + + ((std15_24 or 0.0) ** 2) * w24 + + ((std15_7 or 0.0) ** 2) * w7) / total_w + + blended_std15 = math.sqrt(blended_var15) if blended_var15 > 0 else 0.0 + + deviation_pct = 0.0 + zscore = 0.0 + if blended_mean15 > 0: + deviation_pct = ((last_15m_volume - blended_mean15) / blended_mean15) * 100.0 + if blended_std15 > 0: + zscore = (last_15m_volume - blended_mean15) / blended_std15 + + volume_status = "Average" + if blended_mean15 <= 0 and last_15m_volume > 0: + volume_status = "Unusually High" + else: + if zscore >= 2.5 or deviation_pct >= 50.0: + volume_status = "Unusually High" + elif zscore >= 1.5 or deviation_pct >= 20.0: + volume_status = "High" + elif zscore <= -2.5 or deviation_pct <= -50.0: + volume_status = "Unusually Low" + elif zscore <= -1.5 or deviation_pct <= -20.0: + volume_status = "Low" + + current_volume = float(current['volume']) + + def calc_change(old): + if old and float(old) > 0: + return round(((current_price - float(old)) / float(old)) * 100.0, 2) + return 0.0 + + return { + 'symbol': symbol_u, + 'current_price': current_price, + 'current_volume': current_volume, + 'volume_status': volume_status, + 'last_update': current_time.isoformat(), + 'trends': { + '15m': calc_change(price_15m['close_price'] if price_15m else None), + '1h': calc_change(price_1h['close_price'] if price_1h else None), + '1d': calc_change(price_1d['close_price'] if price_1d else None), + '1w': calc_change(price_1w['close_price'] if price_1w else None), + }, + 'volume_context': { + 'last_15m_volume': round(last_15m_volume, 8), + 'expected_15m_volume': round(blended_mean15, 8), + 'zscore': round(zscore, 3), + 'deviation_pct': round(deviation_pct, 2), + 'baselines': {'n_4h': n4, 'n_24h': n24, 'n_7d': n7} + } + } + except Exception as e: + self.logger.error(f"Error getting price trends with volume for {symbol_u}: {e}", exc_info=True) + return None + + async def get_gap_fill_status(self, symbol: str, interval: str) -> Dict[str, Any]: + """Get gap fill status for a symbol and interval""" + gaps = await self.detect_gaps(symbol, interval) + return {'symbol': _safe_upper(symbol), 'interval': interval, 'gaps': gaps['gaps'], 'coverage': gaps['coverage']} + + async def fill_genuine_gaps_with_averages(self, symbol: str, interval: str, + max_consecutive: int = 5, lookback: int = 10) -> int: + """Fill genuine empty gaps with intelligent averaging""" + self.logger.info(f"Filling genuine gaps for {symbol} {interval}") + gaps_info = await self.detect_gaps(symbol, interval) + gaps = gaps_info.get("gaps", []) + if not gaps: + return 0 + + filled_count = 0 + for gap in gaps: + gap_start = _ensure_dt_aware_utc(datetime.fromisoformat(gap['gap_start'])) + gap_end = _ensure_dt_aware_utc(datetime.fromisoformat(gap['gap_end'])) + missing_candles = gap['missing_candles'] + if missing_candles > max_consecutive: + continue + try: + filled = await 
self._fill_gap_with_averaging(symbol, interval, gap_start, gap_end, missing_candles) + filled_count += filled + except Exception as e: + self.logger.error(f"Error filling gap with averaging: {e}") + continue + return filled_count + + async def cleanup_old_data(self, retention_days: int = 365): + """Clean up old data based on retention policy""" + if not self.pool: + return + cutoff_date = _utc_now() - timedelta(days=retention_days) + tick_cutoff = _utc_now() - timedelta(days=min(retention_days, 30)) + async with self.pool.acquire() as conn: + await conn.execute("DELETE FROM crypto_ticks WHERE time < $1;", tick_cutoff) + await conn.execute("DELETE FROM crypto_ohlcv WHERE time < $1;", cutoff_date) + await conn.execute("DELETE FROM technical_indicators WHERE time < $1;", cutoff_date) + self.logger.info(f"Cleaned up data older than {retention_days} days") + + async def vacuum_analyze(self): + """Perform database maintenance""" + if not self.pool: + return + async with self.pool.acquire() as conn: + tables = ['crypto_ticks', 'crypto_ohlcv', 'technical_indicators'] + for t in tables: + try: + await conn.execute(f"VACUUM ANALYZE {t};") + except Exception as e: + self.logger.warning(f"VACUUM ANALYZE warning on {t}: {e}") + self.logger.info("Database vacuum and analyze completed") + + @asynccontextmanager + async def transaction(self): + """Context manager for database transactions""" + if not self.pool: + raise RuntimeError("Pool not initialized") + async with self.pool.acquire() as conn: + async with conn.transaction(): + yield conn + + async def close(self): + """Close database connection pool""" + if self.pool: + await self.pool.close() + self.logger.info("Database connection pool closed") + + +# Utility function to create the database if it does not exist +async def create_database_if_not_exists(): + """Create database if it doesn't exist (connect via postgres db)""" + conn = await asyncpg.connect( + host=os.getenv('DB_HOST', 'localhost'), + port=int(os.getenv('DB_PORT', 5432)), + database='postgres', + user=os.getenv('DB_USER', 'postgres'), + password=os.getenv('DB_PASSWORD', 'password'), + ) + db_name = os.getenv('DB_NAME', 'crypto_trading') + try: + exists = await conn.fetchval("SELECT 1 FROM pg_database WHERE datname = $1", db_name) + if not exists: + await conn.execute(f'CREATE DATABASE "{db_name}";') + print(f"Database '{db_name}' created successfully") + else: + print(f"Database '{db_name}' already exists") + except Exception as e: + print(f"Error creating database: {e}") + finally: + await conn.close() + + +if __name__ == "__main__": + async def test_db(): + await create_database_if_not_exists() + db = DatabaseManager() + await db.initialize() + stats = await db.get_total_records() + print(f"Total records: {stats}") + await db.close() + asyncio.run(test_db()) diff --git a/main.py b/main.py new file mode 100644 index 0000000..4b939e2 --- /dev/null +++ b/main.py @@ -0,0 +1,1036 @@ +#!/usr/bin/env python3 +""" +main.py - Complete Binance Trading Data Collection System + +Main application entry point with async data collection, websocket handling, and task management +""" + +import asyncio +import logging +import signal +import sys +import json +import subprocess +import os +from datetime import datetime, timedelta, timezone +from typing import Dict, List, Optional, Any +from contextlib import asynccontextmanager + +import websockets +import aiohttp +from binance.client import Client +from binance.exceptions import BinanceAPIException +import pandas as pd +import pandas_ta as ta +from 
dotenv import load_dotenv + +# Import our modules +from db import DatabaseManager +from utils import ( + load_config, setup_logging, parse_kline_data, parse_trade_data, + calculate_technical_indicators, validate_symbol, format_timestamp +) + +# Load environment variables +load_dotenv('variables.env') + +# Global variables +db_manager: Optional[DatabaseManager] = None +config: Dict[str, Any] = {} +running_tasks: Dict[str, asyncio.Task] = {} +websocket_connections: Dict[str, Any] = {} +ui_process: Optional[subprocess.Popen] = None + + +class BinanceDataCollector: + """Main data collection orchestrator for Binance trading data""" + + def __init__(self): + self.client: Optional[Client] = None + self.logger = logging.getLogger(__name__) + self.is_collecting = False + self.websocket_collection_running = False + self.download_progress: Dict[str, Any] = {} + + max_downloads = int(os.getenv('MAX_CONCURRENT_DOWNLOADS', '3')) + max_gap_fills = int(os.getenv('MAX_CONCURRENT_GAP_FILLS', '2')) + self._download_semaphore = asyncio.Semaphore(max_downloads) + self._gap_fill_semaphore = asyncio.Semaphore(max_gap_fills) + self.logger.info(f"Initialized with max {max_downloads} concurrent downloads, {max_gap_fills} gap fills") + + async def initialize(self): + """Initialize the data collector""" + global db_manager, config + + # Setup logging + setup_logging() + self.logger.info("Initializing Binance Data Collector") + + # Load configuration + config = load_config() + self.logger.info(f"Loaded configuration for {len(config['trading_pairs'])} trading pairs") + + # Initialize database + db_manager = DatabaseManager() + await db_manager.initialize() + self.logger.info("Database initialized successfully") + + # Initialize Binance client (no API key needed for market data) + api_key = os.getenv('BINANCE_API_KEY') + secret_key = os.getenv('BINANCE_SECRET_KEY') + if api_key and secret_key: + self.client = Client(api_key, secret_key) + self.logger.info("Binance client initialized with API credentials") + else: + self.client = Client() + self.logger.info("Binance client initialized without API credentials (public data only)") + + async def bulk_download_historical_data( + self, symbol: str, start_date: datetime, end_date: Optional[datetime] = None, + intervals: Optional[List[str]] = None + ): + """ + Bulk download historical OHLCV data from Binance with intelligent gap detection. + Only downloads data that doesn't already exist in the database. 
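The coverage check this download path relies on is simple candle arithmetic: the expected number of candles in a range is the span divided by the interval length, and coverage is found/expected. A minimal sketch (the real check lives in DatabaseManager.check_data_exists_for_range; the interval table below is an assumption):

from datetime import datetime, timedelta, timezone

INTERVAL_TD = {
    "1m": timedelta(minutes=1), "5m": timedelta(minutes=5), "15m": timedelta(minutes=15),
    "1h": timedelta(hours=1), "4h": timedelta(hours=4), "1d": timedelta(days=1),
}

def coverage_percent(existing_count: int, start: datetime, end: datetime, interval: str) -> float:
    """Expected candles = span / interval length; coverage = found / expected."""
    expected = max(int((end - start) / INTERVAL_TD[interval]), 1)
    return min(100.0, (existing_count / expected) * 100.0)

# coverage_percent(1200, datetime(2024, 1, 1, tzinfo=timezone.utc),
#                  datetime(2024, 1, 2, tzinfo=timezone.utc), "1m")  # ~83.3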
+ """ + async with self._download_semaphore: + if end_date is None: + end_date = datetime.now(timezone.utc) + + # Ensure timezone awareness + if start_date.tzinfo is None: + start_date = start_date.replace(tzinfo=timezone.utc) + if end_date.tzinfo is None: + end_date = end_date.replace(tzinfo=timezone.utc) + + self.logger.info(f"Starting intelligent bulk download for {symbol} from {start_date} to {end_date}") + + # Get intervals + if intervals is None: + intervals = config.get("collection", {}).get("candle_intervals", ["1m", "5m", "15m", "1h", "4h", "1d"]) + + # Initialize progress tracking + self.download_progress[symbol] = { + "status": "running", + "intervals": {}, + "start_time": datetime.now(timezone.utc).isoformat() + } + for interval in intervals: + self.download_progress[symbol]["intervals"][interval] = { + "status": "pending", + "records": 0 + } + + try: + for interval in intervals: + self.logger.info(f"Processing {interval} data for {symbol}") + self.download_progress[symbol]["intervals"][interval]["status"] = "checking" + + # Intelligent download - only missing data + records_count = await self._collect_historical_klines( + symbol, interval, start_date, end_date + ) + + if records_count > 0: + self.download_progress[symbol]["intervals"][interval]["status"] = "calculating_indicators" + self.download_progress[symbol]["intervals"][interval]["records"] = records_count + + # Calculate indicators for new data + await self._calculate_and_store_indicators(symbol, interval) + self.download_progress[symbol]["intervals"][interval]["status"] = "completed" + self.logger.info(f"Completed {interval} data for {symbol} - {records_count} new records") + else: + self.download_progress[symbol]["intervals"][interval]["status"] = "skipped_complete" + self.logger.info(f"Skipped {interval} for {symbol} - data already complete") + + self.download_progress[symbol]["status"] = "completed" + self.download_progress[symbol]["end_time"] = datetime.now(timezone.utc).isoformat() + except Exception as e: + self.logger.error(f"Error in bulk download for {symbol}: {e}") + self.download_progress[symbol]["status"] = "error" + self.download_progress[symbol]["error"] = str(e) + raise + + async def _collect_historical_klines( + self, symbol: str, interval: str, start_date: datetime, end_date: datetime + ) -> int: + """ + Intelligently collect historical kline data, only downloading missing ranges. 
+ Returns: + Number of NEW records collected (not including already existing data) + """ + global db_manager + + # Check if data already exists for this range + coverage_check = await db_manager.check_data_exists_for_range( + symbol, interval, start_date, end_date + ) + self.logger.info( + f"Data coverage for {symbol} {interval}: " + f"{coverage_check['coverage_percent']:.2f}% " + f"({coverage_check['count']}/{coverage_check['expected_count']} records)" + ) + + # If coverage is complete, skip download + if coverage_check['is_complete']: + self.logger.info( + f"Skipping {symbol} {interval} - data already complete " + f"({coverage_check['coverage_percent']:.2f}% coverage)" + ) + return 0 + + # Get missing time ranges that need to be downloaded + missing_ranges = await db_manager.get_missing_time_ranges( + symbol, interval, start_date, end_date + ) + if not missing_ranges: + self.logger.info(f"No missing data ranges for {symbol} {interval}") + return 0 + + self.logger.info( + f"Found {len(missing_ranges)} missing time range(s) for {symbol} {interval}" + ) + + # Download each missing range + total_new_records = 0 + for idx, time_range in enumerate(missing_ranges, 1): + range_start = time_range['start'] + range_end = time_range['end'] + self.logger.info( + f"Downloading range {idx}/{len(missing_ranges)}: " + f"{range_start} to {range_end} for {symbol} {interval}" + ) + + records_in_range = await self._download_time_range( + symbol, interval, range_start, range_end + ) + total_new_records += records_in_range + + self.logger.info( + f"Downloaded {records_in_range} records for range {idx}/{len(missing_ranges)}" + ) + + return total_new_records + + def _calculate_chunk_end(self, start: datetime, interval: str, chunk_size: int) -> datetime: + """Calculate the end time for a data chunk based on interval""" + if interval.endswith('m'): + minutes = int(interval[:-1]) + return start + timedelta(minutes=minutes * chunk_size) + elif interval.endswith('h'): + hours = int(interval[:-1]) + return start + timedelta(hours=hours * chunk_size) + elif interval.endswith('d'): + days = int(interval[:-1]) + return start + timedelta(days=days * chunk_size) + elif interval.endswith('w'): + weeks = int(interval[:-1]) + return start + timedelta(weeks=weeks * chunk_size) + else: + # Default to minutes + return start + timedelta(minutes=chunk_size) + + @staticmethod + def _rest_kline_to_ws_event(symbol: str, interval: str, kline_row: List[Any]) -> Dict[str, Any]: + """ + Convert REST get_historical_klines row (list) to a WebSocket-style kline event + that parse_kline_data expects. 
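For reference, a REST kline row is a 12-element list; an illustrative row (values made up) and the round trip through this converter:

rest_row = [
    1700000000000,   # 0 open time (ms)
    "35000.10",      # 1 open
    "35100.00",      # 2 high
    "34950.00",      # 3 low
    "35050.55",      # 4 close
    "123.456",       # 5 volume
    1700000059999,   # 6 close time (ms)
    "4330000.00",    # 7 quote asset volume
    987,             # 8 number of trades
    "60.0",          # 9 taker buy base volume
    "2100000.00",    # 10 taker buy quote volume
    "0",             # 11 ignore
]
# event = BinanceDataCollector._rest_kline_to_ws_event("BTCUSDT", "1m", rest_row)
# event["k"]["c"] == "35050.55"; event["k"]["x"] is True (REST rows are closed candles)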
+ """ + # Per Binance REST klines: index meanings + # 0 open time(ms),1 open,2 high,3 low,4 close,5 volume, + # 6 close time(ms),7 quote asset volume,8 number of trades, + # 9 taker buy base asset volume,10 taker buy quote asset volume,11 ignore + return { + "e": "kline", + "E": int(kline_row[6]), # event time (approx close time) + "s": symbol.upper(), + "k": { + "t": int(kline_row[0]), + "T": int(kline_row[6]), + "s": symbol.upper(), + "i": interval, + "f": None, # first trade id (unknown from REST row) + "L": None, # last trade id (unknown) + "o": str(kline_row[1]), + "c": str(kline_row[4]), + "h": str(kline_row[2]), + "l": str(kline_row[3]), + "v": str(kline_row[5]), + "n": int(kline_row[8]), + "x": True, # REST klines are for closed candles + "q": str(kline_row[7]), + "V": None, # taker buy base asset volume (optional) + "Q": None, # taker buy quote asset volume (optional) + "B": None # ignore + } + } + + async def _download_time_range( + self, symbol: str, interval: str, start_date: datetime, end_date: datetime + ) -> int: + """ + Download data for a specific time range (internal method). + This is the actual download logic extracted from the original collect_historical_klines. + Returns: + Number of records downloaded and inserted + """ + global config, db_manager + + chunk_size = config.get("collection", {}).get("bulk_chunk_size", 1000) + max_retries = config.get("collection", {}).get("max_retries", 3) + retry_delay = config.get("collection", {}).get("retry_delay", 1) + + # Normalize time inputs that might be naive time objects + from datetime import time as dt_time + if isinstance(start_date, dt_time): + # Use today's date in UTC for safety if only a time is provided + start_date = datetime.combine(datetime.now(timezone.utc).date(), start_date) + if isinstance(end_date, dt_time): + # Use the same date as start_date if possible for consistency + base_date = start_date.date() if isinstance(start_date, datetime) else datetime.now(timezone.utc).date() + end_date = datetime.combine(base_date, end_date) + + if start_date.tzinfo is None: + start_date = start_date.replace(tzinfo=timezone.utc) + if end_date.tzinfo is None: + end_date = end_date.replace(tzinfo=timezone.utc) + + # Convert to naive UTC for Binance API + current_start = start_date.replace(tzinfo=None) + end = end_date.replace(tzinfo=None) + + total_records = 0 + retry_count = 0 + + while current_start < end: + try: + # Calculate chunk end time based on interval + chunk_end = self._calculate_chunk_end(current_start, interval, chunk_size) + chunk_end = min(chunk_end, end) + + # Get klines from Binance with retry logic + klines: Optional[List[List[Any]]] = None + for attempt in range(max_retries): + try: + klines = self.client.get_historical_klines( + symbol=symbol, + interval=interval, + start_str=int(current_start.timestamp() * 1000), + end_str=int(chunk_end.timestamp() * 1000), + limit=chunk_size + ) + break + except BinanceAPIException as e: + if e.code == -1003: # Rate limit + wait_time = retry_delay * (2 ** attempt) + self.logger.warning(f"Rate limit hit, waiting {wait_time}s before retry") + await asyncio.sleep(wait_time) + else: + raise + except Exception as e: + if attempt == max_retries - 1: + raise + self.logger.warning(f"Attempt {attempt + 1} failed: {e}") + await asyncio.sleep(retry_delay) + + if not klines or len(klines) == 0: + self.logger.info(f"No more data available for {symbol} {interval}") + break + + # Parse and store klines + ohlcv_data: List[Dict[str, Any]] = [] + for kline in klines: + try: + # Normalize to 
WebSocket-style event expected by parse_kline_data + ws_event = self._rest_kline_to_ws_event(symbol, interval, kline) + parsed_data = parse_kline_data(ws_event) + ohlcv_data.append(parsed_data) + except Exception as e: + # Keep original message to aid debugging if structure differs + self.logger.error(f"Error parsing kline data: {e} | raw={kline!r}") + continue + + # Batch insert to database + if ohlcv_data: + await db_manager.insert_ohlcv_batch(ohlcv_data) + total_records += len(ohlcv_data) + + # Update progress + if symbol in self.download_progress and interval in self.download_progress[symbol]["intervals"]: + self.download_progress[symbol]["intervals"][interval]["records"] = total_records + + self.logger.debug(f"Stored {len(ohlcv_data)} {interval} candles for {symbol} (total: {total_records})") + + # Update current_start for next chunk + if klines: + last_close_time_ms = klines[-1][6] # Use the close time of the last kline + current_start = datetime.utcfromtimestamp((last_close_time_ms + 1) / 1000) + else: + break + + # Delay to respect rate limits + await asyncio.sleep(0.2) + retry_count = 0 # Reset retry count on success + + except BinanceAPIException as e: + retry_count += 1 + self.logger.error(f"Binance API error (attempt {retry_count}): {e}") + if retry_count >= max_retries: + self.logger.error(f"Max retries reached for {symbol} {interval}") + raise + # Exponential backoff + wait_time = retry_delay * (2 ** retry_count) + await asyncio.sleep(wait_time) + + except asyncio.CancelledError: + self.logger.info(f"Download for {symbol} {interval} cancelled") + break + + except Exception as e: + self.logger.error(f"Error collecting {interval} data for {symbol}: {e}", exc_info=True) + raise + + return total_records + + async def _calculate_and_store_indicators(self, symbol: str, interval: str): + """Calculate and store technical indicators for a symbol and interval""" + try: + # Check if indicators are enabled for this interval + indicator_config = config.get('technical_indicators', {}) + calc_intervals = indicator_config.get('calculation_intervals', ['1m', '5m', '15m', '1h', '4h', '1d']) + if interval not in calc_intervals: + self.logger.debug(f"Skipping indicators for {symbol} {interval} (not in calculation_intervals)") + return + + # Get OHLCV data from database (need enough for longest indicator period) + max_period = 200 # Maximum period for indicators like SMA-200 + ohlcv_data = await db_manager.get_ohlcv_data(symbol, interval, limit=max_period + 50) + if len(ohlcv_data) < 50: # Need minimum data for indicators + self.logger.warning(f"Not enough data for indicators: {symbol} {interval} ({len(ohlcv_data)} records)") + return + + # Convert to DataFrame + df = pd.DataFrame(ohlcv_data) + df['time'] = pd.to_datetime(df['time']) + df = df.sort_values('time') + df.set_index('time', inplace=True) + + # Rename columns for pandas_ta + df = df.rename(columns={ + 'open_price': 'open', + 'high_price': 'high', + 'low_price': 'low', + 'close_price': 'close' + }) + + # Calculate technical indicators + indicators_data = calculate_technical_indicators(df, indicator_config) + + # Store indicators in database + if indicators_data: + await db_manager.insert_indicators_batch(symbol, interval, indicators_data) + self.logger.info(f"Stored {len(indicators_data)} indicator values for {symbol} {interval}") + except asyncio.CancelledError: + self.logger.info(f"Indicator calculation cancelled for {symbol} {interval}") + except Exception as e: + self.logger.error(f"Error calculating indicators for {symbol} 
{interval}: {e}", exc_info=True) + + async def auto_fill_gaps( + self, + symbol: str, + intervals: Optional[List[str]] = None, + fill_genuine_gaps: bool = True + ) -> Dict[str, Any]: + """ + Automatically fill gaps for a symbol + Args: + symbol: Trading pair symbol + intervals: List of intervals to fill (default: from config) + fill_genuine_gaps: Whether to fill genuine empty gaps with averages + Returns: + Dictionary with fill results + """ + # Acquire semaphore to limit concurrent gap fills + async with self._gap_fill_semaphore: + global config, db_manager + + if intervals is None: + intervals = config.get('gap_filling', {}).get('intervals_to_monitor', ['1m', '5m', '15m', '1h', '4h', '1d']) + + self.logger.info(f"Starting auto gap fill for {symbol} on intervals: {intervals}") + results: Dict[str, Any] = { + 'symbol': symbol, + 'intervals': {}, + 'total_gaps_filled': 0, + 'total_genuine_filled': 0 + } + + try: + # Get record_from_date for this symbol + pair_config = next((p for p in config['trading_pairs'] if p['symbol'] == symbol), None) + if not pair_config: + self.logger.warning(f"Symbol {symbol} not found in config") + return results + + record_from_date = pair_config.get('record_from_date') + if not record_from_date: + record_from_date = config.get('collection', {}).get('default_record_from_date', '2020-01-01T00:00:00Z') + _ = datetime.fromisoformat(record_from_date.replace('Z', '+00:00')) # kept for future use + + gap_config = config.get('gap_filling', {}) + max_gap_size = gap_config.get('max_gap_size_candles', 1000) + + for interval in intervals: + self.logger.info(f"Checking gaps for {symbol} {interval}") + # Detect gaps + gaps_info = await db_manager.detect_gaps(symbol, interval) + gaps = gaps_info.get('gaps', []) + + interval_result = { + 'gaps_found': len(gaps), + 'gaps_filled': 0, + 'genuine_filled': 0, + 'errors': [] + } + + # Fill downloadable gaps + for gap in gaps: + missing_candles = gap['missing_candles'] + # Skip if gap is too large + if missing_candles > max_gap_size: + self.logger.info(f"Skipping large gap: {missing_candles} candles") + interval_result['errors'].append(f"Gap too large: {missing_candles} candles") + continue + + try: + # Download missing data + gap_start = datetime.fromisoformat(gap['gap_start']) + gap_end = datetime.fromisoformat(gap['gap_end']) + self.logger.info(f"Filling gap: {gap_start} to {gap_end}") + + records_count = await self._collect_historical_klines( + symbol, interval, gap_start, gap_end + ) + + if records_count > 0: + interval_result['gaps_filled'] += 1 + results['total_gaps_filled'] += 1 + self.logger.info(f"Successfully filled gap with {records_count} records") + else: + # Genuine empty gap - fill with averages if enabled + if fill_genuine_gaps: + filled = await db_manager.fill_genuine_gaps_with_averages( + symbol, interval, + gap_config.get('max_consecutive_empty_candles', 5), + gap_config.get('averaging_lookback_candles', 10) + ) + interval_result['genuine_filled'] += filled + results['total_genuine_filled'] += filled + + # Small delay between gaps + await asyncio.sleep(0.5) + + except Exception as e: + error_msg = f"Error filling gap: {str(e)}" + self.logger.error(error_msg) + interval_result['errors'].append(error_msg) + + results['intervals'][interval] = interval_result + + # Calculate and store indicators after filling gaps + if interval_result['gaps_filled'] > 0 or interval_result['genuine_filled'] > 0: + try: + await self._calculate_and_store_indicators(symbol, interval) + except Exception as e: + self.logger.error(f"Error 
calculating indicators: {e}", exc_info=True) + + self.logger.info(f"Auto gap fill completed for {symbol}: {results}") + return results + + except Exception as e: + self.logger.error(f"Error in auto gap fill: {e}", exc_info=True) + results['error'] = str(e) + return results + + async def start_auto_gap_fill_scheduler(self): + """Start background task for automatic gap filling""" + global config, db_manager + gap_config = config.get('gap_filling', {}) + if not gap_config.get('enable_auto_gap_filling', False): + self.logger.info("Auto gap filling is disabled") + return + + schedule_hours = gap_config.get('auto_fill_schedule_hours', 24) + self.logger.info(f"Starting auto gap fill scheduler (every {schedule_hours} hours)") + + while self.is_collecting: + try: + # Get all enabled pairs + enabled_pairs = [p for p in config['trading_pairs'] if p.get('enabled', True)] + for pair in enabled_pairs: + symbol = pair['symbol'] + self.logger.info(f"Running scheduled gap fill for {symbol}") + try: + await self.auto_fill_gaps( + symbol, + intervals=gap_config.get('intervals_to_monitor'), + fill_genuine_gaps=gap_config.get('enable_intelligent_averaging', True) + ) + except Exception as e: + self.logger.error(f"Error in scheduled gap fill for {symbol}: {e}") + + # Wait for next scheduled run + self.logger.info(f"Next auto gap fill in {schedule_hours} hours") + await asyncio.sleep(schedule_hours * 3600) + + except asyncio.CancelledError: + self.logger.info("Auto gap fill scheduler cancelled") + break + except Exception as e: + self.logger.error(f"Error in auto gap fill scheduler: {e}", exc_info=True) + await asyncio.sleep(3600) # Wait 1 hour on error + + async def start_continuous_collection(self): + """Start continuous data collection via WebSocket""" + if self.websocket_collection_running: + self.logger.warning("WebSocket collection already running") + return + + self.logger.info("Starting continuous WebSocket data collection") + self.websocket_collection_running = True + self.is_collecting = True + + # Create WebSocket tasks for each enabled trading pair + enabled_pairs = [p for p in config['trading_pairs'] if p.get('enabled', True)] + if not enabled_pairs: + self.logger.warning("No enabled trading pairs found") + return + + for pair_config in enabled_pairs: + symbol = pair_config['symbol'].lower() + + # Start kline streams for configured intervals + for interval in config['collection']['candle_intervals']: + task_name = f"kline_{symbol}_{interval}" + task = asyncio.create_task( + self._websocket_kline_stream(symbol, interval), + name=task_name + ) + running_tasks[task_name] = task + + # Start trade stream for tick data + task_name = f"trade_{symbol}" + task = asyncio.create_task( + self._websocket_trade_stream(symbol), + name=task_name + ) + running_tasks[task_name] = task + + # Start auto gap fill scheduler + task_name = "auto_gap_fill_scheduler" + task = asyncio.create_task( + self.start_auto_gap_fill_scheduler(), + name=task_name + ) + running_tasks[task_name] = task + + self.logger.info(f"Started {len(running_tasks)} tasks including gap fill scheduler") + + async def _websocket_kline_stream(self, symbol: str, interval: str): + """WebSocket stream for kline/candlestick data""" + stream_name = f"{symbol}@kline_{interval}" + uri = f"wss://stream.binance.com:9443/ws/{stream_name}" + reconnect_delay = config.get('collection', {}).get('websocket_reconnect_delay', 5) + ping_interval = int(os.getenv('WEBSOCKET_PING_INTERVAL', 20)) + ping_timeout = int(os.getenv('WEBSOCKET_PING_TIMEOUT', 60)) + + while 
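# The gap-fill scheduler above is a long-lived loop: do one pass over the enabled
# pairs, sleep schedule_hours * 3600 seconds, treat CancelledError as a clean stop,
# and back off for an hour on unexpected errors. Its shape, reduced to a reusable
# sketch (names are illustrative):
import asyncio
from typing import Awaitable, Callable

async def run_periodically(job: Callable[[], Awaitable[None]],
                           interval_seconds: float,
                           is_running: Callable[[], bool]) -> None:
    """Run `job` repeatedly until `is_running()` is False or the task is cancelled."""
    while is_running():
        try:
            await job()
            await asyncio.sleep(interval_seconds)
        except asyncio.CancelledError:
            break
        except Exception:
            await asyncio.sleep(3600)  # mirror the scheduler's 1-hour error backoff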
self.websocket_collection_running: + try: + async with websockets.connect( + uri, + ping_interval=ping_interval, + ping_timeout=ping_timeout + ) as websocket: + self.logger.info(f"Connected to {stream_name}") + websocket_connections[stream_name] = websocket + + async for message in websocket: + if not self.websocket_collection_running: + break + try: + data = json.loads(message) + + # Validate event type and payload shape + if data.get('e') != 'kline' or 'k' not in data: + self.logger.debug(f"Ignored non-kline or malformed message on {stream_name}") + continue + + # Parse kline data + ohlcv_data = parse_kline_data(data) + + # Store in database + await db_manager.insert_ohlcv_single(ohlcv_data) + + # Calculate indicators if kline is closed + if data['k'].get('x'): + await self._calculate_and_store_indicators( + symbol.upper(), interval + ) + + except json.JSONDecodeError: + self.logger.error(f"Invalid JSON from {stream_name}") + except asyncio.CancelledError: + self.logger.info(f"Kline stream cancelled: {stream_name}") + break + except Exception as e: + self.logger.error(f"Error processing {stream_name} message: {e}", exc_info=True) + + except websockets.exceptions.ConnectionClosed as e: + self.logger.warning(f"WebSocket connection closed for {stream_name}: {e}") + except asyncio.CancelledError: + self.logger.info(f"Kline WebSocket cancelled for {stream_name}") + break + except Exception as e: + self.logger.error(f"WebSocket error for {stream_name}: {e}", exc_info=True) + finally: + # Clean up + if stream_name in websocket_connections: + websocket_connections.pop(stream_name, None) + + if self.websocket_collection_running: + self.logger.info(f"Reconnecting to {stream_name} in {reconnect_delay}s...") + await asyncio.sleep(reconnect_delay) + + async def _websocket_trade_stream(self, symbol: str): + """WebSocket stream for trade/tick data""" + stream_name = f"{symbol}@trade" + uri = f"wss://stream.binance.com:9443/ws/{stream_name}" + reconnect_delay = config.get('collection', {}).get('websocket_reconnect_delay', 5) + ping_interval = int(os.getenv('WEBSOCKET_PING_INTERVAL', 20)) + ping_timeout = int(os.getenv('WEBSOCKET_PING_TIMEOUT', 60)) + + while self.websocket_collection_running: + try: + async with websockets.connect( + uri, + ping_interval=ping_interval, + ping_timeout=ping_timeout + ) as websocket: + self.logger.info(f"Connected to {stream_name}") + websocket_connections[stream_name] = websocket + + tick_batch: List[Dict[str, Any]] = [] + batch_size = config.get('collection', {}).get('tick_batch_size', 100) + + async for message in websocket: + if not self.websocket_collection_running: + break + try: + data = json.loads(message) + if data.get('e') == 'trade': + # Parse trade data + tick_data = parse_trade_data(data) + tick_batch.append(tick_data) + + # Batch insert when batch is full + if len(tick_batch) >= batch_size: + await db_manager.insert_ticks_batch(tick_batch) + tick_batch = [] + + except json.JSONDecodeError: + self.logger.error(f"Invalid JSON from {stream_name}") + except asyncio.CancelledError: + self.logger.info(f"Trade stream cancelled: {stream_name}") + break + except Exception as e: + self.logger.error(f"Error processing {stream_name} message: {e}", exc_info=True) + + # Insert remaining ticks + if tick_batch: + await db_manager.insert_ticks_batch(tick_batch) + + except websockets.exceptions.ConnectionClosed as e: + self.logger.warning(f"WebSocket connection closed for {stream_name}: {e}") + except asyncio.CancelledError: + self.logger.info(f"Trade WebSocket cancelled for 
{stream_name}") + break + except Exception as e: + self.logger.error(f"WebSocket error for {stream_name}: {e}", exc_info=True) + finally: + # Clean up + if stream_name in websocket_connections: + websocket_connections.pop(stream_name, None) + + if self.websocket_collection_running: + self.logger.info(f"Reconnecting to {stream_name} in {reconnect_delay}s...") + await asyncio.sleep(reconnect_delay) + + async def stop_continuous_collection(self): + """Stop continuous data collection""" + if not self.websocket_collection_running: + self.logger.warning("WebSocket collection not running") + return + + self.logger.info("Stopping continuous data collection") + self.websocket_collection_running = False + self.is_collecting = False + + # Cancel all running tasks + for task_name, task in list(running_tasks.items()): + if not task.done(): + task.cancel() + try: + await task + except asyncio.CancelledError: + self.logger.info(f"Cancelled task: {task_name}") + except Exception as e: + self.logger.error(f"Error cancelling task {task_name}: {e}") + + # Close WebSocket connections + for conn_name, conn in list(websocket_connections.items()): + try: + await conn.close() + self.logger.info(f"Closed WebSocket: {conn_name}") + except Exception as e: + self.logger.error(f"Error closing WebSocket {conn_name}: {e}") + + running_tasks.clear() + websocket_connections.clear() + self.logger.info("Continuous data collection stopped") + + async def generate_candles_from_ticks( + self, + symbol: str, + interval: str, + start_time: datetime, + end_time: datetime + ): + """ + Generate OHLCV candles from tick data + Args: + symbol: Trading pair symbol + interval: Candle interval (e.g., '1m', '5m', '1h') + start_time: Start time for candle generation + end_time: End time for candle generation + """ + self.logger.info(f"Generating {interval} candles from ticks for {symbol}") + + # Get tick data from database + ticks = await db_manager.get_tick_data(symbol, start_time, end_time) + if not ticks: + self.logger.warning(f"No tick data found for {symbol}") + return + + # Convert to DataFrame + df = pd.DataFrame(ticks) + df['time'] = pd.to_datetime(df['time']) + df.set_index('time', inplace=True) + + # Resample to create OHLCV data + ohlcv = df['price'].resample(interval).agg({ + 'open': 'first', + 'high': 'max', + 'low': 'min', + 'close': 'last' + }) + volume = df['quantity'].resample(interval).sum() + trade_count = df.resample(interval).size() + + # Combine data + candles: List[Dict[str, Any]] = [] + for timestamp, row in ohlcv.iterrows(): + if pd.notna(row['open']): # Skip empty periods + candle = { + 'time': timestamp, + 'symbol': symbol, + 'exchange': 'binance', + 'interval': interval, + 'open_price': float(row['open']), + 'high_price': float(row['high']), + 'low_price': float(row['low']), + 'close_price': float(row['close']), + 'volume': float(volume.loc[timestamp]) if timestamp in volume.index else 0.0, + 'quote_volume': None, + 'trade_count': int(trade_count.loc[timestamp]) if timestamp in trade_count.index else 0 + } + candles.append(candle) + + # Store candles in database + if candles: + await db_manager.insert_ohlcv_batch(candles) + self.logger.info(f"Generated and stored {len(candles)} candles for {symbol} {interval}") + # Calculate technical indicators + await self._calculate_and_store_indicators(symbol, interval) + else: + self.logger.warning(f"No candles generated for {symbol} {interval}") + + async def get_download_progress(self, symbol: str = None) -> Dict[str, Any]: + """Get download progress for a symbol or all 
symbols""" + if symbol: + return self.download_progress.get(symbol, {'status': 'not_found'}) + return self.download_progress + + async def cleanup(self): + """Clean up resources""" + await self.stop_continuous_collection() + # db_manager may have a close method; guard if absent + try: + if db_manager and hasattr(db_manager, "close"): + await db_manager.close() + except Exception as e: + self.logger.warning(f"Error closing database manager: {e}") + self.logger.info("BinanceDataCollector cleanup complete") + + +def start_ui_server(): + """Start the UI server as a subprocess""" + global ui_process + logger = logging.getLogger(__name__) + + try: + # Get UI configuration from environment + host = os.getenv("WEB_HOST", "0.0.0.0") + port = os.getenv("WEB_PORT", "8000") + logger.info(f"Starting UI server on {host}:{port}") + + # Start ui.py as a subprocess + ui_process = subprocess.Popen( + [sys.executable, "ui.py"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + bufsize=1 + ) + logger.info(f"✓ UI server started with PID: {ui_process.pid}") + + # Start a thread to log UI output + import threading + + def parse_log_level(line: str) -> int: + """Parse the log level from a log line""" + # Check for standard Python logging format + if ' - ERROR - ' in line or 'ERROR:' in line: + return logging.ERROR + elif ' - WARNING - ' in line or 'WARNING:' in line or ' - WARN - ' in line: + return logging.WARNING + elif ' - DEBUG - ' in line or 'DEBUG:' in line: + return logging.DEBUG + else: + # Default to INFO for all other lines (including INFO: and standard messages) + return logging.INFO + + def log_ui_output(): + """Log UI stdout messages""" + if not ui_process.stdout: + return + for line in ui_process.stdout: + line = line.rstrip() + if line: # Only log non-empty lines + log_level = parse_log_level(line) + logger.log(log_level, f"[UI] {line}") + + def log_ui_stderr(): + """Log UI stderr messages with intelligent level detection""" + if not ui_process.stderr: + return + for line in ui_process.stderr: + line = line.rstrip() + if line: # Only log non-empty lines + log_level = parse_log_level(line) + logger.log(log_level, f"[UI] {line}") + + stdout_thread = threading.Thread(target=log_ui_output, daemon=True) + stderr_thread = threading.Thread(target=log_ui_stderr, daemon=True) + stdout_thread.start() + stderr_thread.start() + + except Exception as e: + logger.error(f"✗ Failed to start UI server: {e}") + raise + + +def stop_ui_server(): + """Stop the UI server subprocess""" + global ui_process + logger = logging.getLogger(__name__) + + if ui_process: + try: + logger.info("Stopping UI server...") + ui_process.terminate() + try: + ui_process.wait(timeout=10) + logger.info("✓ UI server stopped gracefully") + except subprocess.TimeoutExpired: + logger.warning("UI server didn't stop gracefully, forcing...") + ui_process.kill() + ui_process.wait() + logger.info("✓ UI server forcefully stopped") + except Exception as e: + logger.error(f"✗ Error stopping UI server: {e}") + finally: + ui_process = None + else: + logger.debug("UI server process not running") + + +# Global signal handlers +def signal_handler(signum, frame): + """Handle shutdown signals""" + logger = logging.getLogger(__name__) + logger.info(f"Received signal {signum}, initiating shutdown...") + + # Stop UI server + stop_ui_server() + + # Cancel all running tasks + for task in asyncio.all_tasks(): + task.cancel() + + +async def main(): + """Main application entry point""" + # Setup signal handlers + signal.signal(signal.SIGINT, 
signal_handler) + signal.signal(signal.SIGTERM, signal_handler) + + collector = BinanceDataCollector() + + try: + # Initialize the collector + await collector.initialize() + + # Start UI server + start_ui_server() + + # Start continuous collection + await collector.start_continuous_collection() + + # Keep the application running + while collector.websocket_collection_running: + await asyncio.sleep(1) + + except KeyboardInterrupt: + logging.getLogger(__name__).info("Received keyboard interrupt") + except asyncio.CancelledError: + logging.getLogger(__name__).info("Application cancelled") + except Exception as e: + logging.getLogger(__name__).error(f"Application error: {e}", exc_info=True) + finally: + # Clean shutdown + logging.getLogger(__name__).info("Initiating shutdown...") + # Stop UI server first + stop_ui_server() + # Then cleanup collector + await collector.cleanup() + logging.getLogger(__name__).info("Application shutdown complete") + + +if __name__ == "__main__": + try: + asyncio.run(main()) + except KeyboardInterrupt: + print("\nShutdown requested by user") + except Exception as e: + print(f"Fatal error: {e}") + sys.exit(1) diff --git a/ui.py b/ui.py new file mode 100644 index 0000000..d13bc2a --- /dev/null +++ b/ui.py @@ -0,0 +1,179 @@ +#!/usr/bin/env python3 +""" +ui.py - Main Application Entry Point and FastAPI App Initialization +Orchestrates all components and initializes the FastAPI application +""" + +import asyncio +import logging +from typing import Dict, Any, Optional +from pathlib import Path +from fastapi import FastAPI, WebSocket +from fastapi.middleware.cors import CORSMiddleware +import uvicorn +from dotenv import load_dotenv + +# Import application modules +from db import DatabaseManager +from utils import load_config, setup_logging +from main import BinanceDataCollector + +# Import UI modules +from ui_models import serialize_for_json +from ui_routes import APIRoutes +from ui_websocket import handle_websocket_connection, broadcast_status_updates, websocket_connections +from ui_state import state_manager, get_current_status + +# Load environment variables +load_dotenv('variables.env') + +# Setup logging +setup_logging() +logger = logging.getLogger(__name__) + +# Global application components +app = FastAPI( + title="Crypto Trading Data Collector", + version="3.1.0", + description="Real-time cryptocurrency market data collection and analysis platform" +) + +db_manager: Optional[DatabaseManager] = None +data_collector: Optional[BinanceDataCollector] = None +config: Dict[str, Any] = {} +api_routes: Optional[APIRoutes] = None + +# Add CORS middleware +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + + +@app.on_event("startup") +async def startup_event(): + """Initialize application on startup""" + global db_manager, data_collector, config, api_routes + + try: + logger.info("=" * 80) + logger.info("Starting Crypto Trading Data Collector v3.1.0") + logger.info("=" * 80) + + # Load configuration + config = load_config() + logger.info("✓ Configuration loaded successfully") + + # Initialize database + db_manager = DatabaseManager() + await db_manager.initialize() + logger.info("✓ Database initialized successfully") + + # Initialize data collector + data_collector = BinanceDataCollector() + await data_collector.initialize() + logger.info("✓ Data collector initialized successfully") + + # Initialize API routes + api_routes = APIRoutes( + app, + db_manager, + data_collector, + config, + 
state_manager + ) + logger.info("✓ API routes registered successfully") + + # Restore collection state if it was running before reload + if state_manager.get("is_collecting", False): + logger.info("Restoring collection state from persistent storage...") + try: + await data_collector.start_continuous_collection() + logger.info("✓ Collection state restored successfully") + except Exception as e: + logger.error(f"✗ Error restoring collection state: {e}") + state_manager.update(is_collecting=False) + + # Start WebSocket broadcaster + async def status_getter(): + return await get_current_status(db_manager, data_collector, config) + + asyncio.create_task(broadcast_status_updates(status_getter)) + logger.info("✓ WebSocket broadcaster started") + + logger.info("=" * 80) + logger.info("FastAPI application startup complete - Ready to serve requests") + logger.info("=" * 80) + + except Exception as e: + logger.error("=" * 80) + logger.error(f"FATAL ERROR during startup: {e}", exc_info=True) + logger.error("=" * 80) + raise + + +@app.on_event("shutdown") +async def shutdown_event(): + """Clean shutdown""" + global db_manager, data_collector + + try: + logger.info("=" * 80) + logger.info("Shutting down Crypto Trading Data Collector") + logger.info("=" * 80) + + # Save current state before shutdown + if data_collector: + state_manager.update( + is_collecting=data_collector.is_collecting if hasattr(data_collector, 'is_collecting') else False, + websocket_collection_running=data_collector.websocket_collection_running if hasattr(data_collector, 'websocket_collection_running') else False + ) + logger.info("✓ State saved") + + # Close database connections + if db_manager: + try: + await db_manager.close() + logger.info("✓ Database connections closed") + except Exception as e: + logger.error(f"✗ Error closing database: {e}") + + logger.info("=" * 80) + logger.info("Shutdown complete") + logger.info("=" * 80) + + except Exception as e: + logger.error(f"Error during shutdown: {e}", exc_info=True) + + +@app.websocket("/ws") +async def websocket_endpoint(websocket: WebSocket): + """WebSocket endpoint for real-time updates""" + await handle_websocket_connection(websocket) + + +def main(): + """Main entry point for running the application""" + import os + + # Get configuration from environment or use defaults + host = os.getenv("WEB_HOST", "0.0.0.0") + port = int(os.getenv("WEB_PORT", "8000")) + reload = os.getenv("WEB_RELOAD", "False").lower() == "true" + + logger.info(f"Starting server on {host}:{port} (reload={reload})") + + uvicorn.run( + "ui:app", + host=host, + port=port, + reload=reload, + log_level="info" + ) + + +if __name__ == "__main__": + main() diff --git a/ui_models.py b/ui_models.py new file mode 100644 index 0000000..d1fd6f8 --- /dev/null +++ b/ui_models.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python3 + +""" +ui_models.py - Pydantic Models and Data Structures + +Defines all API request/response models and data validation schemas +""" + +from datetime import datetime +from decimal import Decimal +from typing import Dict, List, Optional, Any, Union +from pydantic import BaseModel, Field, validator + + +# Pydantic models for API requests/responses + +class TradingPairConfig(BaseModel): + """Configuration for a trading pair""" + symbol: str + enabled: bool + priority: int = 1 + record_from_date: Optional[str] = None + + +class TradingPairAdd(BaseModel): + """Request to add a new trading pair""" + symbol: str + priority: int = 1 + record_from_date: Optional[str] = None + + +class 
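# A minimal client for the /ws status endpoint defined in ui.py above, assuming the
# default WEB_HOST/WEB_PORT (localhost:8000); it simply prints whatever the
# broadcaster pushes:
import asyncio
import websockets

async def watch_status(url: str = "ws://localhost:8000/ws") -> None:
    async with websockets.connect(url) as ws:
        async for message in ws:
            print(message)

# asyncio.run(watch_status())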
BulkDownloadRequest(BaseModel): + """Request for bulk historical data download""" + symbols: List[str] # Changed from 'symbol' to 'symbols' to support multiple + start_date: str + end_date: Optional[str] = None + intervals: Optional[List[str]] = None + + +class GapFillRequest(BaseModel): + """Request to fill data gaps""" + symbol: str + interval: str + gap_start: str + gap_end: str + + +class AutoGapFillRequest(BaseModel): + """Request to automatically fill gaps for a symbol""" + symbol: str + intervals: Optional[List[str]] = None + fill_genuine_gaps: bool = True + + +class GapDetectionRequest(BaseModel): + """Request to detect gaps""" + symbol: Optional[str] = None + interval: Optional[str] = None + + +class TechnicalIndicatorsConfig(BaseModel): + """Complete technical indicators configuration (matching config.conf structure)""" + enabled: Optional[List[str]] = None + periods: Optional[Dict[str, Any]] = None + calculation_intervals: Optional[List[str]] = None + + class Config: + extra = "allow" # Allow additional fields + + +class ConfigUpdate(BaseModel): + """Update application configuration - accepts partial updates""" + trading_pairs: Optional[List[TradingPairConfig]] = None + technical_indicators: Optional[Dict[str, Any]] = None + collection: Optional[Dict[str, Any]] = None + gap_filling: Optional[Dict[str, Any]] = None + database: Optional[Dict[str, Any]] = None + ui: Optional[Dict[str, Any]] = None + monitoring: Optional[Dict[str, Any]] = None + alerts: Optional[Dict[str, Any]] = None + data_quality: Optional[Dict[str, Any]] = None + features: Optional[Dict[str, Any]] = None + system: Optional[Dict[str, Any]] = None + + class Config: + extra = "allow" # Allow additional config sections + + +class EnvVarUpdate(BaseModel): + """Update environment variable""" + key: str + value: str + + +class ChartDataRequest(BaseModel): + """Request chart data for visualization""" + symbol: str + interval: str = "1h" + limit: int = 500 + + +# Utility functions for JSON serialization + +def serialize_for_json(obj: Any) -> Any: + """Recursively serialize datetime and Decimal objects in nested structures""" + if isinstance(obj, datetime): + return obj.isoformat() + elif isinstance(obj, Decimal): + return float(obj) + elif isinstance(obj, dict): + return {k: serialize_for_json(v) for k, v in obj.items()} + elif isinstance(obj, list): + return [serialize_for_json(item) for item in obj] + return obj diff --git a/ui_routes.py b/ui_routes.py new file mode 100644 index 0000000..cb56435 --- /dev/null +++ b/ui_routes.py @@ -0,0 +1,721 @@ +#!/usr/bin/env python3 + +""" +ui_routes.py - API Endpoints and Route Handlers + +Defines all FastAPI routes and business logic for API endpoints +""" + +import asyncio +import logging +from datetime import datetime, timedelta, timezone +from typing import Dict, Any, Optional, List + +from fastapi import HTTPException, Request +from fastapi.responses import HTMLResponse, JSONResponse + +from dotenv import set_key, dotenv_values + +# UI and models +from ui_models import ( + TradingPairConfig, + TradingPairAdd, + BulkDownloadRequest, + GapFillRequest, + ConfigUpdate, + EnvVarUpdate, + ChartDataRequest, + AutoGapFillRequest, + GapDetectionRequest, + serialize_for_json, +) + +from ui_template_dashboard import get_dashboard_html +from ui_template_config import get_config_html +from ui_state import get_current_status +from utils import load_config, save_config, validate_symbol, reload_env_vars + +logger = logging.getLogger(__name__) + + +def _tz_aware(dt: datetime) -> datetime: + if 
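# Worked example of serialize_for_json (defined in ui_models above): datetimes become
# ISO strings and Decimals become floats, recursively through dicts and lists.
from datetime import datetime
from decimal import Decimal
from ui_models import serialize_for_json

print(serialize_for_json({"t": datetime(2024, 1, 1), "p": Decimal("42.5"), "xs": [Decimal("1")]}))
# {'t': '2024-01-01T00:00:00', 'p': 42.5, 'xs': [1.0]}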
dt.tzinfo is None: + return dt.replace(tzinfo=timezone.utc) + return dt.astimezone(timezone.utc) + + +def _ok(data: Any, status: str = "success", http_status: int = 200) -> JSONResponse: + return JSONResponse(content={"status": status, "data": serialize_for_json(data)}, status_code=http_status) + + +def _err(message: str, http_status: int = 500, extra: Optional[Dict[str, Any]] = None) -> JSONResponse: + payload = {"status": "error", "message": message} + if extra: + payload.update(extra) + return JSONResponse(content=payload, status_code=http_status) + + +class APIRoutes: + """Encapsulates all API route handlers""" + + def __init__(self, app, db_manager, data_collector, config, state_manager): + self.app = app + self.db_manager = db_manager + self.data_collector = data_collector + self.config = config + self.state_manager = state_manager + + # Register all routes + self._register_routes() + + def _register_routes(self): + """Register all API routes""" + + # --------------------------- + # Pages + # --------------------------- + + @self.app.get("/", response_class=HTMLResponse) + async def dashboard(): + """Serve the main dashboard""" + return get_dashboard_html() + + @self.app.get("/config", response_class=HTMLResponse) + async def config_page(): + """Serve the configuration management page""" + return get_config_html() + + @self.app.get("/gaps", response_class=HTMLResponse) + async def gaps_page(): + """Serve the gap monitoring page""" + from ui_template_gaps import get_gaps_monitoring_html + return get_gaps_monitoring_html() + + # --------------------------- + # Status + # --------------------------- + + @self.app.get("/api/stats") + async def get_stats(): + """Get current system statistics""" + try: + status = await get_current_status(self.db_manager, self.data_collector, self.config) + return JSONResponse(content=serialize_for_json(status)) + except Exception as e: + logger.error(f"Error getting stats: {e}", exc_info=True) + raise HTTPException(status_code=500, detail=str(e)) + + # --------------------------- + # Gaps and Coverage + # --------------------------- + + @self.app.get("/api/gaps/all-pairs") + async def get_all_pairs_gaps(): + """Get gap status for all trading pairs""" + try: + if not self.db_manager: + logger.error("Database manager not initialized") + return _err("Database not initialized", 500) + logger.info("Fetching gap status for all pairs") + status = await self.db_manager.get_all_pairs_gap_status() + logger.info(f"Retrieved gap status for {len(status)} pair-interval combinations") + return _ok(status) + except Exception as e: + logger.error(f"Error getting all pairs gaps: {e}", exc_info=True) + return _err(str(e), 500) + + @self.app.get("/api/gaps/details/{symbol}/{interval}") + async def get_gap_details(symbol: str, interval: str): + """Get detailed gap information including daily coverage""" + try: + if not self.db_manager: + raise HTTPException(status_code=500, detail="Database not initialized") + + sym = symbol.upper() + gap_info = await self.db_manager.detect_gaps(sym, interval) + end_date = datetime.utcnow() + start_date = end_date - timedelta(days=90) + daily_coverage = await self.db_manager.get_data_coverage_by_day(sym, interval, start_date, end_date) + + data = { + "coverage_percent": gap_info.get('coverage', {}).get('coverage_percent', 0), + "total_records": gap_info.get('coverage', {}).get('total_records', 0), + "missing_records": gap_info.get('coverage', {}).get('missing_records', 0), + "gaps": gap_info.get('gaps', []), + "daily_coverage": 
daily_coverage.get('daily_coverage', []), + } + return _ok(data) + except Exception as e: + logger.error(f"Error getting gap details: {e}", exc_info=True) + return _err(str(e), 500) + + @self.app.post("/api/gaps/fill-intelligent") + async def fill_gaps_intelligent(request: Request): + """Intelligently fill gaps with multiple attempts and averaging fallback""" + try: + body = await request.json() + symbol = body.get('symbol') + interval = body.get('interval') + max_attempts = int(body.get('max_attempts', 3)) + + if not symbol or not interval: + return _err("Missing symbol or interval", 400) + + if not self.db_manager: + raise HTTPException(status_code=500, detail="Database not initialized") + + result = await self.db_manager.fill_gaps_intelligently(symbol.upper(), interval, max_attempts) + logger.info(f"Intelligent gap fill completed: {result}") + return _ok(result) + except Exception as e: + logger.error(f"Error in intelligent gap fill: {e}", exc_info=True) + return _err(str(e), 500) + + @self.app.get("/api/gaps/prioritized/{symbol}/{interval}") + async def get_prioritized_gaps(symbol: str, interval: str): + """Get gaps sorted by priority (recent and small gaps first)""" + try: + if not self.db_manager: + raise HTTPException(status_code=500, detail="Database not initialized") + prioritized = await self.db_manager.get_prioritized_gaps(symbol.upper(), interval) + return _ok(prioritized) + except Exception as e: + logger.error(f"Error getting prioritized gaps: {e}", exc_info=True) + return _err(str(e), 500) + + @self.app.get("/api/gaps/progress/{symbol}/{interval}") + async def get_gap_progress(symbol: str, interval: str): + """Get real-time progress and estimated completion time""" + try: + if not self.db_manager: + raise HTTPException(status_code=500, detail="Database not initialized") + progress = await self.db_manager.get_gap_fill_progress(symbol.upper(), interval) + return _ok(progress) + except Exception as e: + logger.error(f"Error getting gap progress: {e}", exc_info=True) + return _err(str(e), 500) + + @self.app.get("/api/gaps/health/{symbol}/{interval}") + async def get_data_health(symbol: str, interval: str): + """Get comprehensive data health analysis""" + try: + if not self.db_manager: + raise HTTPException(status_code=500, detail="Database not initialized") + health = await self.db_manager.check_data_health(symbol.upper(), interval) + return _ok(health) + except Exception as e: + logger.error(f"Error checking data health: {e}", exc_info=True) + return _err(str(e), 500) + + @self.app.post("/api/gaps/smart-fill/{symbol}") + async def smart_fill_gaps(symbol: str): + """Intelligently fill gaps starting with highest priority""" + try: + if not self.db_manager: + raise HTTPException(status_code=500, detail="Database not initialized") + + from utils import load_config + cfg = load_config() + intervals = cfg.get('collection', {}).get('candle_intervals', ['1m', '5m', '15m', '1h', '4h', '1d']) + + results: List[Dict[str, Any]] = [] + for interval in intervals: + prioritized = await self.db_manager.get_prioritized_gaps(symbol.upper(), interval) + if not prioritized: + continue + filled = 0 + for gap in prioritized[:5]: + if gap.get('missing_candles', 0) <= 100: + try: + await self.db_manager.fill_gaps_intelligently(symbol.upper(), interval, max_attempts=3) + filled += 1 + except Exception as e: + logger.error(f"Error filling gap: {e}") + results.append({'interval': interval, 'gaps_filled': filled, 'total_gaps': len(prioritized)}) + + return JSONResponse(content={"status": "success", 
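# Example of invoking the intelligent gap-fill endpoint above from a script, using
# aiohttp (already a project dependency) and assuming the default WEB_HOST/WEB_PORT:
import asyncio
import aiohttp

async def trigger_intelligent_fill(symbol: str = "BTCUSDT", interval: str = "1h",
                                   max_attempts: int = 3) -> dict:
    async with aiohttp.ClientSession() as session:
        async with session.post(
            "http://localhost:8000/api/gaps/fill-intelligent",
            json={"symbol": symbol, "interval": interval, "max_attempts": max_attempts},
        ) as resp:
            return await resp.json()

# asyncio.run(trigger_intelligent_fill())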
"message": f"Smart fill completed for {symbol}", "data": results}) + except Exception as e: + logger.error(f"Error in smart fill: {e}", exc_info=True) + return _err(str(e), 500) + + @self.app.post("/api/gaps/fill") + async def fill_gaps(request: GapFillRequest): + """Fill data gaps""" + try: + if not self.data_collector: + raise HTTPException(status_code=500, detail="Data collector not initialized") + + gap_start = datetime.fromisoformat(request.gap_start) + gap_end = datetime.fromisoformat(request.gap_end) + gap_start = _tz_aware(gap_start) + gap_end = _tz_aware(gap_end) + + await self.data_collector.bulk_download_historical_data( + request.symbol.upper(), + gap_start, + gap_end, + [request.interval], + ) + logger.info(f"Gap filled for {request.symbol} {request.interval}") + return JSONResponse(content={"status": "success", "message": "Gap filled successfully"}) + except Exception as e: + logger.error(f"Error filling gap: {e}", exc_info=True) + return _err(str(e), 500) + + @self.app.post("/api/gaps/auto-fill") + async def auto_fill_gaps(request: AutoGapFillRequest): + """Automatically fill gaps for a symbol""" + try: + if not self.data_collector: + raise HTTPException(status_code=500, detail="Data collector not initialized") + result = await self.data_collector.auto_fill_gaps( + request.symbol.upper(), + request.intervals, + request.fill_genuine_gaps, + ) + logger.info(f"Auto gap fill completed for {request.symbol}: {result}") + return JSONResponse(content={"status": "success", "message": f"Filled gaps for {request.symbol}", "result": serialize_for_json(result)}) + except Exception as e: + logger.error(f"Error in auto gap fill: {e}", exc_info=True) + return _err(str(e), 500) + + @self.app.get("/api/gaps/summary") + async def get_gaps_summary(): + """Get summary of all gaps across all symbols""" + try: + if not self.db_manager: + raise HTTPException(status_code=500, detail="Database not initialized") + summary = await self.db_manager.get_all_gaps_summary() + return _ok(summary) + except Exception as e: + logger.error(f"Error getting gaps summary: {e}", exc_info=True) + return _err(str(e), 500) + + @self.app.get("/api/gaps/status/{symbol}/{interval}") + async def get_gap_status(symbol: str, interval: str): + """Get gap fill status for a specific symbol/interval""" + try: + if not self.db_manager: + raise HTTPException(status_code=500, detail="Database not initialized") + status = await self.db_manager.get_gap_fill_status(symbol.upper(), interval) + return _ok(status) + except Exception as e: + logger.error(f"Error getting gap status: {e}", exc_info=True) + return _err(str(e), 500) + + @self.app.get("/api/gaps/{symbol}/{interval}") + async def detect_gaps(symbol: str, interval: str): + """Detect data gaps""" + try: + if not self.db_manager: + raise HTTPException(status_code=500, detail="Database not initialized") + gaps = await self.db_manager.detect_gaps(symbol.upper(), interval) + return JSONResponse(content={"status": "success", "gaps": serialize_for_json(gaps)}) + except Exception as e: + logger.error(f"Error detecting gaps: {e}", exc_info=True) + return _err(str(e), 500) + + @self.app.post("/api/gaps/fill-genuine/{symbol}/{interval}") + async def fill_genuine_gaps(symbol: str, interval: str): + """Fill genuine empty gaps with intelligent averaging""" + try: + if not self.db_manager: + raise HTTPException(status_code=500, detail="Database not initialized") + gap_config = self.config.get('gap_filling', {}) + max_consecutive = int(gap_config.get('max_consecutive_empty_candles', 5)) + 
lookback = int(gap_config.get('averaging_lookback_candles', 10)) + filled_count = await self.db_manager.fill_genuine_gaps_with_averages( + symbol.upper(), interval, max_consecutive, lookback + ) + logger.info(f"Filled {filled_count} genuine gaps for {symbol} {interval}") + return JSONResponse( + content={ + "status": "success", + "message": f"Filled {filled_count} genuine empty candles", + "filled_count": filled_count, + } + ) + except Exception as e: + logger.error(f"Error filling genuine gaps: {e}", exc_info=True) + return _err(str(e), 500) + + # --------------------------- + # Symbols and Prices + # --------------------------- + + @self.app.get("/api/symbols") + async def get_symbols(): + """Get list of all available symbols""" + try: + if not self.db_manager: + logger.error("Database manager not initialized") + return JSONResponse(content={"status": "error", "symbols": []}, status_code=500) + symbols = await self.db_manager.get_available_symbols() + logger.info(f"Retrieved {len(symbols)} symbols from database") + return JSONResponse(content={"status": "success", "symbols": symbols}) + except Exception as e: + logger.error(f"Error getting symbols: {e}", exc_info=True) + return JSONResponse(content={"status": "error", "symbols": []}, status_code=500) + + @self.app.get("/api/price-trends/{symbol}") + async def get_price_trends(symbol: str): + """Get current price and trend indicators for multiple timeframes""" + try: + if not self.db_manager: + logger.error("Database manager not initialized") + return _err("Database not initialized", 500) + logger.info(f"Price trends request for {symbol}") + data = await self.db_manager.get_current_price_and_trends_with_volume(symbol.upper()) + if not data: + logger.warning(f"No price data found for {symbol}") + return _err(f"No data found for {symbol}. 
Please start data collection first.", 404) + pair_config = next((p for p in self.config.get('trading_pairs', []) if p['symbol'] == symbol.upper()), None) + data['enabled'] = pair_config.get('enabled', False) if pair_config else False + logger.info(f"Returning price trends for {symbol}: price={data.get('current_price')}") + return _ok(data) + except Exception as e: + logger.error(f"Error getting price trends: {e}", exc_info=True) + return _err(f"Error retrieving price trends: {str(e)}", 500) + + # --------------------------- + # Collection control + # --------------------------- + + @self.app.post("/api/collection/start") + async def start_collection(): + """Start data collection""" + try: + if not self.data_collector: + raise HTTPException(status_code=500, detail="Data collector not initialized") + if self.state_manager.get("is_collecting", False): + return JSONResponse(content={"status": "info", "message": "Collection already running"}) + await self.data_collector.start_continuous_collection() + self.state_manager.update(is_collecting=True) + logger.info("Collection started via API") + return JSONResponse(content={"status": "success", "message": "Collection started"}) + except Exception as e: + logger.error(f"Error starting collection: {e}", exc_info=True) + return JSONResponse(content={"status": "error", "message": str(e)}, status_code=500) + + @self.app.post("/api/collection/stop") + async def stop_collection(): + """Stop data collection""" + try: + if not self.data_collector: + raise HTTPException(status_code=500, detail="Data collector not initialized") + if not self.state_manager.get("is_collecting", False): + return JSONResponse(content={"status": "info", "message": "Collection not running"}) + await self.data_collector.stop_continuous_collection() + self.state_manager.update(is_collecting=False) + logger.info("Collection stopped via API") + return JSONResponse(content={"status": "success", "message": "Collection stopped"}) + except Exception as e: + logger.error(f"Error stopping collection: {e}", exc_info=True) + return JSONResponse(content={"status": "error", "message": str(e)}, status_code=500) + + # --------------------------- + # Configuration + # --------------------------- + + @self.app.get("/api/config") + async def get_configuration(): + """Get current configuration""" + try: + cfg = load_config() + return JSONResponse(content=serialize_for_json(cfg)) + except Exception as e: + logger.error(f"Error getting config: {e}", exc_info=True) + raise HTTPException(status_code=500, detail=str(e)) + + @self.app.post("/api/config") + async def update_configuration(request: Request): + """Update configuration - accepts raw JSON body""" + try: + body = await request.json() + logger.info(f"Received config update keys: {list(body.keys())}") + + current_config = load_config() + # Deep merge/replace top-level keys + for key, value in body.items(): + if key in current_config and isinstance(current_config[key], dict) and isinstance(value, dict): + current_config[key].update(value) + else: + current_config[key] = value + + save_config(current_config) + self.config.clear() + self.config.update(current_config) + logger.info("Configuration updated successfully") + return JSONResponse(content={"status": "success", "message": "Configuration updated"}) + except Exception as e: + logger.error(f"Error updating config: {e}", exc_info=True) + return _err(str(e), 500) + + @self.app.post("/api/trading-pairs") + async def add_trading_pair(pair: TradingPairAdd): + """Add a new trading pair""" + try: + if 
not validate_symbol(pair.symbol.upper()): + return JSONResponse(content={"status": "error", "message": "Invalid symbol format"}, status_code=400) + + cfg = load_config() + existing = [p for p in cfg.get('trading_pairs', []) if p['symbol'] == pair.symbol.upper()] + if existing: + return JSONResponse(content={"status": "error", "message": "Trading pair already exists"}, status_code=409) + + record_from_date = pair.record_from_date or cfg.get('collection', {}).get('default_record_from_date', '2020-01-01T00:00:00Z') + cfg.setdefault('trading_pairs', []).append({ + 'symbol': pair.symbol.upper(), + 'enabled': True, + 'priority': pair.priority, + 'record_from_date': record_from_date, + }) + save_config(cfg) + self.config.clear() + self.config.update(cfg) + logger.info(f"Added trading pair: {pair.symbol}") + return JSONResponse(content={"status": "success", "message": f"Added {pair.symbol}"}) + except Exception as e: + logger.error(f"Error adding trading pair: {e}", exc_info=True) + return JSONResponse(content={"status": "error", "message": str(e)}, status_code=500) + + @self.app.put("/api/trading-pairs/{symbol}") + async def update_trading_pair(symbol: str, request: Request): + """Update a trading pair's configuration""" + try: + update = await request.json() + logger.info(f"Updating trading pair {symbol}: {update}") + cfg = load_config() + + pair_found = False + for pair in cfg.get('trading_pairs', []): + if pair['symbol'] == symbol.upper(): + if 'enabled' in update: + pair['enabled'] = bool(update['enabled']) + if 'priority' in update: + pair['priority'] = int(update['priority']) + if 'record_from_date' in update: + pair['record_from_date'] = update['record_from_date'] + pair_found = True + break + + if not pair_found: + return JSONResponse(content={"status": "error", "message": "Trading pair not found"}, status_code=404) + + save_config(cfg) + self.config.clear() + self.config.update(cfg) + logger.info(f"Updated trading pair: {symbol}") + return JSONResponse(content={"status": "success", "message": f"Updated {symbol}"}) + except Exception as e: + logger.error(f"Error updating trading pair: {e}", exc_info=True) + return JSONResponse(content={"status": "error", "message": str(e)}, status_code=500) + + @self.app.delete("/api/trading-pairs/{symbol}") + async def remove_trading_pair(symbol: str): + """Remove a trading pair""" + try: + cfg = load_config() + original_count = len(cfg.get('trading_pairs', [])) + cfg['trading_pairs'] = [p for p in cfg.get('trading_pairs', []) if p['symbol'] != symbol.upper()] + + if len(cfg['trading_pairs']) == original_count: + return JSONResponse(content={"status": "error", "message": "Trading pair not found"}, status_code=404) + + save_config(cfg) + self.config.clear() + self.config.update(cfg) + logger.info(f"Removed trading pair: {symbol}") + return JSONResponse(content={"status": "success", "message": f"Removed {symbol}"}) + except Exception as e: + logger.error(f"Error removing trading pair: {e}", exc_info=True) + return JSONResponse(content={"status": "error", "message": str(e)}, status_code=500) + + @self.app.post("/api/indicators/toggle/{indicator_name}") + async def toggle_indicator(indicator_name: str): + """Toggle a technical indicator on/off""" + try: + cfg = load_config() + enabled_indicators = cfg.setdefault('technical_indicators', {}).setdefault('enabled', []) + if indicator_name in enabled_indicators: + enabled_indicators.remove(indicator_name) + action = "disabled" + else: + enabled_indicators.append(indicator_name) + action = "enabled" + 
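                # Example of the toggle semantics: starting from the default enabled list
                # ["sma", "ema", "rsi", "macd", "bb", "atr"], toggling "stoch" appends it ("enabled"),
                # while toggling "rsi" removes it ("disabled").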
save_config(cfg) + self.config.clear() + self.config.update(cfg) + logger.info(f"Indicator {indicator_name} {action}") + return JSONResponse(content={"status": "success", "message": f"Indicator {indicator_name} {action}", "enabled": indicator_name in enabled_indicators}) + except Exception as e: + logger.error(f"Error toggling indicator: {e}", exc_info=True) + return JSONResponse(content={"status": "error", "message": str(e)}, status_code=500) + + @self.app.put("/api/indicators/{indicator_name}/periods") + async def update_indicator_periods(indicator_name: str, request: Request): + """Update periods for a technical indicator""" + try: + body = await request.json() + periods = body.get('periods') + if periods is None: + return JSONResponse(content={"status": "error", "message": "Missing 'periods' in request"}, status_code=400) + + cfg = load_config() + periods_cfg = cfg.setdefault('technical_indicators', {}).setdefault('periods', {}) + if indicator_name not in periods_cfg: + return JSONResponse(content={"status": "error", "message": f"Unknown indicator: {indicator_name}"}, status_code=404) + + periods_cfg[indicator_name] = periods + save_config(cfg) + self.config.clear() + self.config.update(cfg) + logger.info(f"Updated {indicator_name} periods to {periods}") + return JSONResponse(content={"status": "success", "message": f"Updated {indicator_name} periods"}) + except Exception as e: + logger.error(f"Error updating indicator periods: {e}", exc_info=True) + return JSONResponse(content={"status": "error", "message": str(e)}, status_code=500) + + # --------------------------- + # Chart and Data + # --------------------------- + + @self.app.post("/api/chart-data") + async def get_chart_data(request: ChartDataRequest): + """Get chart data for visualization""" + try: + if not self.db_manager: + logger.error("Database manager not initialized") + return JSONResponse(content={"status": "error", "message": "Database not initialized"}, status_code=500) + + logger.info(f"Chart data request: symbol={request.symbol}, interval={request.interval}, limit={request.limit}") + data = await self.db_manager.get_recent_candles(request.symbol.upper(), request.interval, request.limit) + logger.info(f"Retrieved {len(data) if data else 0} candles from database") + if not data: + logger.warning(f"No data found for {request.symbol} at {request.interval}") + return JSONResponse(content={"status": "error", "message": f"No data found for {request.symbol} at {request.interval}. 
Please start data collection or download historical data first."}, status_code=404) + logger.info(f"Returning {len(data)} candles for {request.symbol}") + return JSONResponse(content={"status": "success", "data": data}) + except Exception as e: + logger.error(f"Error getting chart data: {e}", exc_info=True) + return JSONResponse(content={"status": "error", "message": f"Error retrieving chart data: {str(e)}"}, status_code=500) + + @self.app.post("/api/bulk-download") + async def bulk_download(request: BulkDownloadRequest): + """Download historical data in bulk""" + try: + if not self.data_collector: + raise HTTPException(status_code=500, detail="Data collector not initialized") + + start_date = datetime.fromisoformat(request.start_date) + end_date = datetime.fromisoformat(request.end_date) if request.end_date else datetime.utcnow() + start_date = _tz_aware(start_date) + end_date = _tz_aware(end_date) + + intervals = request.intervals or ['1h', '4h', '1d'] + results = [] + + for symbol in request.symbols: + try: + symu = symbol.upper() + # Initialize progress for UI + self.data_collector.download_progress[symu] = { + 'status': 'pending', + 'intervals': {i: {'status': 'pending', 'records': 0} for i in intervals}, + 'start_time': datetime.now(timezone.utc).isoformat(), + } + # Spawn task + task = asyncio.create_task( + self.data_collector.bulk_download_historical_data(symu, start_date, end_date, intervals) + ) + results.append({'symbol': symu, 'status': 'started', 'intervals': intervals}) + logger.info(f"Bulk download started for {symbol}") + except Exception as ie: + logger.error(f"Error starting bulk download for {symbol}: {ie}") + results.append({'symbol': symu, 'status': 'error', 'error': str(ie)}) + + return JSONResponse(content={"status": "success", "message": f"Bulk download started for {len(request.symbols)} symbol(s)", "results": results}) + except Exception as e: + logger.error(f"Error starting bulk download: {e}", exc_info=True) + return JSONResponse(content={"status": "error", "message": str(e)}, status_code=500) + + @self.app.get("/api/download-progress") + async def get_download_progress(): + """Get progress for all active downloads""" + try: + if not self.data_collector: + return JSONResponse(content={"status": "error", "message": "Data collector not initialized"}, status_code=500) + progress = await self.data_collector.get_download_progress() + return JSONResponse(content={"status": "success", "downloads": serialize_for_json(progress)}) + except Exception as e: + logger.error(f"Error getting download progress: {e}", exc_info=True) + return JSONResponse(content={"status": "error", "message": str(e)}, status_code=500) + + # --------------------------- + # Environment variables + # --------------------------- + + @self.app.get("/api/env") + async def get_env_vars(): + """Get environment variables""" + try: + env_vars = dotenv_values('variables.env') or {} + safe_vars = { + k: ('***' if any(s in k.upper() for s in ['SECRET', 'KEY', 'PASSWORD', 'TOKEN']) else v) + for k, v in env_vars.items() + } + return JSONResponse(content=safe_vars) + except Exception as e: + logger.error(f"Error getting env vars: {e}", exc_info=True) + raise HTTPException(status_code=500, detail=str(e)) + + @self.app.post("/api/env") + async def update_env_var(env_update: EnvVarUpdate): + """Update environment variable""" + try: + key_upper = env_update.key.upper() + display_value = env_update.value if not any(s in key_upper for s in ['PASSWORD', 'SECRET', 'KEY', 'TOKEN']) else '***' + logger.info(f"Updating 
env var: {env_update.key} = {display_value}") + set_key('variables.env', env_update.key, env_update.value) + reload_env_vars('variables.env') + logger.info(f"Updated and reloaded env var: {env_update.key}") + return JSONResponse(content={"status": "success", "message": f"Updated {env_update.key}"}) + except Exception as e: + logger.error(f"Error updating env var: {e}", exc_info=True) + return JSONResponse(content={"status": "error", "message": str(e)}, status_code=500) + + @self.app.delete("/api/env/{key}") + async def delete_env_var(key: str): + """Delete environment variable""" + try: + # Manual edit due to lack of delete in python-dotenv API + try: + with open('variables.env', 'r', encoding='utf-8') as f: + lines = f.readlines() + except FileNotFoundError: + lines = [] + new_lines = [line for line in lines if not line.startswith(f"{key}=")] + with open('variables.env', 'w', encoding='utf-8') as f: + f.writelines(new_lines) + reload_env_vars('variables.env') + logger.info(f"Deleted env var: {key}") + return JSONResponse(content={"status": "success", "message": f"Deleted {key}"}) + except Exception as e: + logger.error(f"Error deleting env var: {e}", exc_info=True) + return JSONResponse(content={"status": "error", "message": str(e)}, status_code=500) + + # --------------------------- + # Database stats + # --------------------------- + + @self.app.get("/api/database/stats") + async def get_database_stats(): + """Get detailed database statistics""" + try: + if not self.db_manager: + raise HTTPException(status_code=500, detail="Database not initialized") + stats = await self.db_manager.get_detailed_statistics() + return JSONResponse(content={"status": "success", "stats": serialize_for_json(stats)}) + except Exception as e: + logger.error(f"Error getting database stats: {e}", exc_info=True) + return JSONResponse(content={"status": "error", "message": str(e)}, status_code=500) diff --git a/ui_state.py b/ui_state.py new file mode 100644 index 0000000..a475287 --- /dev/null +++ b/ui_state.py @@ -0,0 +1,136 @@ +#!/usr/bin/env python3 +""" +ui_state.py - State Management and Persistence +Handles persistent state across application reloads with file-based storage +""" + +import json +import logging +from datetime import datetime +from pathlib import Path +from typing import Dict, Any, Optional + +logger = logging.getLogger(__name__) + +STATE_FILE = Path(".collector_state.json") + + +class StateManager: + """Thread-safe state manager that persists across uvicorn reloads""" + + def __init__(self): + self.state = self._load_state() + + def _load_state(self) -> Dict[str, Any]: + """Load state from disk with integrity checks""" + try: + if STATE_FILE.exists(): + with open(STATE_FILE, 'r') as f: + state = json.load(f) + + # Check if state is recent (within last 60 seconds) + if 'timestamp' in state: + saved_time = datetime.fromisoformat(state['timestamp']) + age = (datetime.utcnow() - saved_time).total_seconds() + + if age < 60: # Extended validity window + logger.info( + f"Loaded persistent state (age: {age:.1f}s): " + f"collecting={state.get('is_collecting')}" + ) + return state + else: + logger.info(f"State too old ({age:.1f}s), starting fresh") + except Exception as e: + logger.error(f"Error loading state: {e}") + + return { + "is_collecting": False, + "websocket_collection_running": False, + "timestamp": datetime.utcnow().isoformat() + } + + def _save_state(self): + """Save state to disk atomically""" + try: + self.state['timestamp'] = datetime.utcnow().isoformat() + + # Atomic write using temp 
file + temp_file = STATE_FILE.with_suffix('.tmp') + with open(temp_file, 'w') as f: + json.dump(self.state, f) + temp_file.replace(STATE_FILE) + + logger.debug(f"Saved state: {self.state}") + except Exception as e: + logger.error(f"Error saving state: {e}") + + def update(self, **kwargs): + """Update state and persist""" + self.state.update(kwargs) + self._save_state() + + def get(self, key: str, default=None): + """Get state value""" + return self.state.get(key, default) + + def get_all(self) -> Dict[str, Any]: + """Get all state""" + return self.state.copy() + + +# Global state manager instance +state_manager = StateManager() + + +async def get_current_status(db_manager, data_collector, config) -> Dict[str, Any]: + """Get current system status - robust against reload issues""" + try: + # Use state manager as source of truth + is_collecting = state_manager.get("is_collecting", False) + + # Double-check with data collector if available + if data_collector and hasattr(data_collector, 'is_collecting'): + actual_collecting = data_collector.is_collecting + + # Sync state if mismatch detected + if actual_collecting != is_collecting: + logger.warning( + f"State mismatch detected! State: {is_collecting}, " + f"Actual: {actual_collecting}" + ) + is_collecting = actual_collecting + state_manager.update(is_collecting=actual_collecting) + + # Get database statistics + total_records = await db_manager.get_total_records() if db_manager else 0 + last_update = await db_manager.get_last_update_time() if db_manager else "Never" + + # Get active trading pairs + active_pairs = [] + if config and 'trading_pairs' in config: + active_pairs = [ + pair['symbol'] + for pair in config['trading_pairs'] + if pair.get('enabled', False) + ] + + return { + "status": "Active" if is_collecting else "Stopped", + "total_records": total_records, + "last_update": last_update, + "active_pairs": len(active_pairs), + "active_pair_list": active_pairs, + "is_collecting": is_collecting + } + except Exception as e: + logger.error(f"Error getting status: {e}") + return { + "status": "Error", + "total_records": 0, + "last_update": "Never", + "active_pairs": 0, + "active_pair_list": [], + "is_collecting": False, + "error": str(e) + } diff --git a/ui_template_config.py b/ui_template_config.py new file mode 100644 index 0000000..ada50e0 --- /dev/null +++ b/ui_template_config.py @@ -0,0 +1,981 @@ +#!/usr/bin/env python3 + +""" +ui_template_config.py - Configuration Management HTML Template + +Contains the configuration interface for managing trading pairs, indicators, +gap filling settings, and system configuration +""" + +def get_config_html(): + """Return the configuration management HTML""" + return """ + + + + + + Configuration - Trading Intelligence System + + + + +
[Markup for the configuration page body was not preserved. Recoverable structure:
 - Header: "⚙️ System Configuration" with subtitle "Manage trading pairs, indicators, and system settings"
 - "📊 Trading Pairs" table with columns Symbol, Enabled, Priority, Record From Date, Actions (placeholder row "Loading...")
 - "📈 Technical Indicators" panel: "Enable or disable technical indicators and configure their parameters" ("Loading indicators...")
 - "🔧 Gap Filling Configuration" panel ("Loading gap filling settings...")
 - "📥 Collection Settings" panel ("Loading collection settings...")
 - "🔧 Environment Variables" table with columns Key, Value, Actions ("Loading...")
 - "💾 Save Configuration" panel: "Save all configuration changes to disk"]
+ + + + + + + + + + + """ + diff --git a/ui_template_dashboard.py b/ui_template_dashboard.py new file mode 100644 index 0000000..28274ae --- /dev/null +++ b/ui_template_dashboard.py @@ -0,0 +1,1601 @@ +#!/usr/bin/env python3 + +""" +ui_template_dashboard.py - Dashboard HTML Template + +Contains the main dashboard interface with charts, status, and improved trading view +""" + +def get_dashboard_html(): + """Return the main dashboard HTML""" + return """ + + + + + + Trading Intelligence System - Dashboard + + + + + + + +
[Markup for the dashboard page body was not preserved. Recoverable structure:
 - Header: "📈 Trading Intelligence System" with subtitle "Real-time market data collection and analysis platform"
 - Status cards: Status ("Loading..."), Total Records (0), Active Pairs (0), Last Update ("Never")
 - "⚙️ Collection Controls" panel
 - "💹 Active Trading Pairs" table with columns Pair, Price, 15M, 1H, 1D, 1W, Volume Status, Active ("Loading pairs...")
 - "📊 Advanced Price Chart" panel]
+ + + + + + + + + + +""" diff --git a/ui_template_gaps.py b/ui_template_gaps.py new file mode 100644 index 0000000..fc27a34 --- /dev/null +++ b/ui_template_gaps.py @@ -0,0 +1,780 @@ +#!/usr/bin/env python3 +""" +ui_template_gaps.py - Data Gap Monitoring Interface +Provides visual interface for tracking and filling data gaps +""" + +def get_gaps_monitoring_html(): + """Return the gaps monitoring page HTML""" + return """ + + + + + + Gap Monitoring - Trading System + + + +
[Markup for the gap-monitoring page body was not preserved. Recoverable structure:
 - Header: "📊 Data Gap Monitoring" with subtitle "Track and fill data gaps across all trading pairs"
 - Summary cards: Total Pairs, Pairs with Gaps, Total Missing Records, Avg Coverage (all initially "-")
 - "Trading Pairs Gap Status" table ("Loading gap data...")]
+ + + + + + + + """ diff --git a/ui_websocket.py b/ui_websocket.py new file mode 100644 index 0000000..2741713 --- /dev/null +++ b/ui_websocket.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python3 +""" +ui_websocket.py - WebSocket Connections and Real-time Updates +Handles WebSocket connections and broadcasts real-time status updates +""" + +import asyncio +import logging +from typing import List +from fastapi import WebSocket, WebSocketDisconnect + +logger = logging.getLogger(__name__) + +# Global WebSocket connection pool +websocket_connections: List[WebSocket] = [] + + +async def broadcast_to_websockets(message: dict): + """Send message to all connected WebSocket clients""" + disconnected = [] + + for ws in websocket_connections: + try: + await ws.send_json(message) + except Exception: + disconnected.append(ws) + + # Remove disconnected clients + for ws in disconnected: + if ws in websocket_connections: + websocket_connections.remove(ws) + + +async def broadcast_status_updates(get_status_func): + """Background task to broadcast status updates to all WebSocket clients""" + while True: + try: + await asyncio.sleep(2) # Broadcast every 2 seconds + + if websocket_connections: + status = await get_status_func() + await broadcast_to_websockets({ + "type": "status_update", + "data": status + }) + except Exception as e: + logger.error(f"Error in broadcast task: {e}") + + +async def handle_websocket_connection(websocket: WebSocket): + """Handle individual WebSocket connection""" + await websocket.accept() + websocket_connections.append(websocket) + logger.info(f"WebSocket connected. Total connections: {len(websocket_connections)}") + + try: + while True: + # Keep connection alive and handle incoming messages + data = await websocket.receive_text() + logger.debug(f"Received WebSocket message: {data}") + + # Echo or handle specific commands if needed + # await websocket.send_json({"type": "ack", "message": "received"}) + + except WebSocketDisconnect: + logger.info("WebSocket disconnected normally") + except Exception as e: + logger.error(f"WebSocket error: {e}") + finally: + if websocket in websocket_connections: + websocket_connections.remove(websocket) + logger.info(f"WebSocket removed. 
Total connections: {len(websocket_connections)}") diff --git a/utils.py b/utils.py new file mode 100644 index 0000000..7bf6707 --- /dev/null +++ b/utils.py @@ -0,0 +1,854 @@ +#!/usr/bin/env python3 + +""" +utils.py - Utility Functions for Data Processing and Technical Indicators + +Utility functions for data processing, technical indicators, validation, and configuration management +""" + +import json +import logging +import os +import re +import tempfile +import shutil +from datetime import datetime, timezone +from typing import Dict, List, Optional, Any, Union +import pandas as pd +import pandas_ta as ta +import numpy as np +from decimal import Decimal, ROUND_HALF_UP, InvalidOperation as DecimalException +from dotenv import load_dotenv + +# Load environment variables +load_dotenv('variables.env') + +def setup_logging(log_level: str = None, log_file: str = None): + """Setup logging configuration""" + # Use environment variables if parameters not provided + if log_level is None: + log_level = os.getenv('LOG_LEVEL', 'INFO') + if log_file is None: + log_file = os.getenv('LOG_FILE', 'crypto_collector.log') + + # Create logs directory if it doesn't exist + os.makedirs("logs", exist_ok=True) + + log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + date_format = "%Y-%m-%d %H:%M:%S" + + # Configure root logger + logging.basicConfig( + level=getattr(logging, log_level.upper()), + format=log_format, + datefmt=date_format, + handlers=[ + logging.FileHandler(f"logs/{log_file}"), + logging.StreamHandler() + ] + ) + + # Set specific log levels for external libraries + logging.getLogger("websockets").setLevel(logging.WARNING) + logging.getLogger("asyncio").setLevel(logging.WARNING) + logging.getLogger("urllib3").setLevel(logging.WARNING) + logging.getLogger("binance").setLevel(logging.WARNING) + +def load_config(config_file: str = "config.conf") -> Dict[str, Any]: + """Load configuration from JSON file""" + logger = logging.getLogger(__name__) + try: + with open(config_file, 'r') as f: + config = json.load(f) + + # Validate configuration structure + validate_config(config) + logger.debug(f"Successfully loaded config from {config_file}") + return config + + except FileNotFoundError: + logger.warning(f"Config file {config_file} not found, creating default") + # Create default configuration if file doesn't exist + default_config = create_default_config() + save_config(default_config, config_file) + return default_config + + except json.JSONDecodeError as e: + logger.error(f"Invalid JSON in configuration file: {e}") + raise ValueError(f"Invalid JSON in configuration file: {e}") + +def create_default_config() -> Dict[str, Any]: + """Create default configuration""" + return { + "trading_pairs": [ + {"symbol": "BTCUSDT", "enabled": True, "priority": 1}, + {"symbol": "ETHUSDT", "enabled": True, "priority": 1}, + {"symbol": "BNBUSDT", "enabled": True, "priority": 2}, + {"symbol": "XRPUSDT", "enabled": True, "priority": 3}, + {"symbol": "SOLUSDT", "enabled": True, "priority": 2} + ], + "technical_indicators": { + "enabled": ["sma", "ema", "rsi", "macd", "bb", "atr"], + "periods": { + "sma": [20, 50, 200], + "ema": [12, 26], + "rsi": [14], + "macd": {"fast": 12, "slow": 26, "signal": 9}, + "bb": {"period": 20, "std": 2}, + "atr": [14], + "stoch": {"k_period": 14, "d_period": 3}, + "adx": [14] + }, + "calculation_intervals": ["1m", "5m", "15m", "1h", "4h", "1d"] + }, + "collection": { + "bulk_chunk_size": 1000, + "websocket_reconnect_delay": 5, + "tick_batch_size": 100, + "candle_intervals": ["1m", 
"5m", "15m", "1h", "4h", "1d"], + "max_retries": 3, + "retry_delay": 1, + "rate_limit_requests_per_minute": 2000, + "concurrent_symbol_limit": 10 + }, + "database": { + "batch_insert_size": 1000, + "compression_after_days": 7, + "retention_policy_days": 365, + "vacuum_analyze_interval_hours": 24, + "connection_pool": { + "min_size": 10, + "max_size": 50, + "command_timeout": 60 + } + }, + "ui": { + "refresh_interval_seconds": 5, + "max_chart_points": 1000, + "default_timeframe": "1d", + "theme": "dark", + "enable_realtime_updates": True + }, + "gap_filling": { + "enable_auto_gap_filling": True, + "auto_fill_schedule_hours": 24, + "intervals_to_monitor": ["1m", "5m", "15m", "1h", "4h", "1d"], + "max_gap_size_candles": 1000, + "max_consecutive_empty_candles": 5, + "averaging_lookback_candles": 10, + "enable_intelligent_averaging": True, + "max_fill_attempts": 3 + }, + + } + +def save_config(config: Dict[str, Any], config_file: str = "config.conf"): + """Save configuration to JSON file using atomic write""" + logger = logging.getLogger(__name__) + + try: + # Validate before saving + validate_config(config) + + # Get the directory of the config file + config_dir = os.path.dirname(config_file) or '.' + + # Create a temporary file in the same directory + temp_fd, temp_path = tempfile.mkstemp( + dir=config_dir, + prefix='.tmp_config_', + suffix='.conf', + text=True + ) + + try: + # Write to temporary file + with os.fdopen(temp_fd, 'w') as f: + json.dump(config, f, indent=2, sort_keys=False) + f.flush() + os.fsync(f.fileno()) # Force write to disk + + # Atomic rename + shutil.move(temp_path, config_file) + logger.info(f"Configuration saved successfully to {config_file}") + + except Exception as e: + # Clean up temp file on error + try: + os.unlink(temp_path) + except: + pass + raise + + except Exception as e: + logger.error(f"Error saving config: {e}", exc_info=True) + raise + +def validate_config(config: Dict[str, Any]): + """Validate configuration structure""" + required_sections = ["trading_pairs", "technical_indicators", "collection", "database"] + + for section in required_sections: + if section not in config: + raise ValueError(f"Missing required configuration section: {section}") + + # Validate trading pairs + if not isinstance(config["trading_pairs"], list): + raise ValueError("trading_pairs must be a list") + + for pair in config["trading_pairs"]: + if not isinstance(pair, dict) or "symbol" not in pair: + raise ValueError("Invalid trading pair configuration") + if not validate_symbol(pair["symbol"]): + raise ValueError(f"Invalid symbol format: {pair['symbol']}") + + # Ensure required fields with defaults + if "enabled" not in pair: + pair["enabled"] = True + if "priority" not in pair: + pair["priority"] = 1 + + # Validate technical indicators + indicators_config = config["technical_indicators"] + if "enabled" not in indicators_config or "periods" not in indicators_config: + raise ValueError("Invalid technical indicators configuration") + + if not isinstance(indicators_config["enabled"], list): + raise ValueError("technical_indicators.enabled must be a list") + +def validate_symbol(symbol: str) -> bool: + """Validate trading pair symbol format""" + # Binance symbol format: base currency + quote currency (e.g., BTCUSDT) + if not symbol or len(symbol) < 6: + return False + + # Should be uppercase letters/numbers only + if not re.match(r'^[A-Z0-9]+$', symbol): + return False + + # Should end with common quote currencies + quote_currencies = ['USDT', 'BUSD', 'BTC', 'ETH', 'BNB', 'USDC', 
'TUSD', 'DAI'] + if not any(symbol.endswith(quote) for quote in quote_currencies): + return False + + return True + +def reload_env_vars(env_file: str = 'variables.env'): + """Reload environment variables from file""" + from dotenv import load_dotenv + load_dotenv(env_file, override=True) + +def format_timestamp(timestamp: Union[int, float, str, datetime]) -> datetime: + """Format timestamp to datetime object""" + if isinstance(timestamp, datetime): + # Ensure timezone awareness + if timestamp.tzinfo is None: + return timestamp.replace(tzinfo=timezone.utc) + return timestamp + + if isinstance(timestamp, str): + try: + # Try parsing ISO format first + return datetime.fromisoformat(timestamp.replace('Z', '+00:00')) + except ValueError: + try: + # Try parsing as timestamp + timestamp = float(timestamp) + except ValueError: + raise ValueError(f"Invalid timestamp string format: {timestamp}") + + if isinstance(timestamp, (int, float)): + # Handle both seconds and milliseconds timestamps + if timestamp > 1e10: # Milliseconds + timestamp = timestamp / 1000 + return datetime.fromtimestamp(timestamp, tz=timezone.utc) + + raise ValueError(f"Invalid timestamp format: {type(timestamp)}") + +def parse_kline_data(data: Dict[str, Any]) -> Dict[str, Any]: + """Parse Binance kline/candlestick data""" + kline = data['k'] + return { + 'time': format_timestamp(kline['t']), + 'symbol': kline['s'], + 'exchange': 'binance', + 'interval': kline['i'], + 'open_price': Decimal(str(kline['o'])), + 'high_price': Decimal(str(kline['h'])), + 'low_price': Decimal(str(kline['l'])), + 'close_price': Decimal(str(kline['c'])), + 'volume': Decimal(str(kline['v'])), + 'quote_volume': Decimal(str(kline['q'])) if 'q' in kline else None, + 'trade_count': int(kline['n']) if 'n' in kline else None + } + +def parse_trade_data(data: Dict[str, Any]) -> Dict[str, Any]: + """Parse Binance trade data""" + return { + 'time': format_timestamp(data['T']), + 'symbol': data['s'], + 'exchange': 'binance', + 'price': Decimal(str(data['p'])), + 'quantity': Decimal(str(data['q'])), + 'trade_id': int(data['t']), + 'is_buyer_maker': bool(data['m']) + } + +def calculate_technical_indicators(df: pd.DataFrame, indicators_config: Dict[str, Any]) -> List[Dict[str, Any]]: + """ + Calculate technical indicators using pandas_ta + + Args: + df: DataFrame with OHLCV data (index: time, columns: open, high, low, close, volume) + indicators_config: Configuration for indicators to calculate + + Returns: + List of dictionaries with indicator data + """ + if len(df) < 50: # Need enough data for most indicators + return [] + + # Create a copy and ensure proper data types + df_ta = df.copy() + + # Rename columns to match pandas_ta expectations if needed + column_mapping = { + 'open_price': 'open', + 'high_price': 'high', + 'low_price': 'low', + 'close_price': 'close' + } + + for old_col, new_col in column_mapping.items(): + if old_col in df_ta.columns and new_col not in df_ta.columns: + df_ta.rename(columns={old_col: new_col}, inplace=True) + + # **CRITICAL FIX**: Convert all columns to float64 to avoid numba pyobject errors + # This ensures pandas_ta's numba-compiled functions receive proper numeric types + required_columns = ['open', 'high', 'low', 'close', 'volume'] + for col in required_columns: + if col in df_ta.columns: + df_ta[col] = pd.to_numeric(df_ta[col], errors='coerce').astype(np.float64) + + # Remove any NaN values that may have been introduced + df_ta = df_ta.dropna() + + if len(df_ta) < 50: # Check again after cleaning + return [] + + indicators_data 
= [] + enabled_indicators = indicators_config.get('enabled', []) + periods = indicators_config.get('periods', {}) + logger = logging.getLogger(__name__) + + try: + for indicator in enabled_indicators: + if indicator == 'sma': + # Simple Moving Average + for period in periods.get('sma', [20]): + try: + sma_values = ta.sma(df_ta['close'], length=period) + if sma_values is not None: + for idx, value in sma_values.dropna().items(): + indicators_data.append({ + 'time': idx, + 'indicator_name': f'sma_{period}', + 'indicator_value': round(float(value), 8), + 'metadata': json.dumps({'period': period}) + }) + except Exception as e: + logger.error(f"Error calculating SMA-{period}: {e}") + + elif indicator == 'ema': + # Exponential Moving Average + for period in periods.get('ema', [12, 26]): + try: + ema_values = ta.ema(df_ta['close'], length=period) + if ema_values is not None: + for idx, value in ema_values.dropna().items(): + indicators_data.append({ + 'time': idx, + 'indicator_name': f'ema_{period}', + 'indicator_value': round(float(value), 8), + 'metadata': json.dumps({'period': period}) + }) + except Exception as e: + logger.error(f"Error calculating EMA-{period}: {e}") + + elif indicator == 'rsi': + # Relative Strength Index + for period in periods.get('rsi', [14]): + try: + rsi_values = ta.rsi(df_ta['close'], length=period) + if rsi_values is not None: + for idx, value in rsi_values.dropna().items(): + indicators_data.append({ + 'time': idx, + 'indicator_name': f'rsi_{period}', + 'indicator_value': round(float(value), 8), + 'metadata': json.dumps({'period': period}) + }) + except Exception as e: + logger.error(f"Error calculating RSI-{period}: {e}") + + elif indicator == 'macd': + # MACD + macd_config = periods.get('macd', {'fast': 12, 'slow': 26, 'signal': 9}) + try: + macd_result = ta.macd( + df_ta['close'], + fast=macd_config['fast'], + slow=macd_config['slow'], + signal=macd_config['signal'] + ) + + if macd_result is not None: + # MACD Line + macd_col = f"MACD_{macd_config['fast']}_{macd_config['slow']}_{macd_config['signal']}" + if macd_col in macd_result.columns: + for idx, value in macd_result[macd_col].dropna().items(): + indicators_data.append({ + 'time': idx, + 'indicator_name': 'macd_line', + 'indicator_value': round(float(value), 8), + 'metadata': json.dumps(macd_config) + }) + + # MACD Signal + signal_col = f"MACDs_{macd_config['fast']}_{macd_config['slow']}_{macd_config['signal']}" + if signal_col in macd_result.columns: + for idx, value in macd_result[signal_col].dropna().items(): + indicators_data.append({ + 'time': idx, + 'indicator_name': 'macd_signal', + 'indicator_value': round(float(value), 8), + 'metadata': json.dumps(macd_config) + }) + + # MACD Histogram + hist_col = f"MACDh_{macd_config['fast']}_{macd_config['slow']}_{macd_config['signal']}" + if hist_col in macd_result.columns: + for idx, value in macd_result[hist_col].dropna().items(): + indicators_data.append({ + 'time': idx, + 'indicator_name': 'macd_histogram', + 'indicator_value': round(float(value), 8), + 'metadata': json.dumps(macd_config) + }) + except Exception as e: + logger.error(f"Error calculating MACD: {e}") + + elif indicator == 'bb': + # Bollinger Bands + bb_config = periods.get('bb', {'period': 20, 'std': 2}) + try: + bb_result = ta.bbands( + df_ta['close'], + length=bb_config['period'], + std=bb_config['std'] + ) + + if bb_result is not None: + # Upper Band + for col in bb_result.columns: + if col.startswith(f"BBU_{bb_config['period']}"): + for idx, value in bb_result[col].dropna().items(): + 
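                                # pandas_ta typically names the bands like BBU_20_2.0 / BBM_20_2.0 / BBL_20_2.0,
                                # hence the startswith() matching; each matched upper-band value becomes one
                                # 'bb_upper' row keyed by its candle timestamp.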
indicators_data.append({ + 'time': idx, + 'indicator_name': 'bb_upper', + 'indicator_value': round(float(value), 8), + 'metadata': json.dumps(bb_config) + }) + break + + # Middle Band + for col in bb_result.columns: + if col.startswith(f"BBM_{bb_config['period']}"): + for idx, value in bb_result[col].dropna().items(): + indicators_data.append({ + 'time': idx, + 'indicator_name': 'bb_middle', + 'indicator_value': round(float(value), 8), + 'metadata': json.dumps(bb_config) + }) + break + + # Lower Band + for col in bb_result.columns: + if col.startswith(f"BBL_{bb_config['period']}"): + for idx, value in bb_result[col].dropna().items(): + indicators_data.append({ + 'time': idx, + 'indicator_name': 'bb_lower', + 'indicator_value': round(float(value), 8), + 'metadata': json.dumps(bb_config) + }) + break + except Exception as e: + logger.error(f"Error calculating Bollinger Bands: {e}") + + elif indicator == 'atr': + # Average True Range + for period in periods.get('atr', [14]): + try: + atr_values = ta.atr(df_ta['high'], df_ta['low'], df_ta['close'], length=period) + if atr_values is not None: + for idx, value in atr_values.dropna().items(): + indicators_data.append({ + 'time': idx, + 'indicator_name': f'atr_{period}', + 'indicator_value': round(float(value), 8), + 'metadata': json.dumps({'period': period}) + }) + except Exception as e: + logger.error(f"Error calculating ATR-{period}: {e}") + + elif indicator == 'stoch': + # Stochastic Oscillator + stoch_config = periods.get('stoch', {'k_period': 14, 'd_period': 3}) + try: + stoch_result = ta.stoch( + df_ta['high'], df_ta['low'], df_ta['close'], + k=stoch_config['k_period'], + d=stoch_config['d_period'] + ) + + if stoch_result is not None: + # %K + for col in stoch_result.columns: + if 'STOCHk' in col: + for idx, value in stoch_result[col].dropna().items(): + indicators_data.append({ + 'time': idx, + 'indicator_name': 'stoch_k', + 'indicator_value': round(float(value), 8), + 'metadata': json.dumps(stoch_config) + }) + break + + # %D + for col in stoch_result.columns: + if 'STOCHd' in col: + for idx, value in stoch_result[col].dropna().items(): + indicators_data.append({ + 'time': idx, + 'indicator_name': 'stoch_d', + 'indicator_value': round(float(value), 8), + 'metadata': json.dumps(stoch_config) + }) + break + except Exception as e: + logger.error(f"Error calculating Stochastic: {e}") + + elif indicator == 'adx': + # Average Directional Index + for period in periods.get('adx', [14]): + try: + adx_result = ta.adx(df_ta['high'], df_ta['low'], df_ta['close'], length=period) + if adx_result is not None: + adx_col = f"ADX_{period}" + if adx_col in adx_result.columns: + for idx, value in adx_result[adx_col].dropna().items(): + indicators_data.append({ + 'time': idx, + 'indicator_name': f'adx_{period}', + 'indicator_value': round(float(value), 8), + 'metadata': json.dumps({'period': period}) + }) + except Exception as e: + logger.error(f"Error calculating ADX-{period}: {e}") + + except Exception as e: + logger.error(f"Error calculating technical indicators: {e}", exc_info=True) + + return indicators_data + +def resample_ticks_to_ohlcv(ticks: List[Dict[str, Any]], interval: str) -> List[Dict[str, Any]]: + """ + Resample tick data to OHLCV format + + Args: + ticks: List of tick data dictionaries + interval: Resampling interval (e.g., '1min', '5min', '1H') + + Returns: + List of OHLCV dictionaries + """ + if not ticks: + return [] + + # Convert to DataFrame + df = pd.DataFrame(ticks) + df['time'] = pd.to_datetime(df['time']) + df.set_index('time', 
inplace=True) + + # Convert price and quantity to float + df['price'] = pd.to_numeric(df['price'], errors='coerce') + df['quantity'] = pd.to_numeric(df['quantity'], errors='coerce') + + # Group by symbol and resample + ohlcv_data = [] + for symbol in df['symbol'].unique(): + symbol_df = df[df['symbol'] == symbol].copy() + + # Resample price data + ohlcv = symbol_df['price'].resample(interval).agg({ + 'open': 'first', + 'high': 'max', + 'low': 'min', + 'close': 'last' + }) + + # Resample volume and trade count + volume = symbol_df['quantity'].resample(interval).sum() + trade_count = symbol_df.resample(interval).size() + + # Combine data + for timestamp, row in ohlcv.iterrows(): + if pd.notna(row['open']): # Skip empty periods + ohlcv_data.append({ + 'time': timestamp, + 'symbol': symbol, + 'exchange': symbol_df['exchange'].iloc[0] if 'exchange' in symbol_df.columns else 'binance', + 'interval': interval, + 'open_price': Decimal(str(row['open'])).quantize(Decimal('0.00000001'), rounding=ROUND_HALF_UP), + 'high_price': Decimal(str(row['high'])).quantize(Decimal('0.00000001'), rounding=ROUND_HALF_UP), + 'low_price': Decimal(str(row['low'])).quantize(Decimal('0.00000001'), rounding=ROUND_HALF_UP), + 'close_price': Decimal(str(row['close'])).quantize(Decimal('0.00000001'), rounding=ROUND_HALF_UP), + 'volume': Decimal(str(volume.loc[timestamp])) if timestamp in volume.index else Decimal('0'), + 'quote_volume': None, + 'trade_count': int(trade_count.loc[timestamp]) if timestamp in trade_count.index else 0 + }) + + return ohlcv_data + +def validate_ohlcv_data(ohlcv: Dict[str, Any]) -> bool: + """Validate OHLCV data integrity""" + try: + # Check required fields + required_fields = ['time', 'symbol', 'open_price', 'high_price', 'low_price', 'close_price', 'volume'] + for field in required_fields: + if field not in ohlcv: + return False + + # Check price relationships + high = float(ohlcv['high_price']) + low = float(ohlcv['low_price']) + open_price = float(ohlcv['open_price']) + close = float(ohlcv['close_price']) + + # High should be >= all other prices + if high < max(low, open_price, close): + return False + + # Low should be <= all other prices + if low > min(high, open_price, close): + return False + + # All prices should be positive + if any(price <= 0 for price in [high, low, open_price, close]): + return False + + # Volume should be non-negative + if float(ohlcv['volume']) < 0: + return False + + return True + + except (ValueError, TypeError, KeyError): + return False + +def calculate_price_change(current_price: float, previous_price: float) -> Dict[str, float]: + """Calculate price change and percentage change""" + if previous_price == 0: + return {'change': 0.0, 'change_percent': 0.0} + + change = current_price - previous_price + change_percent = (change / previous_price) * 100 + + return { + 'change': round(change, 8), + 'change_percent': round(change_percent, 4) + } + +def format_volume(volume: Union[int, float, Decimal]) -> str: + """Format volume for display""" + volume = float(volume) + + if volume >= 1e9: + return f"{volume / 1e9:.2f}B" + elif volume >= 1e6: + return f"{volume / 1e6:.2f}M" + elif volume >= 1e3: + return f"{volume / 1e3:.2f}K" + else: + return f"{volume:.2f}" + +def get_interval_seconds(interval: str) -> int: + """Convert interval string to seconds""" + interval_map = { + '1s': 1, + '1m': 60, + '3m': 180, + '5m': 300, + '15m': 900, + '30m': 1800, + '1h': 3600, + '2h': 7200, + '4h': 14400, + '6h': 21600, + '8h': 28800, + '12h': 43200, + '1d': 86400, + '3d': 259200, + 
'1w': 604800, + '1M': 2592000 # Approximate + } + + return interval_map.get(interval, 60) # Default to 1 minute + +def safe_decimal_conversion(value: Any) -> Optional[Decimal]: + """Safely convert value to Decimal""" + try: + if value is None or value == '': + return None + return Decimal(str(value)).quantize(Decimal('0.00000001'), rounding=ROUND_HALF_UP) + except (ValueError, TypeError, DecimalException): + return None + +def batch_data(data: List[Any], batch_size: int) -> List[List[Any]]: + """Split data into batches""" + batches = [] + for i in range(0, len(data), batch_size): + batches.append(data[i:i + batch_size]) + return batches + +def get_binance_symbol_info(symbol: str) -> Dict[str, Any]: + """Get symbol information for validation""" + # This is a simplified version - in production you might want to fetch from Binance API + common_symbols = { + 'BTCUSDT': {'baseAsset': 'BTC', 'quoteAsset': 'USDT', 'status': 'TRADING'}, + 'ETHUSDT': {'baseAsset': 'ETH', 'quoteAsset': 'USDT', 'status': 'TRADING'}, + 'BNBUSDT': {'baseAsset': 'BNB', 'quoteAsset': 'USDT', 'status': 'TRADING'}, + 'XRPUSDT': {'baseAsset': 'XRP', 'quoteAsset': 'USDT', 'status': 'TRADING'}, + 'SOLUSDT': {'baseAsset': 'SOL', 'quoteAsset': 'USDT', 'status': 'TRADING'}, + 'ADAUSDT': {'baseAsset': 'ADA', 'quoteAsset': 'USDT', 'status': 'TRADING'}, + 'DOTUSDT': {'baseAsset': 'DOT', 'quoteAsset': 'USDT', 'status': 'TRADING'}, + 'LINKUSDT': {'baseAsset': 'LINK', 'quoteAsset': 'USDT', 'status': 'TRADING'}, + 'LTCUSDT': {'baseAsset': 'LTC', 'quoteAsset': 'USDT', 'status': 'TRADING'}, + 'HBARUSDT': {'baseAsset': 'HBAR', 'quoteAsset': 'USDT', 'status': 'TRADING'}, + 'HBARBTC': {'baseAsset': 'HBAR', 'quoteAsset': 'BTC', 'status': 'TRADING'} + } + + return common_symbols.get(symbol, {'status': 'UNKNOWN'}) + +class DataValidator: + """Class for validating trading data""" + + @staticmethod + def validate_tick_data(tick: Dict[str, Any]) -> bool: + """Validate tick/trade data""" + try: + required_fields = ['time', 'symbol', 'price', 'quantity', 'trade_id'] + for field in required_fields: + if field not in tick: + return False + + # Validate data types and ranges + if float(tick['price']) <= 0: + return False + + if float(tick['quantity']) <= 0: + return False + + if not isinstance(tick['trade_id'], (int, str)): + return False + + if not validate_symbol(tick['symbol']): + return False + + return True + + except (ValueError, TypeError): + return False + + @staticmethod + def validate_indicators_data(indicators: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Validate and clean indicators data""" + valid_indicators = [] + + for indicator in indicators: + try: + if ('time' in indicator and + 'indicator_name' in indicator and + 'indicator_value' in indicator): + + # Check for valid numeric value + value = float(indicator['indicator_value']) + if not (np.isnan(value) or np.isinf(value)): + valid_indicators.append(indicator) + + except (ValueError, TypeError): + continue + + return valid_indicators + +def create_error_response(error_message: str, error_code: str = "GENERAL_ERROR") -> Dict[str, Any]: + """Create standardized error response""" + return { + "success": False, + "error": { + "code": error_code, + "message": error_message, + "timestamp": datetime.utcnow().isoformat() + } + } + +def create_success_response(data: Any = None, message: str = "Success") -> Dict[str, Any]: + """Create standardized success response""" + response = { + "success": True, + "message": message, + "timestamp": datetime.utcnow().isoformat() + } + + if data is 
not None: + response["data"] = data + + return response + +class PerformanceTimer: + """Context manager for timing operations""" + + def __init__(self, operation_name: str): + self.operation_name = operation_name + self.start_time = None + self.logger = logging.getLogger(__name__) + + def __enter__(self): + self.start_time = datetime.utcnow() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + if self.start_time: + duration = (datetime.utcnow() - self.start_time).total_seconds() + + # Log slow operations + slow_threshold = float(os.getenv('SLOW_QUERY_THRESHOLD_MS', 1000)) / 1000 + + if duration > slow_threshold: + self.logger.warning(f"SLOW OPERATION: {self.operation_name} took {duration:.3f}s") + else: + self.logger.debug(f"{self.operation_name} completed in {duration:.3f}s") + +# Export main functions +__all__ = [ + 'setup_logging', 'load_config', 'save_config', 'validate_config', + 'create_default_config', 'validate_symbol', 'format_timestamp', + 'parse_kline_data', 'parse_trade_data', 'calculate_technical_indicators', + 'resample_ticks_to_ohlcv', 'validate_ohlcv_data', 'calculate_price_change', + 'format_volume', 'get_interval_seconds', 'safe_decimal_conversion', + 'batch_data', 'get_binance_symbol_info', 'DataValidator', + 'create_error_response', 'create_success_response', 'PerformanceTimer', + 'reload_env_vars' +] diff --git a/variables.env b/variables.env new file mode 100644 index 0000000..a2d0459 --- /dev/null +++ b/variables.env @@ -0,0 +1,74 @@ +# Environment Variables for Crypto Trading Data Collector +# Database Configuration +DB_HOST=localhost +DB_PORT=5432 +DB_NAME=crypto_trading +DB_USER=postgres +DB_PASSWORD=your_secure_password_here + +# Database Connection Pool Settings +DB_POOL_MIN_SIZE=20 +DB_POOL_MAX_SIZE=250 +DB_COMMAND_TIMEOUT=120 + +# Binance API Configuration (Optional - not needed for market data) +# BINANCE_API_KEY=your_binance_api_key_here +# BINANCE_SECRET_KEY=your_binance_secret_key_here + +# Application Configuration +LOG_LEVEL=INFO +LOG_FILE=crypto_collector.log + +# Web UI Configuration +WEB_HOST=0.0.0.0 +WEB_PORT=8000 +WEB_RELOAD=true + +# Performance Settings +MAX_CONCURRENT_REQUESTS=100 +REQUEST_TIMEOUT=30 +WEBSOCKET_PING_INTERVAL=20 +WEBSOCKET_PING_TIMEOUT=60 + +# Data Collection Settings +BULK_DOWNLOAD_BATCH_SIZE=1000 +TICK_BATCH_SIZE=100 +WEBSOCKET_RECONNECT_DELAY=5 +MAX_RETRIES=3 + +# Database Maintenance +COMPRESSION_AFTER_DAYS=7 +RETENTION_POLICY_DAYS=365 +VACUUM_ANALYZE_INTERVAL_HOURS=24 + +# Monitoring and Alerting +ENABLE_METRICS=true +METRICS_PORT=9090 +ALERT_EMAIL_ENABLED=false +ALERT_EMAIL_SMTP_HOST=smtp.gmail.com +ALERT_EMAIL_SMTP_PORT=587 +ALERT_EMAIL_USERNAME=your_email@gmail.com +ALERT_EMAIL_PASSWORD=your_email_password +ALERT_EMAIL_TO=admin@yourcompany.com + +# Security Settings +SECRET_KEY=your_very_secure_secret_key_change_this_in_production +ALLOWED_HOSTS=localhost,127.0.0.1,0.0.0.0 +CORS_ORIGINS=http://localhost:3000,http://localhost:8000 + +# TimescaleDB Specific Settings +TIMESCALEDB_TELEMETRY=off +SHARED_PRELOAD_LIBRARIES=timescaledb + +# Memory and CPU Settings (adjust based on your 128GB RAM / 16-core setup) +WORK_MEM=1024MB +SHARED_BUFFERS=32GB +EFFECTIVE_CACHE_SIZE=64GB +MAX_CONNECTIONS=500 +MAX_WORKER_PROCESSES=14 +MAX_PARALLEL_WORKERS=14 +MAX_PARALLEL_WORKERS_PER_GATHER=8 + +# NEW: Concurrency Control +MAX_CONCURRENT_DOWNLOADS=3 +MAX_CONCURRENT_GAP_FILLS=2 \ No newline at end of file
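A quick way to sanity-check utils.py is to exercise its pure helpers, which need neither the
database nor an exchange connection. The following is a minimal, illustrative sketch only; it
assumes the repository's Python dependencies (pandas, pandas_ta, numpy, python-dotenv) are
installed, and the file name and all input values are invented for the example:

# sketch_utils_usage.py - illustrative smoke test for helpers defined in utils.py
from utils import (
    validate_symbol, get_interval_seconds, format_volume,
    calculate_price_change, batch_data, safe_decimal_conversion,
)

assert validate_symbol("BTCUSDT")                  # uppercase base + known quote currency
assert not validate_symbol("btcusdt")              # lowercase fails the ^[A-Z0-9]+$ check
print(get_interval_seconds("4h"))                  # 14400 seconds
print(format_volume(1_250_000))                    # "1.25M"
print(calculate_price_change(105.0, 100.0))        # {'change': 5.0, 'change_percent': 5.0}
print(batch_data(list(range(7)), batch_size=3))    # [[0, 1, 2], [3, 4, 5], [6]]
print(safe_decimal_conversion("61234.5"))          # Decimal('61234.50000000'), quantized to 8 dp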